Feature: support Larynx TTS (#50)

* Added Larynx, need to fix issues
* Refactor generateWithLarynx method to improve error handling and file management
* Update voice for Larynx in TextToSpeech.php
VMGWARE · Dec 11, 2023 · 5995fbc · 5995fbc
1 parent 7135665
commit 5995fbc
Show file tree

Hide file tree

Showing 2 changed files with 86 additions and 6 deletions.
diff --git a/src/app/Custom/TextToSpeech.php b/src/app/Custom/TextToSpeech.php
@@ -2,6 +2,7 @@
 
 namespace App\Custom;
 
+use Illuminate\Support\Facades\Log;
 use Exception;
 use FFMpeg\FFMpeg;
 use FFMpeg\Format\Audio\Mp3;
@@ -10,8 +11,9 @@
  * Text to Speech Generator.
  * 
  * Supported TTS engines:
- *     - VoiceRSS   
- *    - Eleven Labs
+ * - VoiceRSS   
+ * - Eleven Labs
+ * - Larynx (Beta)
  */
 class TextToSpeech
 {
@@ -27,7 +29,7 @@ class TextToSpeech
      *
      * @param string $text The text to be converted to speech.
      * @param string $language The language of the text (currently only used with VoiceRSS).
-     * @param string $engine The TTS engine to be used ('VoiceRSS', 'ElevenLabs', ...).
+     * @param string $engine The TTS engine to be used ('Larynx', 'VoiceRSS', 'ElevenLabs', ...).
      * @param array $options Additional options for the TTS engine:
      *      - VoiceRSS:
      *          - format: 'MP3' (default), ...
@@ -41,8 +43,13 @@ class TextToSpeech
      *              - similarity_boost: int (default 0), ...
      *              - style: int (default 0), ...
      *              - use_speaker_boost: bool (default true), ...
-     *      - AnotherTTSAPI (if added later):
-     *          - ...
+     *      - Larynx:
+     *         - voice: 'en-us/cmu_rms-glow_tts' (default), ...
+     *         - vocoder: 'hifi_gan/universal_large' (default), ...
+     *         - denoiserStrength: float (default 0.002), ...
+     *         - noiseScale: float (default 0.667), ...
+     *         - lengthScale: float (default 0.85), ...
+     *         - ssml: 'false' (default), ...
      * @param array|null $apiKeys (Optional) The API keys for the TTS engines. Keys should be engine names with associated API key as value.
      * @throws Exception If the TTS engine is not supported.
      */
@@ -120,6 +127,14 @@ public function __construct($text, $language = 'en-us', $engine = 'VoiceRSS', $o
                     'use_speaker_boost' => true
                 ]
             ],
+            'Larynx' => [
+                'voice' => 'en-us/cmu_rms-glow_tts',
+                'vocoder' => 'hifi_gan/universal_large',
+                'denoiserStrength' => 0.002,
+                'noiseScale' => 0.667,
+                'lengthScale' => 0.85,
+                'ssml' => 'false',
+            ]
         ];
 
         // Check if the engine is supported
@@ -146,6 +161,9 @@ public function generateAudio(): bool|string
                 // Eleven Labs
             case 'ElevenLabs':
                 return $this->generateWithElevenLabs();
+                // Larynx
+            case 'Larynx':
+                return $this->generateWithLarynx();
                 // Unsupported engine
             default:
                 throw new Exception('Unsupported TTS Engine');
@@ -237,6 +255,56 @@ private function generateWithElevenLabs()
         throw new Exception('Eleven Labs API Error');  // Handle error as per your requirements
     }
 
+    /**
+     * Generate the audio using the Larynx HTTP API.
+     *
+     * The response body is streamed by cURL into a temporary file, read back
+     * into memory and returned. The temporary file, its handle and the cURL
+     * handle are always released, whether the method returns or throws.
+     *
+     * @return string The raw audio data returned by the Larynx server
+     * @throws Exception If the request cannot be prepared, the transfer fails,
+     *                   the server answers with an HTTP error status, or the
+     *                   response body is empty
+     */
+    private function generateWithLarynx()
+    {
+        $params = [
+            'text' => $this->text,
+            'voice' => $this->OPTIONS['voice'],
+            'vocoder' => $this->OPTIONS['vocoder'],
+            'denoiserStrength' => $this->OPTIONS['denoiserStrength'],
+            'noiseScale' => $this->OPTIONS['noiseScale'],
+            'lengthScale' => $this->OPTIONS['lengthScale'],
+            'ssml' => $this->OPTIONS['ssml'],
+        ];
+
+        // Escape every value, not only the text: voice and vocoder contain '/'
+        // and custom options may contain characters that would break the query.
+        $pairs = [];
+        foreach ($params as $name => $value) {
+            if (is_bool($value)) {
+                // Keep the same 'true'/'false' wording as the default 'ssml' option
+                // instead of PHP's "1"/"" string casts.
+                $value = $value ? 'true' : 'false';
+            }
+            $pairs[] = $name . '=' . rawurlencode((string) $value);
+        }
+        $requesturl = 'http://voice.vmgware.dev/api/tts?' . implode('&', $pairs);
+
+        $ch = curl_init($requesturl);
+        if ($ch === false) {
+            throw new Exception('Larynx API Error: Unable to initialise cURL');
+        }
+
+        $tmpfname = false;
+        $fp = false;
+
+        try {
+            $tmpfname = tempnam(sys_get_temp_dir(), 'tts');
+            if ($tmpfname === false) {
+                throw new Exception('Larynx API Error: Unable to create temporary file');
+            }
+
+            $fp = fopen($tmpfname, 'wb');
+            if ($fp === false) {
+                throw new Exception('Larynx API Error: Unable to open temporary file');
+            }
+
+            curl_setopt($ch, CURLOPT_FILE, $fp);
+            curl_setopt($ch, CURLOPT_HEADER, 0);
+            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
+            curl_setopt($ch, CURLOPT_TIMEOUT, 60); // Adjust timeout as needed
+            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
+            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
+
+            curl_exec($ch);
+            if (curl_errno($ch)) {
+                throw new Exception('Larynx API Error: ' . curl_error($ch));
+            }
+
+            // A 4xx/5xx answer still produces a body; do not hand it back as audio.
+            $status = (int) curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
+            if ($status >= 400) {
+                throw new Exception('Larynx API Error: HTTP status ' . $status);
+            }
+
+            // Close the handle first so everything cURL wrote is flushed to disk.
+            fclose($fp);
+
+            $audioData = file_get_contents($tmpfname);
+            if ($audioData === false || $audioData === '') {
+                throw new Exception('Larynx API Error: No response or empty file');
+            }
+
+            return $audioData;
+        } finally {
+            // Runs on success and on every failure path above.
+            if (is_resource($fp)) {
+                fclose($fp);
+            }
+            curl_close($ch);
+            if ($tmpfname !== false && is_file($tmpfname)) {
+                unlink($tmpfname); // Delete the temporary file
+            }
+        }
+    }
+
+
+
     /**
      * Convert the provided audio data to MP3. (Beta)
      *
@@ -341,6 +409,18 @@ public static function validateCustomConfig(array $customConfig): bool
 
             return true; // Custom config is valid for ElevenLabs
         }
+        if ($engine === 'Larynx') {
+            // Validate custom config for Larynx engine (example rules)
+            $validOptions = ['voice', 'vocoder', 'denoiserStrength', 'noiseScale', 'lengthScale', 'ssml'];
+
+            foreach ($customConfig[$engine] as $key => $value) {
+                if (!in_array($key, $validOptions)) {
+                    return false; // Invalid option found
+                }
+            }
+
+            return true; // Custom config is valid for Larynx
+        }
 
         return false; // Unsupported engine
     }

diff --git a/src/app/Http/Controllers/API/TextToSpeechController.php b/src/app/Http/Controllers/API/TextToSpeechController.php
@@ -141,7 +141,7 @@ public function generate(Request $request): JsonResponse
         }
 
         // Make sure at least one API key is set
-        if (!TextToSpeech::hasApiKey() && !$options) {
+        if (!TextToSpeech::hasApiKey() && !$options && $engine != 'Larynx') {
             Log::error('Your server voice API configuration is incorrect. Please check your .env file.');
 
             // Return the response