Skip to content

Commit

Permalink
Feature: support Larynx TTS (#50)
Browse files Browse the repository at this point in the history
* Added Larynx, need to fix issues

* Refactor generateWithLarynx method to improve error handling and file management

* Update voice for Larynx in TextToSpeech.php
  • Loading branch information
Codycody31 authored Dec 11, 2023
1 parent 7135665 commit 5995fbc
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 6 deletions.
90 changes: 85 additions & 5 deletions src/app/Custom/TextToSpeech.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace App\Custom;

use Illuminate\Support\Facades\Log;
use Exception;
use FFMpeg\FFMpeg;
use FFMpeg\Format\Audio\Mp3;
Expand All @@ -10,8 +11,9 @@
* Text to Speech Generator.
*
* Supported TTS engines:
* - VoiceRSS
* - Eleven Labs
* - VoiceRSS
* - Eleven Labs
* - Larynx (Beta)
*/
class TextToSpeech
{
Expand All @@ -27,7 +29,7 @@ class TextToSpeech
*
* @param string $text The text to be converted to speech.
* @param string $language The language of the text (currently only used with VoiceRSS).
* @param string $engine The TTS engine to be used ('VoiceRSS', 'ElevenLabs', ...).
* @param string $engine The TTS engine to be used ('Larynx', 'VoiceRSS', 'ElevenLabs', ...).
* @param array $options Additional options for the TTS engine:
* - VoiceRSS:
* - format: 'MP3' (default), ...
Expand All @@ -41,8 +43,13 @@ class TextToSpeech
* - similarity_boost: int (default 0), ...
* - style: int (default 0), ...
* - use_speaker_boost: bool (default true), ...
* - AnotherTTSAPI (if added later):
* - ...
* - Larynx:
* - voice: 'en-us/cmu_rms-glow_tts' (default), ...
* - vocoder: 'hifi_gan/universal_large' (default), ...
* - denoiserStrength: float (default 0.002), ...
* - noiseScale: float (default 0.667), ...
* - lengthScale: float (default 0.85), ...
* - ssml: 'true' or 'false' as a string (default 'false'), ...
* @param array|null $apiKeys (Optional) The API keys for the TTS engines. Keys should be engine names with associated API key as value.
* @throws Exception If the TTS engine is not supported.
*/
Expand Down Expand Up @@ -120,6 +127,14 @@ public function __construct($text, $language = 'en-us', $engine = 'VoiceRSS', $o
'use_speaker_boost' => true
]
],
'Larynx' => [
'voice' => 'en-us/cmu_rms-glow_tts',
'vocoder' => 'hifi_gan/universal_large',
'denoiserStrength' => 0.002,
'noiseScale' => 0.667,
'lengthScale' => 0.85,
'ssml' => 'false',
]
];

// Check if the engine is supported
Expand All @@ -146,6 +161,9 @@ public function generateAudio(): bool|string
// Eleven Labs
case 'ElevenLabs':
return $this->generateWithElevenLabs();
// Larynx
case 'Larynx':
return $this->generateWithLarynx();
// Unsupported engine
default:
throw new Exception('Unsupported TTS Engine');
Expand Down Expand Up @@ -237,6 +255,56 @@ private function generateWithElevenLabs()
throw new Exception('Eleven Labs API Error'); // Handle error as per your requirements
}

/**
 * Generate the audio using the Larynx HTTP API.
 *
 * The response body is streamed into a temporary file, read back into memory
 * and returned. The temporary file and all handles are always released,
 * whether the request succeeds or fails.
 *
 * Reads $this->text and the 'voice', 'vocoder', 'denoiserStrength',
 * 'noiseScale', 'lengthScale' and 'ssml' entries of $this->OPTIONS.
 *
 * @return string The raw audio data returned by the API.
 * @throws Exception If the temporary file cannot be created, the request
 *                   fails, the server answers with an HTTP error status,
 *                   or the response is empty.
 */
private function generateWithLarynx()
{
    // A boolean would otherwise be serialised as "1" / "" in the query string;
    // send the same 'true' / 'false' literals used by the configured default.
    $ssml = $this->OPTIONS['ssml'];
    if (is_bool($ssml)) {
        $ssml = $ssml ? 'true' : 'false';
    }

    // Build the query with RFC 3986 encoding so that every parameter (not only
    // the text) is escaped; option values may contain '/', '&', spaces, etc.
    $query = http_build_query(
        [
            'text' => (string) $this->text,
            'voice' => (string) $this->OPTIONS['voice'],
            'vocoder' => (string) $this->OPTIONS['vocoder'],
            'denoiserStrength' => (string) $this->OPTIONS['denoiserStrength'],
            'noiseScale' => (string) $this->OPTIONS['noiseScale'],
            'lengthScale' => (string) $this->OPTIONS['lengthScale'],
            'ssml' => (string) $ssml,
        ],
        '',
        '&',
        PHP_QUERY_RFC3986
    );
    $requesturl = 'http://voice.vmgware.dev/api/tts?' . $query;

    $tmpfname = tempnam(sys_get_temp_dir(), 'tts');
    if ($tmpfname === false) {
        throw new Exception('Larynx API Error: Unable to create temporary file');
    }

    $fp = null;
    $ch = null;

    try {
        $fp = fopen($tmpfname, 'wb');
        if ($fp === false) {
            throw new Exception('Larynx API Error: Unable to open temporary file');
        }

        $ch = curl_init($requesturl);
        if ($ch === false) {
            throw new Exception('Larynx API Error: Unable to initialise cURL');
        }

        curl_setopt($ch, CURLOPT_FILE, $fp);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 60); // Adjust timeout as needed
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);

        $succeeded = curl_exec($ch);
        if ($succeeded === false || curl_errno($ch)) {
            throw new Exception('Larynx API Error: ' . curl_error($ch));
        }

        // With CURLOPT_FILE the body of an error response is written to the
        // file as well, so the status must be checked before treating it as audio.
        $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        if ($status >= 400) {
            throw new Exception('Larynx API Error: HTTP status ' . $status);
        }

        // Close the write handle first so all buffered data is on disk before reading.
        fclose($fp);
        $fp = null;

        $audioData = file_get_contents($tmpfname);
        if ($audioData === false || $audioData === '') {
            throw new Exception('Larynx API Error: No response or empty file');
        }

        return $audioData;
    } finally {
        // Release everything on both the success and the failure path.
        if (is_resource($fp)) {
            fclose($fp);
        }
        if ($ch !== null && $ch !== false) {
            curl_close($ch);
        }
        if (is_file($tmpfname)) {
            unlink($tmpfname);
        }
    }
}



/**
* Convert the provided audio data to MP3. (Beta)
*
Expand Down Expand Up @@ -341,6 +409,18 @@ public static function validateCustomConfig(array $customConfig): bool

return true; // Custom config is valid for ElevenLabs
}
if ($engine === 'Larynx') {
// Validate custom config for Larynx engine (example rules)
$validOptions = ['voice', 'vocoder', 'denoiserStrength', 'noiseScale', 'lengthScale', 'ssml'];

foreach ($customConfig[$engine] as $key => $value) {
if (!in_array($key, $validOptions)) {
return false; // Invalid option found
}
}

return true; // Custom config is valid for Larynx
}

return false; // Unsupported engine
}
Expand Down
2 changes: 1 addition & 1 deletion src/app/Http/Controllers/API/TextToSpeechController.php
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public function generate(Request $request): JsonResponse
}

// Make sure at least one API key is set
if (!TextToSpeech::hasApiKey() && !$options) {
if (!TextToSpeech::hasApiKey() && !$options && $engine != 'Larynx') {
Log::error('Your server voice API configuration is incorrect. Please check your .env file.');

// Return the response
Expand Down

0 comments on commit 5995fbc

Please sign in to comment.