-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
22 changed files
with
362 additions
and
125 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,3 +18,4 @@ reqwest-eventsource = "0" | |
futures = "0" | ||
rand = "0" | ||
base64 = "0" | ||
uuid = { version = "1", features = ["v4"] } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
pub mod chat; | ||
pub mod generate_zsh_completion; | ||
pub mod speak; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
use std::path::PathBuf; | ||
|
||
use clap::arg; | ||
use clap::Args; | ||
use tokio::io::stdin; | ||
use tokio::io::AsyncReadExt; | ||
|
||
use crate::gcloud::synthesize; | ||
use crate::tts; | ||
use crate::util::exception::Exception; | ||
|
||
#[derive(Args)] | ||
pub struct Speak { | ||
#[arg(long, help = "conf path")] | ||
conf: PathBuf, | ||
|
||
#[arg(long, help = "model name")] | ||
name: String, | ||
|
||
#[arg(long, help = "text")] | ||
text: Option<String>, | ||
|
||
#[arg(long, help = "stdin", default_value_t = false)] | ||
stdin: bool, | ||
} | ||
|
||
impl Speak { | ||
pub async fn execute(&self) -> Result<(), Exception> { | ||
if !self.stdin && self.text.is_none() { | ||
return Err(Exception::ValidationError("must specify --stdin or --text".to_string())); | ||
} | ||
|
||
let config = tts::load(&self.conf).await?; | ||
let model = config | ||
.models | ||
.get(&self.name) | ||
.ok_or_else(|| Exception::ValidationError(format!("can not find model, name={}", self.name)))?; | ||
|
||
let mut buffer = String::new(); | ||
let text = if self.stdin { | ||
stdin().read_to_string(&mut buffer).await?; | ||
&buffer | ||
} else { | ||
self.text.as_ref().unwrap() | ||
}; | ||
|
||
let gcloud = synthesize::GCloud { | ||
endpoint: model.endpoint.to_string(), | ||
project: model.params.get("project").unwrap().to_string(), | ||
voice: model.params.get("voice").unwrap().to_string(), | ||
}; | ||
|
||
gcloud.synthesize(text).await?; | ||
|
||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,10 @@ | ||
mod api; | ||
pub mod vertex; | ||
use std::env; | ||
|
||
pub mod gemini; | ||
mod gemini_api; | ||
pub mod synthesize; | ||
mod synthesize_api; | ||
|
||
/// Returns the bearer token read from the `GCLOUD_AUTH_TOKEN` environment variable.
///
/// # Panics
///
/// Panics with a "please set GCLOUD_AUTH_TOKEN env" message when the variable
/// cannot be read (unset, or not valid Unicode).
pub fn token() -> String {
    match env::var("GCLOUD_AUTH_TOKEN") {
        Ok(value) => value,
        Err(err) => panic!("please set GCLOUD_AUTH_TOKEN env: {:?}", err),
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
use std::borrow::Cow; | ||
use std::env::temp_dir; | ||
|
||
use base64::prelude::BASE64_STANDARD; | ||
use base64::DecodeError; | ||
use base64::Engine; | ||
use tokio::fs; | ||
use tokio::process::Command; | ||
use tracing::info; | ||
use uuid::Uuid; | ||
|
||
use super::token; | ||
use crate::gcloud::synthesize_api::AudioConfig; | ||
use crate::gcloud::synthesize_api::Input; | ||
use crate::gcloud::synthesize_api::SynthesizeRequest; | ||
use crate::gcloud::synthesize_api::SynthesizeResponse; | ||
use crate::gcloud::synthesize_api::Voice; | ||
use crate::util::exception::Exception; | ||
use crate::util::http_client; | ||
use crate::util::json; | ||
|
||
pub struct GCloud { | ||
pub endpoint: String, | ||
pub project: String, | ||
pub voice: String, | ||
} | ||
|
||
impl GCloud { | ||
pub async fn synthesize(&self, text: &str) -> Result<(), Exception> { | ||
info!("call gcloud synthesize api, endpoint={}", self.endpoint); | ||
let request = SynthesizeRequest { | ||
audio_config: AudioConfig { | ||
audio_encoding: "LINEAR16".to_string(), | ||
effects_profile_id: vec!["headphone-class-device".to_string()], | ||
pitch: 0, | ||
speaking_rate: 1, | ||
}, | ||
input: Input { text: Cow::from(text) }, | ||
voice: Voice { | ||
language_code: "en-US".to_string(), | ||
name: Cow::from(&self.voice), | ||
}, | ||
}; | ||
|
||
let body = json::to_json(&request)?; | ||
let response = http_client::http_client() | ||
.post(&self.endpoint) | ||
.bearer_auth(token()) | ||
.header("x-goog-user-project", &self.project) | ||
.header("Content-Type", "application/json") | ||
.header("Accept", "application/json") | ||
.body(body) | ||
.send() | ||
.await?; | ||
|
||
let status = response.status(); | ||
if status != 200 { | ||
let response_text = response.text().await?; | ||
return Err(Exception::ExternalError(format!( | ||
"failed to call gcloud api, status={status}, response={response_text}" | ||
))); | ||
} | ||
|
||
let response_body = response.text_with_charset("utf-8").await?; | ||
let response: SynthesizeResponse = json::from_json(&response_body)?; | ||
let content = BASE64_STANDARD.decode(response.audio_content)?; | ||
|
||
play(content).await?; | ||
|
||
Ok(()) | ||
} | ||
} | ||
|
||
async fn play(audio: Vec<u8>) -> Result<(), Exception> { | ||
let temp_file = temp_dir().join(format!("{}.wav", Uuid::new_v4())); | ||
fs::write(&temp_file, &audio).await?; | ||
info!("play audio file, file={}", temp_file.to_string_lossy()); | ||
let mut command = Command::new("afplay").args([temp_file.to_string_lossy().to_string()]).spawn()?; | ||
let _ = command.wait().await; | ||
fs::remove_file(temp_file).await?; | ||
Ok(()) | ||
} | ||
|
||
impl From<DecodeError> for Exception { | ||
fn from(err: DecodeError) -> Self { | ||
Exception::unexpected(err) | ||
} | ||
} |
Oops, something went wrong.