Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add voice code #13

Merged
merged 9 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 40 additions & 16 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,27 +1,51 @@
# MinIO 配置
VOICEFLOW_MINIO_ENDPOINT='localhost:9000' # MinIO 服务地址
VOICEFLOW_MINIO_ACCESS_KEY='minioadmin' # MinIO 访问密钥
VOICEFLOW_MINIO_SECRET_KEY='minioadmin' # MinIO 密钥
VOICEFLOW_MINIO_ENDPOINT='s3.api..cc' # MinIO 服务地址
VOICEFLOW_MINIO_ACCESS_KEY='' # MinIO 访问密钥
VOICEFLOW_MINIO_SECRET_KEY='' # MinIO 密钥

# Azure 配置
VOICEFLOW_AZURE_STT_KEY='your_azure_stt_key' # Azure 语音转文本密钥
VOICEFLOW_AZURE_TTS_KEY='your_azure_tts_key' # Azure 文本转语音密钥
VOICEFLOW_AZURE_REGION='eastus' # Azure 服务区域
VOICEFLOW_AZURE_STT_KEY='' # Azure STT 密钥
VOICEFLOW_AZURE_TTS_KEY='' # Azure TTS 密钥
VOICEFLOW_AZURE_SPEECH_KEY='' # Azure 语音密钥
VOICEFLOW_AZURE_REGION='japaneast' # Azure 区域

# AWS 配置
VOICEFLOW_AWS_SECRET_ACCESS_KEY='' # AWS 秘密访问密钥
VOICEFLOW_AWS_ACCESS_KEY_ID='' # AWS 访问密钥 ID

# Google 配置
VOICEFLOW_GOOGLE_STT_KEY='your_google_stt_key' # Google 语音转文本密钥
VOICEFLOW_GOOGLE_TTS_KEY='your_google_tts_key' # Google 文本转语音密钥
VOICEFLOW_GOOGLE_STT_KEY='' # Google STT 密钥
VOICEFLOW_GOOGLE_TTS_KEY='' # Google TTS 密钥

# OpenAI 配置
VOICEFLOW_OPENAI_API_KEY='your_openai_api_key' # OpenAI API 密钥
VOICEFLOW_OPENAI_API_KEY='' # OpenAI API 密钥
VOICEFLOW_OPENAI_BASE_URL='' # OpenAI 基础 URL

# AssemblyAI 配置
VOICEFLOW_ASSEMBLYAI_API_KEY='your_assemblyai_api_key' # AssemblyAI API 密钥
VOICEFLOW_ASSEMBLYAI_API_KEY='' # AssemblyAI API 密钥

# 语音服务端口配置
VOICEFLOW_SERVER_PORT=80 # VoiceFlow 服务端口, 默认是 80
# VOLCENGINE STT 配置
VOICEFLOW_VOLCENGINE_STT_WS_URL='wss://openspeech.bytedance.com/api/v3/sauc/bigmodel' # STT WebSocket URL
VOICEFLOW_VOLCENGINE_STT_UID='test' # STT 用户ID
VOICEFLOW_VOLCENGINE_STT_RATE='16000' # STT 采样率
VOICEFLOW_VOLCENGINE_STT_FORMAT='pcm' # STT 音频格式
VOICEFLOW_VOLCENGINE_STT_BITS='16' # STT 位深度
VOICEFLOW_VOLCENGINE_STT_CHANNEL='1' # STT 声道数
VOICEFLOW_VOLCENGINE_STT_CODEC='pcm' # STT 编码格式
VOICEFLOW_VOLCENGINE_STT_ACCESS_KEY='' # STT 访问密钥
VOICEFLOW_VOLCENGINE_STT_APP_KEY='' # STT 应用密钥
VOICEFLOW_VOLCENGINE_STT_RESOURCE_ID='volc.bigasr.sauc.duration' # STT 资源ID

# VOLCENGINE 配置
VOICEFLOW_VOLCENGINE_ACCESS_KEY=''
VOICEFLOW_VOLCENGINE_APP_KEY=''
VOICEFLOW_VOLCENGINE_WS_URL='wss://openspeech.bytedance.com/api/v3/sauc/bigmodel'
# VOLCENGINE TTS 配置
VOICEFLOW_VOLCENGINE_TTS_WS_URL='wss://openspeech.bytedance.com/api/v1/tts/ws_binary' # TTS WebSocket URL
VOICEFLOW_VOLCENGINE_TTS_APP_ID='' # TTS 应用ID
VOICEFLOW_VOLCENGINE_TTS_TOKEN='' # TTS 令牌
VOICEFLOW_VOLCENGINE_TTS_CLUSTER='volcano_tts' # TTS 集群名称
VOICEFLOW_VOLCENGINE_TTS_VOICE_TYPE='zh_female_1' # TTS 音色类型
VOICEFLOW_VOLCENGINE_TTS_ENCODING='mp3' # TTS 音频编码
VOICEFLOW_VOLCENGINE_TTS_SPEED_RATIO='1.0' # TTS 语速比例
VOICEFLOW_VOLCENGINE_TTS_VOLUME_RATIO='1.0' # TTS 音量比例
VOICEFLOW_VOLCENGINE_TTS_PITCH_RATIO='1.0' # TTS 音调比例

# 语音服务端口配置
VOICEFLOW_SERVER_PORT=18080 # 语音服务端口
27 changes: 27 additions & 0 deletions cmd/voiceflow/realtime.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// cmd/voiceflow/realtime.go
package main

import (
"fmt"
"github.com/spf13/cobra"
"github.com/telepace/voiceflow/pkg/voiceprocessor"
)

// realtimeCmd defines the "realtime" subcommand. Its RunE handler is
// runRealtime.
var realtimeCmd = &cobra.Command{
Use: "realtime",
Short: "在终端中实时监听语音并翻译",
RunE: runRealtime,
}

// init registers realtimeCmd as a subcommand of rootCmd.
func init() {
rootCmd.AddCommand(realtimeCmd)
}

// runRealtime is the RunE handler of the "realtime" subcommand. It prints a
// start-up banner and then calls voiceprocessor.StartRealtime, returning any
// error that call reports.
func runRealtime(cmd *cobra.Command, args []string) error {
	fmt.Println("启动实时语音监听...")

	if startErr := voiceprocessor.StartRealtime(); startErr != nil {
		return startErr
	}
	return nil
}
98 changes: 93 additions & 5 deletions cmd/voiceflow/root.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
// root.go
// cmd/voiceflow/root.go
package main

import (
"context"
"embed"
"fmt"
"github.com/joho/godotenv"
"github.com/telepace/voiceflow/pkg/config"
"io/fs"
"io/ioutil"
"net/http"
"os"
"strings"
"time"

"github.com/joho/godotenv"
"github.com/telepace/voiceflow/pkg/config"
"github.com/telepace/voiceflow/pkg/sttservice"

"github.com/spf13/cobra"
"github.com/spf13/viper"

Expand Down Expand Up @@ -64,9 +67,20 @@ var rootCmd = &cobra.Command{
RunE: run,
}

// transcribeCmd defines the "transcribe" subcommand. Its RunE handler is
// runTranscribe.
var transcribeCmd = &cobra.Command{
Use: "transcribe",
Short: "Transcribe an audio file using STT service",
Long: `Transcribe an audio file by specifying its path and using the configured STT service.`,
RunE: runTranscribe,
}

func run(cmd *cobra.Command, args []string) error {
ctx := context.Background()

if err := ensureDirectories(); err != nil {
logger.Fatalf("Failed to ensure directories: %v", err)
}
// Load configuration
cfg, err := config.GetConfig()
if err != nil {
Expand Down Expand Up @@ -102,7 +116,7 @@ func run(cmd *cobra.Command, args []string) error {
// Set up HTTP server
mux := http.NewServeMux()
if err := setupFileServers(mux); err != nil {
return fmt.Errorf("failed to setup file servers: %w", err)
logger.Fatalf("Failed to setup file servers: %v", err)
}

// Initialize WebSocket server
Expand Down Expand Up @@ -158,6 +172,8 @@ func Execute() {
}
}

// transcribeFile receives the value of the transcribe subcommand's --file/-f
// flag; runTranscribe reads the audio data from this path.
var transcribeFile string

func init() {
cobra.OnInitialize(initConfig)

Expand All @@ -180,14 +196,22 @@ func init() {

// 绑定到 viper
viper.BindPFlags(rootCmd.PersistentFlags())

// 配置 transcribe 子命令的标志
transcribeCmd.Flags().StringVarP(&transcribeFile, "file", "f", "", "Path to the audio file to transcribe")
transcribeCmd.MarkFlagRequired("file") // 标记为必需

// 将 transcribe 子命令添加到 rootCmd
rootCmd.AddCommand(transcribeCmd)
}

func initConfig() {
// 加载 .env 文件
if err := godotenv.Load(); err != nil {
logger.Warn("No .env file found or failed to load, proceeding without it")
} else {
logger.Info(".env file loaded")
envPath, _ := os.Getwd()
logger.Info(fmt.Sprintf(".env file loaded from: %s/.env", envPath))
}

if cfgFile != "" {
Expand Down Expand Up @@ -228,8 +252,72 @@ func setDefaults() {
viper.SetDefault("logging.compress", true)
viper.SetDefault("logging.report_caller", true)

// AWS 默认配置
viper.SetDefault("aws.region", "us-east-2")

// 其他服务配置...
viper.SetDefault("web.port", 18090)
viper.SetDefault("minio.enabled", true)
viper.SetDefault("minio.endpoint", "localhost:9000")
}

// runTranscribe is the RunE handler of the "transcribe" subcommand. It
// prepares the runtime (directories, configuration, logger, services), reads
// the audio file named by the --file flag (transcribeFile), passes its bytes
// to sttservice.Recognize and prints the resulting transcript to stdout.
//
// Every failure is returned as a wrapped error so that cobra reports it;
// the function never terminates the process itself.
func runTranscribe(cmd *cobra.Command, args []string) error {
	ctx := context.Background()

	// logger.Init has not run yet at this point, so surface the failure
	// through the returned error instead of logger.Fatalf, matching the
	// other error paths below.
	if err := ensureDirectories(); err != nil {
		return fmt.Errorf("failed to ensure directories: %w", err)
	}

	cfg, err := config.GetConfig()
	if err != nil {
		return fmt.Errorf("failed to get config: %w", err)
	}

	// Initialise the logger from the logging section of the configuration.
	logCfg := logger.Config{
		Level:        cfg.Logging.Level,
		Format:       cfg.Logging.Format,
		Filename:     cfg.Logging.Filename,
		MaxSize:      cfg.Logging.MaxSize,
		MaxBackups:   cfg.Logging.MaxBackups,
		MaxAge:       cfg.Logging.MaxAge,
		Compress:     cfg.Logging.Compress,
		ReportCaller: cfg.Logging.ReportCaller,
	}

	// The instance ID is derived from the start time (Unix seconds).
	fields := logger.StandardFields{
		ServiceID:  "voiceflow",
		InstanceID: fmt.Sprintf("instance-%d", time.Now().Unix()),
	}

	if err := logger.Init(logCfg, fields); err != nil {
		return fmt.Errorf("failed to initialize logger: %w", err)
	}

	// NOTE(review): cfg is logged in full with %+v; if it carries API keys or
	// other credentials they end up in the log output — confirm and redact.
	logger.InfoContextf(ctx, "Starting VoiceFlow transcribe command with config: %+v", cfg)

	// Initialise the services before calling the STT service.
	serverpkg.InitServices()

	// Read the whole audio file into memory.
	// NOTE(review): ioutil.ReadFile is deprecated in favour of os.ReadFile
	// (Go 1.16+); switch once the io/ioutil import can be dropped file-wide.
	audioData, err := ioutil.ReadFile(transcribeFile)
	if err != nil {
		logger.Errorf("Failed to read audio file: %v", err)
		return fmt.Errorf("failed to read audio file: %w", err)
	}

	// Run speech-to-text on the raw audio bytes.
	transcript, err := sttservice.Recognize(audioData)
	if err != nil {
		logger.Errorf("STT Recognize error: %v", err)
		return fmt.Errorf("STT Recognize error: %w", err)
	}

	// Print the transcript to stdout.
	fmt.Printf("Transcript:\n%s\n", transcript)

	return nil
}
29 changes: 29 additions & 0 deletions cmd/voiceflow/transcribe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// cmd/voiceflow/transcribe.go
package main

//import (
// "fmt"
// "github.com/spf13/cobra"
// "github.com/telepace/voiceflow/pkg/voiceprocessor"
//)
//
//var transcribeCmd = &cobra.Command{
// Use: "transcribe [音频文件路径]",
// Short: "转录并翻译指定的音频文件",
// Args: cobra.ExactArgs(1),
// RunE: runTranscribe,
//}
//
//func init() {
// rootCmd.AddCommand(transcribeCmd)
//}
//
//func runTranscribe(cmd *cobra.Command, args []string) error {
// audioFile := args[0]
// fmt.Printf("正在转录音频文件:%s\n", audioFile)
// err := voiceprocessor.TranscribeFile(audioFile)
// if err != nil {
// return err
// }
// return nil
//}
2 changes: 2 additions & 0 deletions cmd/voiceflow/voiceflow.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// cmd/voiceflow/voiceflow.go

package main

func main() {
Expand Down
Loading
Loading