telepace · cubxxw · Dec 20, 2024 · Nov 1, 2024 · Nov 1, 2024 · Dec 17, 2024
@@ -1,27 +1,51 @@
 # MinIO 配置
-VOICEFLOW_MINIO_ENDPOINT='localhost:9000'      # MinIO 服务地址
-VOICEFLOW_MINIO_ACCESS_KEY='minioadmin'        # MinIO 访问密钥
-VOICEFLOW_MINIO_SECRET_KEY='minioadmin'        # MinIO 密钥
+VOICEFLOW_MINIO_ENDPOINT='s3.api..cc'      # MinIO 服务地址
+VOICEFLOW_MINIO_ACCESS_KEY=''        # MinIO 访问密钥
+VOICEFLOW_MINIO_SECRET_KEY=''        # MinIO 密钥
 
 # Azure 配置
-VOICEFLOW_AZURE_STT_KEY='your_azure_stt_key'   # Azure 语音转文本密钥
-VOICEFLOW_AZURE_TTS_KEY='your_azure_tts_key'   # Azure 文本转语音密钥
-VOICEFLOW_AZURE_REGION='eastus'                # Azure 服务区域
+VOICEFLOW_AZURE_STT_KEY=''          # Azure STT 密钥
+VOICEFLOW_AZURE_TTS_KEY=''          # Azure TTS 密钥
+VOICEFLOW_AZURE_SPEECH_KEY=''                        # Azure 语音密钥
+VOICEFLOW_AZURE_REGION='japaneast'  # Azure 区域
+
+# AWS 配置
+VOICEFLOW_AWS_SECRET_ACCESS_KEY=''   # AWS 秘密访问密钥
+VOICEFLOW_AWS_ACCESS_KEY_ID=''       # AWS 访问密钥 ID
 
 # Google 配置
-VOICEFLOW_GOOGLE_STT_KEY='your_google_stt_key' # Google 语音转文本密钥
-VOICEFLOW_GOOGLE_TTS_KEY='your_google_tts_key' # Google 文本转语音密钥
+VOICEFLOW_GOOGLE_STT_KEY=''          # Google STT 密钥
+VOICEFLOW_GOOGLE_TTS_KEY=''          # Google TTS 密钥
 
 # OpenAI 配置
-VOICEFLOW_OPENAI_API_KEY='your_openai_api_key' # OpenAI API 密钥
+VOICEFLOW_OPENAI_API_KEY=''          # OpenAI API 密钥
+VOICEFLOW_OPENAI_BASE_URL=''         # OpenAI 基础 URL
 
 # AssemblyAI 配置
-VOICEFLOW_ASSEMBLYAI_API_KEY='your_assemblyai_api_key' # AssemblyAI API 密钥
+VOICEFLOW_ASSEMBLYAI_API_KEY=''      # AssemblyAI API 密钥
 
-# 语音服务端口配置
-VOICEFLOW_SERVER_PORT=80                    # VoiceFlow 服务端口, 默认是 80
+# VOLCENGINE STT 配置
+VOICEFLOW_VOLCENGINE_STT_WS_URL='wss://openspeech.bytedance.com/api/v3/sauc/bigmodel'  # STT WebSocket URL
+VOICEFLOW_VOLCENGINE_STT_UID='test'                                                      # STT 用户ID
+VOICEFLOW_VOLCENGINE_STT_RATE='16000'                                                    # STT 采样率
+VOICEFLOW_VOLCENGINE_STT_FORMAT='pcm'                                                    # STT 音频格式
+VOICEFLOW_VOLCENGINE_STT_BITS='16'                                                       # STT 位深度
+VOICEFLOW_VOLCENGINE_STT_CHANNEL='1'                                                     # STT 声道数
+VOICEFLOW_VOLCENGINE_STT_CODEC='pcm'                                                     # STT 编码格式
+VOICEFLOW_VOLCENGINE_STT_ACCESS_KEY=''                                                   # STT 访问密钥
+VOICEFLOW_VOLCENGINE_STT_APP_KEY=''                                                      # STT 应用密钥
+VOICEFLOW_VOLCENGINE_STT_RESOURCE_ID='volc.bigasr.sauc.duration'                        # STT 资源ID
 
-# VOLCENGINE 配置
-VOICEFLOW_VOLCENGINE_ACCESS_KEY=''
-VOICEFLOW_VOLCENGINE_APP_KEY=''
-VOICEFLOW_VOLCENGINE_WS_URL='wss://openspeech.bytedance.com/api/v3/sauc/bigmode
+# VOLCENGINE TTS 配置
+VOICEFLOW_VOLCENGINE_TTS_WS_URL='wss://openspeech.bytedance.com/api/v1/tts/ws_binary'   # TTS WebSocket URL
+VOICEFLOW_VOLCENGINE_TTS_APP_ID=''                                                       # TTS 应用ID
+VOICEFLOW_VOLCENGINE_TTS_TOKEN=''                                                        # TTS 令牌
+VOICEFLOW_VOLCENGINE_TTS_CLUSTER='volcano_tts'                                          # TTS 集群名称
+VOICEFLOW_VOLCENGINE_TTS_VOICE_TYPE='zh_female_1'                                        # TTS 音色类型
+VOICEFLOW_VOLCENGINE_TTS_ENCODING='mp3'                                                  # TTS 音频编码
+VOICEFLOW_VOLCENGINE_TTS_SPEED_RATIO='1.0'                                              # TTS 语速比例
+VOICEFLOW_VOLCENGINE_TTS_VOLUME_RATIO='1.0'                                             # TTS 音量比例
+VOICEFLOW_VOLCENGINE_TTS_PITCH_RATIO='1.0'                                              # TTS 音调比例
+
+# 语音服务端口配置
+VOICEFLOW_SERVER_PORT=18080           # 语音服务端口
@@ -0,0 +1,27 @@
+// cmd/voiceflow/realtime.go
+package main
+
+import (
+	"fmt"
+	"github.com/spf13/cobra"
+	"github.com/telepace/voiceflow/pkg/voiceprocessor"
+)
+
+var realtimeCmd = &cobra.Command{ // realtimeCmd defines the "realtime" subcommand.
+	Use:   "realtime",
+	Short: "在终端中实时监听语音并翻译", // i.e. "listen to speech in the terminal in real time and translate it"
+	RunE:  runRealtime, // handler executed when the subcommand is invoked
+}
+
+func init() { // init registers realtimeCmd as a subcommand of rootCmd.
+	rootCmd.AddCommand(realtimeCmd)
+}
+
+// runRealtime is the RunE handler for the "realtime" subcommand. It prints a
+// startup banner to stdout, then calls voiceprocessor.StartRealtime and
+// returns whatever error that call reports. cmd and args are not used.
+func runRealtime(cmd *cobra.Command, args []string) error {
+	fmt.Println("启动实时语音监听...")
+	if startErr := voiceprocessor.StartRealtime(); startErr != nil {
+		return startErr
+	}
+	return nil
+}
@@ -1,18 +1,21 @@
-// root.go
+// cmd/voiceflow/root.go
 package main
 
 import (
 	"context"
 	"embed"
 	"fmt"
-	"github.com/joho/godotenv"
-	"github.com/telepace/voiceflow/pkg/config"
 	"io/fs"
+	"io/ioutil"
 	"net/http"
 	"os"
 	"strings"
 	"time"
 
+	"github.com/joho/godotenv"
+	"github.com/telepace/voiceflow/pkg/config"
+	"github.com/telepace/voiceflow/pkg/sttservice"
+
 	"github.com/spf13/cobra"
 	"github.com/spf13/viper"
 
@@ -64,9 +67,20 @@ var rootCmd = &cobra.Command{
 	RunE:  run,
 }
 
+// transcribeCmd is the "transcribe" subcommand; its handler is runTranscribe.
+var transcribeCmd = &cobra.Command{
+	Use:   "transcribe",
+	Short: "Transcribe an audio file using STT service",
+	Long:  `Transcribe an audio file by specifying its path and using the configured STT service.`,
+	RunE:  runTranscribe,
+}
+
 func run(cmd *cobra.Command, args []string) error {
 	ctx := context.Background()
 
+	if err := ensureDirectories(); err != nil {
+		logger.Fatalf("Failed to ensure directories: %v", err)
+	}
 	// Load configuration
 	cfg, err := config.GetConfig()
 	if err != nil {
@@ -102,7 +116,7 @@ func run(cmd *cobra.Command, args []string) error {
 	// Set up HTTP server
 	mux := http.NewServeMux()
 	if err := setupFileServers(mux); err != nil {
-		return fmt.Errorf("failed to setup file servers: %w", err)
+		logger.Fatalf("Failed to setup file servers: %v", err)
 	}
 
 	// Initialize WebSocket server
@@ -158,6 +172,8 @@ func Execute() {
 	}
 }
 
+var transcribeFile string // transcribeFile receives the value of the transcribe command's --file/-f flag.
+
 func init() {
 	cobra.OnInitialize(initConfig)
 
@@ -180,14 +196,22 @@ func init() {
 
 	// 绑定到 viper
 	viper.BindPFlags(rootCmd.PersistentFlags())
+
+	// 配置 transcribe 子命令的标志
+	transcribeCmd.Flags().StringVarP(&transcribeFile, "file", "f", "", "Path to the audio file to transcribe")
+	transcribeCmd.MarkFlagRequired("file") // 标记为必需
+
+	// 将 transcribe 子命令添加到 rootCmd
+	rootCmd.AddCommand(transcribeCmd)
 }
 
 func initConfig() {
 	// 加载 .env 文件
 	if err := godotenv.Load(); err != nil {
 		logger.Warn("No .env file found or failed to load, proceeding without it")
 	} else {
-		logger.Info(".env file loaded")
+		envPath, _ := os.Getwd()
+		logger.Info(fmt.Sprintf(".env file loaded from: %s/.env", envPath))
 	}
 
 	if cfgFile != "" {
@@ -228,8 +252,72 @@ func setDefaults() {
 	viper.SetDefault("logging.compress", true)
 	viper.SetDefault("logging.report_caller", true)
 
+	// AWS 默认配置
+	viper.SetDefault("aws.region", "us-east-2")
+
 	// 其他服务配置...
 	viper.SetDefault("web.port", 18090)
 	viper.SetDefault("minio.enabled", true)
 	viper.SetDefault("minio.endpoint", "localhost:9000")
 }
+
+// runTranscribe handles the transcribe subcommand: it reads the audio file at transcribeFile, runs sttservice.Recognize on it, and prints the transcript to stdout.
+func runTranscribe(cmd *cobra.Command, args []string) error {
+	ctx := context.Background()
+
+	// Ensure directories exist, then load the configuration. NOTE(review): this Fatalf runs before logger.Init below and presumably exits instead of returning an error like the other failure paths — confirm intended.
+	if err := ensureDirectories(); err != nil {
+		logger.Fatalf("Failed to ensure directories: %v", err)
+	}
+
+	cfg, err := config.GetConfig()
+	if err != nil {
+		return fmt.Errorf("failed to get config: %w", err)
+	}
+
+	// Build the logger settings from the loaded config and initialize the logger.
+	logCfg := logger.Config{
+		Level:        cfg.Logging.Level,
+		Format:       cfg.Logging.Format,
+		Filename:     cfg.Logging.Filename,
+		MaxSize:      cfg.Logging.MaxSize,
+		MaxBackups:   cfg.Logging.MaxBackups,
+		MaxAge:       cfg.Logging.MaxAge,
+		Compress:     cfg.Logging.Compress,
+		ReportCaller: cfg.Logging.ReportCaller,
+	}
+
+	fields := logger.StandardFields{
+		ServiceID:  "voiceflow",
+		InstanceID: fmt.Sprintf("instance-%d", time.Now().Unix()),
+	}
+
+	if err := logger.Init(logCfg, fields); err != nil {
+		return fmt.Errorf("failed to initialize logger: %w", err)
+	}
+
+	// Log startup information. NOTE(review): %+v prints the entire config, which may include credentials — confirm this is safe to log.
+	logger.InfoContextf(ctx, "Starting VoiceFlow transcribe command with config: %+v", cfg)
+
+	// Initialize services.
+	serverpkg.InitServices()
+
+	// Read the whole audio file (path taken from transcribeFile) into memory.
+	audioData, err := ioutil.ReadFile(transcribeFile)
+	if err != nil {
+		logger.Errorf("Failed to read audio file: %v", err)
+		return fmt.Errorf("failed to read audio file: %w", err)
+	}
+
+	// Call the STT service to transcribe the audio.
+	transcript, err := sttservice.Recognize(audioData)
+	if err != nil {
+		logger.Errorf("STT Recognize error: %v", err)
+		return fmt.Errorf("STT Recognize error: %w", err)
+	}
+
+	// Print the transcript to stdout.
+	fmt.Printf("Transcript:\n%s\n", transcript)
+
+	return nil
+}
@@ -0,0 +1,29 @@
+// cmd/voiceflow/transcribe.go
+package main
+
+//import (
+//	"fmt"
+//	"github.com/spf13/cobra"
+//	"github.com/telepace/voiceflow/pkg/voiceprocessor"
+//)
+//
+//var transcribeCmd = &cobra.Command{
+//	Use:   "transcribe [音频文件路径]",
+//	Short: "转录并翻译指定的音频文件",
+//	Args:  cobra.ExactArgs(1),
+//	RunE:  runTranscribe,
+//}
+//
+//func init() {
+//	rootCmd.AddCommand(transcribeCmd)
+//}
+//
+//func runTranscribe(cmd *cobra.Command, args []string) error {
+//	audioFile := args[0]
+//	fmt.Printf("正在转录音频文件：%s\n", audioFile)
+//	err := voiceprocessor.TranscribeFile(audioFile)
+//	if err != nil {
+//		return err
+//	}
+//	return nil
+//}
@@ -1,3 +1,5 @@
+// cmd/voiceflow/voiceflow.go
+
 package main
 
 func main() {