Merge branch 'lobehub:main' into patch-2
LovelyGuYiMeng authored Sep 19, 2024
2 parents 05e09e2 + a73ef61 commit 51adc1b
Showing 18 changed files with 111 additions and 26 deletions.
25 changes: 25 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,31 @@

# Changelog

### [Version 1.19.6](https://github.com/lobehub/lobe-chat/compare/v1.19.5...v1.19.6)

<sup>Released on **2024-09-19**</sup>

#### ♻ Code Refactoring

- **misc**: Refactor the tts route url.

<br/>

<details>
<summary><kbd>Improvements and Fixes</kbd></summary>

#### Code refactoring

- **misc**: Refactor the tts route url, closes [#4030](https://github.com/lobehub/lobe-chat/issues/4030) ([60dcf19](https://github.com/lobehub/lobe-chat/commit/60dcf19))

</details>

<div align="right">

[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)

</div>

### [Version 1.19.5](https://github.com/lobehub/lobe-chat/compare/v1.19.4...v1.19.5)

<sup>Released on **2024-09-19**</sup>
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "@lobehub/chat",
"version": "1.19.5",
"version": "1.19.6",
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
"keywords": [
"framework",
1 change: 1 addition & 0 deletions src/app/api/openai/createBizOpenAI/index.ts
@@ -8,6 +8,7 @@ import { checkAuth } from './auth';
import { createOpenai } from './createOpenai';

/**
* @deprecated
* create an OpenAI instance with auth and Azure OpenAI support
* if auth does not pass, just return an error response
*/
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -28,6 +28,7 @@ export const preferredRegion = [
export const POST = async (req: Request) => {
const payload = (await req.json()) as OpenAITTSPayload;

// needs to be refactored with JWT auth mode
const openaiOrErrResponse = createBizOpenAI(req);

// if openaiOrErrResponse is a Response, it means there is an error, so just return it
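The guard that consumes this union return value sits in the collapsed part of the hunk; a rough, self-contained sketch of the pattern (the `createBizOpenAI` signature is assumed from its use above, and the payload values are illustrative):

```ts
import OpenAI from 'openai';

// assumed signature, mirroring how the route above uses it
declare function createBizOpenAI(req: Request): OpenAI | Response;

export const POST = async (req: Request) => {
  const openaiOrErrResponse = createBizOpenAI(req);

  // a Response here means auth did not pass, so forward the error unchanged
  if (openaiOrErrResponse instanceof Response) return openaiOrErrResponse;

  // otherwise we hold a ready-to-use OpenAI client
  const mp3 = await openaiOrErrResponse.audio.speech.create({
    input: 'Hello there',
    model: 'tts-1',
    voice: 'alloy',
  });

  return new Response(await mp3.arrayBuffer(), { headers: { 'Content-Type': 'audio/mpeg' } });
};
```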
14 changes: 7 additions & 7 deletions src/config/modelProviders/stepfun.ts
@@ -7,8 +7,8 @@ const Stepfun: ModelProviderCard = {
{
description: '支持大规模上下文交互,适合复杂对话场景。',
displayName: 'Step 2 16K',
functionCall: true,
enabled: true,
functionCall: true,
id: 'step-2-16k',
tokens: 16_000,
},
@@ -22,49 +22,49 @@
{
description: '平衡性能与成本,适合一般场景。',
displayName: 'Step 1 128K',
functionCall: true,
enabled: true,
functionCall: true,
id: 'step-1-128k',
tokens: 128_000,
},
{
description: '支持中等长度的对话,适用于多种应用场景。',
displayName: 'Step 1 32K',
functionCall: true,
enabled: true,
functionCall: true,
id: 'step-1-32k',
tokens: 32_000,
},
{
description: '小型模型,适合轻量级任务。',
displayName: 'Step 1 8K',
functionCall: true,
enabled: true,
functionCall: true,
id: 'step-1-8k',
tokens: 8000,
},
{
description: '高速模型,适合实时对话。',
displayName: 'Step 1 Flash',
functionCall: true,
enabled: true,
functionCall: true,
id: 'step-1-flash',
tokens: 8000,
},
{
description: '支持视觉输入,增强多模态交互体验。',
displayName: 'Step 1V 32K',
functionCall: true,
enabled: true,
functionCall: true,
id: 'step-1v-32k',
tokens: 32_000,
vision: true,
},
{
description: '小型视觉模型,适合基本的图文任务。',
displayName: 'Step 1V 8K',
functionCall: true,
enabled: true,
functionCall: true,
id: 'step-1v-8k',
tokens: 8000,
vision: true,
4 changes: 3 additions & 1 deletion src/const/fetch.ts
@@ -1,5 +1,6 @@
export const OPENAI_END_POINT = 'X-openai-end-point';
export const OPENAI_API_KEY_HEADER_KEY = 'X-openai-api-key';
export const LOBE_USER_ID = 'X-lobe-user-id';

export const USE_AZURE_OPENAI = 'X-use-azure-openai';

@@ -19,9 +20,10 @@ export const getOpenAIAuthFromRequest = (req: Request) => {
const useAzureStr = req.headers.get(USE_AZURE_OPENAI);
const apiVersion = req.headers.get(AZURE_OPENAI_API_VERSION);
const oauthAuthorizedStr = req.headers.get(OAUTH_AUTHORIZED);
const userId = req.headers.get(LOBE_USER_ID);

const oauthAuthorized = !!oauthAuthorizedStr;
const useAzure = !!useAzureStr;

return { accessCode, apiKey, apiVersion, endpoint, oauthAuthorized, useAzure };
return { accessCode, apiKey, apiVersion, endpoint, oauthAuthorized, useAzure, userId };
};
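On the server side, the new `userId` field becomes available wherever the headers are parsed; a small, hypothetical handler for illustration:

```ts
import { getOpenAIAuthFromRequest } from '@/const/fetch';

export const POST = async (req: Request) => {
  const { accessCode, apiKey, endpoint, userId } = getOpenAIAuthFromRequest(req);

  // userId is null when the client did not send the X-lobe-user-id header
  if (!userId) console.warn('anonymous request: no X-lobe-user-id header present');

  // ...continue with accessCode / apiKey / endpoint exactly as before
  return new Response('ok');
};
```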
4 changes: 4 additions & 0 deletions src/libs/agent-runtime/AgentRuntime.ts
@@ -35,6 +35,7 @@ import {
EmbeddingsPayload,
ModelProvider,
TextToImagePayload,
TextToSpeechPayload,
} from './types';
import { LobeUpstageAI } from './upstage';
import { LobeZeroOneAI } from './zeroone';
@@ -97,6 +98,9 @@
async embeddings(payload: EmbeddingsPayload, options?: EmbeddingsOptions) {
return this._runtime.embeddings?.(payload, options);
}
async textToSpeech(payload: TextToSpeechPayload, options?: EmbeddingsOptions) {
return this._runtime.textToSpeech?.(payload, options);
}

/**
* @description Initialize the runtime with the provider and the options
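A rough usage sketch of the new runtime method, assuming an already-initialized `AgentRuntime` instance (the import path and payload values are illustrative, not mandated by this commit):

```ts
import { AgentRuntime } from '@/libs/agent-runtime';

const synthesize = async (runtime: AgentRuntime) => {
  const audio = await runtime.textToSpeech({
    input: 'Hello from LobeChat',
    model: 'tts-1',
    voice: 'alloy',
  });

  // the provider-level method is optional, so the result can be undefined
  if (!audio) return new Response('TTS is not supported by this provider', { status: 400 });

  return new Response(audio, { headers: { 'Content-Type': 'audio/mpeg' } });
};
```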
9 changes: 8 additions & 1 deletion src/libs/agent-runtime/BaseAI.ts
@@ -1,6 +1,5 @@
import OpenAI from 'openai';

import { TextToImagePayload } from '@/libs/agent-runtime/types/textToImage';
import { ChatModelCard } from '@/types/llm';

import {
@@ -9,6 +8,9 @@ import {
EmbeddingItem,
EmbeddingsOptions,
EmbeddingsPayload,
TextToImagePayload,
TextToSpeechOptions,
TextToSpeechPayload,
} from './types';

export interface LobeRuntimeAI {
@@ -20,6 +22,11 @@ export interface LobeRuntimeAI {
models?(): Promise<any>;

textToImage?: (payload: TextToImagePayload) => Promise<string[]>;

textToSpeech?: (
payload: TextToSpeechPayload,
options?: TextToSpeechOptions,
) => Promise<ArrayBuffer>;
}

export abstract class LobeOpenAICompatibleRuntime {
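Because `textToSpeech` is optional on `LobeRuntimeAI`, providers can adopt it incrementally. A minimal hypothetical provider sketch follows; the class name, endpoint, and fetch shape are assumptions, not part of this commit:

```ts
import { LobeRuntimeAI } from '@/libs/agent-runtime/BaseAI';
import { TextToSpeechOptions, TextToSpeechPayload } from '@/libs/agent-runtime/types';

class ExampleTTSRuntime implements Pick<LobeRuntimeAI, 'textToSpeech'> {
  baseURL = 'https://api.example.invalid/v1'; // placeholder endpoint

  async textToSpeech(payload: TextToSpeechPayload, options?: TextToSpeechOptions) {
    const res = await fetch(`${this.baseURL}/audio/speech`, {
      body: JSON.stringify(payload),
      headers: { 'Content-Type': 'application/json', ...options?.headers },
      method: 'POST',
      signal: options?.signal,
    });

    if (!res.ok) throw new Error(`TTS request failed with status ${res.status}`);

    // the interface expects the raw audio back as an ArrayBuffer
    return res.arrayBuffer();
  }
}
```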
1 change: 1 addition & 0 deletions src/libs/agent-runtime/types/index.ts
@@ -1,4 +1,5 @@
export * from './chat';
export * from './embeddings';
export * from './textToImage';
export * from './tts';
export * from './type';
14 changes: 14 additions & 0 deletions src/libs/agent-runtime/types/tts.ts
@@ -0,0 +1,14 @@
export interface TextToSpeechPayload {
input: string;
model: string;
voice: string;
}

export interface TextToSpeechOptions {
headers?: Record<string, any>;
signal?: AbortSignal;
/**
* userId for the text-to-speech request
*/
user?: string;
}
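For reference, objects conforming to the new types could look like this; the model, voice, header, and user values are purely illustrative:

```ts
import { TextToSpeechOptions, TextToSpeechPayload } from '@/libs/agent-runtime/types';

const payload: TextToSpeechPayload = {
  input: 'Text to be spoken aloud',
  model: 'tts-1', // illustrative model id
  voice: 'alloy', // illustrative voice id
};

const controller = new AbortController();

const options: TextToSpeechOptions = {
  headers: { 'X-Example-Header': 'demo' }, // hypothetical extra header
  signal: controller.signal, // lets the caller cancel a long-running synthesis
  user: 'user_123', // hypothetical user id
};
```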
17 changes: 16 additions & 1 deletion src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts
@@ -1,7 +1,6 @@
import OpenAI, { ClientOptions } from 'openai';

import { LOBE_DEFAULT_MODEL_LIST } from '@/config/modelProviders';
import { TextToImagePayload } from '@/libs/agent-runtime/types/textToImage';
import { ChatModelCard } from '@/types/llm';

import { LobeRuntimeAI } from '../../BaseAI';
@@ -13,6 +12,9 @@ import {
EmbeddingItem,
EmbeddingsOptions,
EmbeddingsPayload,
TextToImagePayload,
TextToSpeechOptions,
TextToSpeechPayload,
} from '../../types';
import { AgentRuntimeError } from '../createError';
import { debugResponse, debugStream } from '../debugStream';
@@ -253,6 +255,19 @@ export const LobeOpenAICompatibleFactory = <T extends Record<string, any> = any>
}
}

async textToSpeech(payload: TextToSpeechPayload, options?: TextToSpeechOptions) {
try {
const mp3 = await this.client.audio.speech.create(payload as any, {
headers: options?.headers,
signal: options?.signal,
});

return mp3.arrayBuffer();
} catch (error) {
throw this.handleError(error);
}
}

private handleError(error: any): ChatCompletionErrorPayload {
let desensitizedEndpoint = this.baseURL;

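Every runtime built by `LobeOpenAICompatibleFactory` therefore gains the method. A usage sketch against the stock OpenAI runtime; the constructor shape, import path, and model/voice values are assumptions:

```ts
import { LobeOpenAI } from '@/libs/agent-runtime/openai';

const demo = async () => {
  const runtime = new LobeOpenAI({ apiKey: process.env.OPENAI_API_KEY });

  const buffer = await runtime.textToSpeech(
    { input: 'Testing speech synthesis', model: 'tts-1', voice: 'nova' },
    { headers: { 'X-Trace-Id': 'example' } }, // optional per-request headers
  );

  // the factory resolves the SDK response into an ArrayBuffer
  return buffer.byteLength;
};
```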
13 changes: 10 additions & 3 deletions src/services/_header.ts
@@ -1,4 +1,9 @@
import { LOBE_CHAT_ACCESS_CODE, OPENAI_API_KEY_HEADER_KEY, OPENAI_END_POINT } from '@/const/fetch';
import {
LOBE_CHAT_ACCESS_CODE,
LOBE_USER_ID,
OPENAI_API_KEY_HEADER_KEY,
OPENAI_END_POINT,
} from '@/const/fetch';
import { useUserStore } from '@/store/user';
import { keyVaultsConfigSelectors } from '@/store/user/selectors';

@@ -8,12 +13,14 @@ import { keyVaultsConfigSelectors } from '@/store/user/selectors';
*/
// eslint-disable-next-line no-undef
export const createHeaderWithOpenAI = (header?: HeadersInit): HeadersInit => {
const openAIConfig = keyVaultsConfigSelectors.openAIConfig(useUserStore.getState());
const state = useUserStore.getState();
const openAIConfig = keyVaultsConfigSelectors.openAIConfig(state);

// eslint-disable-next-line no-undef
return {
...header,
[LOBE_CHAT_ACCESS_CODE]: keyVaultsConfigSelectors.password(useUserStore.getState()),
[LOBE_CHAT_ACCESS_CODE]: keyVaultsConfigSelectors.password(state),
[LOBE_USER_ID]: state.user?.id || '',
[OPENAI_API_KEY_HEADER_KEY]: openAIConfig.apiKey || '',
[OPENAI_END_POINT]: openAIConfig.baseURL || '',
};
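Client-side callers keep using the helper as before, and the user id now rides along automatically. An illustrative call against the TTS endpoint defined in `_url.ts` below:

```ts
import { createHeaderWithOpenAI } from '@/services/_header';
import { API_ENDPOINTS } from '@/services/_url';

// hypothetical client-side call; the body mirrors TextToSpeechPayload
const fetchSpeech = (input: string) =>
  fetch(API_ENDPOINTS.tts, {
    body: JSON.stringify({ input, model: 'tts-1', voice: 'alloy' }),
    headers: createHeaderWithOpenAI({ 'Content-Type': 'application/json' }),
    method: 'POST',
  });
```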
14 changes: 8 additions & 6 deletions src/services/_url.ts
@@ -1,4 +1,4 @@
// TODO: 未来所有路由需要全部迁移到 trpc
// TODO: 未来路由需要迁移到 trpc or /webapi

/* eslint-disable sort-keys-fix/sort-keys-fix */
import { transform } from 'lodash-es';
@@ -38,9 +38,11 @@ export const API_ENDPOINTS = mapWithBasePath({
// image
images: '/api/text-to-image/openai',

// TTS & STT
stt: '/api/openai/stt',
tts: '/api/openai/tts',
edge: '/api/tts/edge-speech',
microsoft: '/api/tts/microsoft-speech',
// STT
stt: '/webapi/stt/openai',

// TTS
tts: '/webapi/tts/openai',
edge: '/webapi/tts/edge',
microsoft: '/webapi/tts/microsoft',
});
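After this change the speech and transcription endpoints live under `/webapi` instead of `/api`. Resolved values, assuming no base path is configured:

```ts
import { API_ENDPOINTS } from '@/services/_url';

console.log(API_ENDPOINTS.tts); // '/webapi/tts/openai'    (was '/api/openai/tts')
console.log(API_ENDPOINTS.stt); // '/webapi/stt/openai'    (was '/api/openai/stt')
console.log(API_ENDPOINTS.edge); // '/webapi/tts/edge'      (was '/api/tts/edge-speech')
console.log(API_ENDPOINTS.microsoft); // '/webapi/tts/microsoft' (was '/api/tts/microsoft-speech')
```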
2 changes: 1 addition & 1 deletion src/store/file/slices/tts/action.ts
@@ -39,7 +39,7 @@ export const createTTSFileSlice: StateCreator<
};
const file = new File([blob], fileName, fileOptions);

const res = await get().uploadWithProgress({ file });
const res = await get().uploadWithProgress({ file, skipCheckFileType: true });

return res?.id;
},
16 changes: 11 additions & 5 deletions src/store/file/slices/upload/action.ts
@@ -29,6 +29,12 @@ interface UploadWithProgressParams {
type: 'removeFile';
},
) => void;
/**
* Optional flag to indicate whether to skip the file type check.
* When set to `true`, any file type checks will be bypassed.
* Default is `false`, which means file type checks will be performed.
*/
skipCheckFileType?: boolean;
}

interface UploadWithProgressResult {
@@ -52,8 +58,8 @@
[],
FileUploadAction
> = (set, get) => ({
internal_uploadToClientDB: async ({ file, onStatusUpdate }) => {
if (!file.type.startsWith('image')) {
internal_uploadToClientDB: async ({ file, onStatusUpdate, skipCheckFileType }) => {
if (!skipCheckFileType && !file.type.startsWith('image')) {
onStatusUpdate?.({ id: file.name, type: 'removeFile' });
message.info({
content: t('upload.fileOnlySupportInServerMode', {
@@ -158,11 +164,11 @@
return data;
},

uploadWithProgress: async ({ file, onStatusUpdate, knowledgeBaseId }) => {
uploadWithProgress: async (payload) => {
const { internal_uploadToServer, internal_uploadToClientDB } = get();

if (isServerMode) return internal_uploadToServer({ file, knowledgeBaseId, onStatusUpdate });
if (isServerMode) return internal_uploadToServer(payload);

return internal_uploadToClientDB({ file, onStatusUpdate });
return internal_uploadToClientDB(payload);
},
});
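Non-image callers such as the TTS slice above can now opt out of the image-only check in client mode. An illustrative helper; the store hook name is an assumption:

```ts
import { useFileStore } from '@/store/file';

// hypothetical helper: persist a generated speech clip when running in client (browser DB) mode
const saveSpeechClip = async (audio: Blob) => {
  const file = new File([audio], 'speech.mp3', { type: 'audio/mp3' });

  const res = await useFileStore.getState().uploadWithProgress({
    file,
    skipCheckFileType: true, // audio is not an image, so bypass the client-mode check
  });

  return res?.id;
};
```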
