From a65b25c898d0998ddc7682b2222737a36a25d945 Mon Sep 17 00:00:00 2001 From: Altay Date: Sat, 16 Dec 2023 19:46:39 +0300 Subject: [PATCH] feat: support non-english languages --- lib/services/ai/deepgram.ts | 13 ++++++++++--- lib/services/ai/transcribe-episode.ts | 9 ++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/services/ai/deepgram.ts b/lib/services/ai/deepgram.ts index a66a8a9..2a1866d 100644 --- a/lib/services/ai/deepgram.ts +++ b/lib/services/ai/deepgram.ts @@ -4,13 +4,20 @@ import { z } from 'zod'; const deepgram = createDeepgramClient(env.DEEPGRAM_API_KEY); -export const transcribeAudio = async ({ fileURL }: { fileURL: string }) => { +export const transcribeAudio = async ({ + language = 'en', + url, +}: { + language?: string; + url: string; +}) => { const { error, result } = await deepgram.listen.prerecorded.transcribeUrl( { - url: fileURL, + url, }, { - model: 'nova-2', + language, + model: language === 'en' ? 'nova-2' : 'base', paragraphs: true, smart_format: true, }, diff --git a/lib/services/ai/transcribe-episode.ts b/lib/services/ai/transcribe-episode.ts index e4ce19e..acae1f7 100644 --- a/lib/services/ai/transcribe-episode.ts +++ b/lib/services/ai/transcribe-episode.ts @@ -28,7 +28,7 @@ export const transcribeEpisode = async (id: Tables<'episode'>['id']) => { const episodeQuery = await supabase .from('episode') - .select('audio_url') + .select('audio_url, show(language)') .eq('id', id) .single(); @@ -36,8 +36,11 @@ export const transcribeEpisode = async (id: Tables<'episode'>['id']) => { throw new DatabaseError(episodeQuery.error); } - const fileURL = await getFinalRedirectURL(episodeQuery.data.audio_url); - const transcription = await transcribeAudio({ fileURL }); + const url = await getFinalRedirectURL(episodeQuery.data.audio_url); + const transcription = await transcribeAudio({ + language: episodeQuery.data.show?.language ?? undefined, + url, + }); const updateEpisodeContentQuery = await supabase .from('episode_content')