🔥 Remove punctuation reconstruction

The current implementation doesn't work properly and leads to audapolis crashing on most platforms. Fixes #457 Fixes #458 Fixes #460
bugbakery · Sep 20, 2023 · 3ebd48e · 3ebd48e
1 parent 5738a57
commit 3ebd48e
Show file tree

Hide file tree

Showing 15 changed files with 32 additions and 519 deletions.
diff --git a/app/src/pages/LanguageSettings.tsx b/app/src/pages/LanguageSettings.tsx
@@ -272,16 +272,6 @@ export function LanguageSettingsPage(): JSX.Element {
           id={'transcription_table'}
         />
 
-        <Heading marginTop={majorScale(3)} marginBottom={majorScale(2)} paddingLeft={majorScale(1)}>
-          Punctuation Models for {language.lang}
-        </Heading>
-        <ModelTable
-          models={language.punctuation_models}
-          lang={language.lang}
-          type={'punctuation'}
-          id={'punctuation_table'}
-        />
-
         <BackButton marginY={majorScale(2)} />
       </MainMaxWidthContainer>
     </AppContainer>

diff --git a/app/src/pages/ModelManager.tsx b/app/src/pages/ModelManager.tsx
@@ -41,7 +41,6 @@ export function ModelManagerPage(): JSX.Element {
           <Table.Head padding={0}>
             <Table.TextHeaderCell {...firstColumnProps}>Language</Table.TextHeaderCell>
             <Table.TextHeaderCell>Transcription Models</Table.TextHeaderCell>
-            <Table.TextHeaderCell>Punctuation Models</Table.TextHeaderCell>
             <Table.TextHeaderCell {...lastColumnProps} />
           </Table.Head>
 
@@ -62,11 +61,6 @@ export function ModelManagerPage(): JSX.Element {
                   lang={lang.lang}
                   downloaded={downloaded}
                 />
-                <ModelNumberTextCell
-                  models={lang.punctuation_models}
-                  lang={lang.lang}
-                  downloaded={downloaded}
-                />
                 <Table.Cell {...lastColumnProps}>
                   <Tooltip content={'manage language'}>
                     <Icon color={theme.colors.default} icon={ChevronRightIcon} />

diff --git a/app/src/pages/Transcribe.tsx b/app/src/pages/Transcribe.tsx
@@ -8,7 +8,6 @@ import { RootState } from '../state';
 import { openLanding, openModelManager } from '../state/nav';
 import {
   Button,
-  Checkbox,
   Combobox,
   Dialog,
   FormField,
@@ -68,9 +67,6 @@ export function TranscribePage(): JSX.Element {
       .map((lang) => {
         return {
           ...lang,
-          punctuation_models: lang.punctuation_models.filter(
-            (x) => x.model_id in state.models.downloaded
-          ),
           transcription_models: lang.transcription_models.filter(
             (x) => x.model_id in state.models.downloaded
           ),
@@ -95,24 +91,6 @@ export function TranscribePage(): JSX.Element {
       )
     );
   }, [selectedLanguage]);
-  const [punctuate, setPunctuate] = useState(selectedLanguage.punctuation_models.length > 0);
-  const [selectedPunctuationModel, setSelectedPunctuationModel] = useState(
-    getDefaultModelInstance(
-      selectedLanguage.punctuation_models,
-      selectedLanguage.lang,
-      'punctuation'
-    )
-  );
-  useEffect(() => {
-    setSelectedPunctuationModel(
-      getDefaultModelInstance(
-        selectedLanguage.punctuation_models,
-        selectedLanguage.lang,
-        'punctuation'
-      )
-    );
-    setPunctuate(selectedLanguage.punctuation_models.length > 0);
-  }, [selectedLanguage]);
   const [diarizationMode, setDiarizationMode] = useState('on' as 'off' | 'on' | 'advanced');
   const [diarizationSpeakers, setDiarizationSpeakers] = useState('4');
   const [animationDone, setAnimationDone] = useState(false);
@@ -150,7 +128,6 @@ export function TranscribePage(): JSX.Element {
                   dispatch(
                     startTranscription({
                       transcription_model: selectedTranscriptionModel,
-                      punctuation_model: punctuate ? selectedPunctuationModel : null,
                       diarize: diarizationMode != 'off',
                       diarize_max_speakers:
                         diarizationMode == 'advanced' ? parsedSpeakers - 1 : null,
@@ -209,33 +186,6 @@ export function TranscribePage(): JSX.Element {
                   setSelectedModel={setSelectedTranscriptionModel}
                 />
               </FormField>
-              {selectedLanguage.punctuation_models.length > 0 ? (
-                <FormField
-                  marginTop={majorScale(2)}
-                  label={'Punctuation Reconstruction'}
-                  description={
-                    'Audapolis can try to automatically guess the punctuation. This requires a punctuation model, which is only supported for a few languages'
-                  }
-                >
-                  <Checkbox
-                    label={'Enable Punctuation Reconstruction'}
-                    checked={punctuate}
-                    disabled={selectedLanguage.punctuation_models.length == 0}
-                    onChange={(e) => setPunctuate(e.target.checked)}
-                  />
-                  {punctuate ? (
-                    <ModelSelector
-                      selectedModel={selectedPunctuationModel}
-                      models={selectedLanguage.punctuation_models}
-                      setSelectedModel={setSelectedPunctuationModel}
-                    />
-                  ) : (
-                    <></>
-                  )}
-                </FormField>
-              ) : (
-                <></>
-              )}
             </Pane>
           </details>
         </FormField>

diff --git a/app/src/server_api/api.ts b/app/src/server_api/api.ts
@@ -39,7 +39,6 @@ export interface DownloadModelTask extends Task {
 export function startTranscription(
   server: ServerConfig,
   transcription_model: string,
-  punctuation_model: string | null,
   diarize: boolean,
   diarize_max_speakers: number | null,
   file: File,
@@ -49,9 +48,6 @@ export function startTranscription(
   if (diarize_max_speakers !== null) {
     opts['diarize_max_speakers'] = diarize_max_speakers;
   }
-  if (punctuation_model !== null) {
-    opts['punctuation_model'] = punctuation_model;
-  }
   return fetchFromServer(server, 'POST', 'tasks/start_transcription', opts, {
     form: { file, fileName },
   })

diff --git a/app/src/state/models.ts b/app/src/state/models.ts
@@ -18,14 +18,13 @@ export interface Model {
   url: string;
   description: string;
   size: string;
-  type: 'transcription' | 'punctuation';
+  type: 'transcription';
   model_id: string;
 }
 
 export interface Language {
   lang: string;
   transcription_models: Model[];
-  punctuation_models: Model[];
 }
 
 export type DownloadingModel = Model & {
@@ -54,7 +53,7 @@ export const fetchModelState = createAsyncThunk<
 
   const flattenLanguages = (x: Record<string, Language>) =>
     Object.values(x).flatMap((x) => {
-      return x.transcription_models.concat(x.punctuation_models);
+      return x.transcription_models;
     });
 
   return { all: flattenLanguages(all), downloaded: downloaded, languages: all };

diff --git a/app/src/state/transcribe.ts b/app/src/state/transcribe.ts
@@ -67,17 +67,13 @@ export const startTranscription = createAsyncThunk<
   void,
   {
     transcription_model: Model;
-    punctuation_model: Model | null;
     diarize: boolean;
     diarize_max_speakers: number | null;
   },
   { state: RootState }
 >(
   'transcribing/upload',
-  async (
-    { transcription_model, punctuation_model, diarize, diarize_max_speakers },
-    { dispatch, getState }
-  ) => {
+  async ({ transcription_model, diarize, diarize_max_speakers }, { dispatch, getState }) => {
     const state = getState();
     const server = getServer(state);
     const path = state?.transcribe?.file;
@@ -110,7 +106,6 @@ export const startTranscription = createAsyncThunk<
     const task = await startTranscriptionApiCall(
       server,
       transcription_model.model_id,
-      punctuation_model !== null ? punctuation_model.model_id : null,
       diarize,
       diarize_max_speakers,
       file,

diff --git a/app/src/tour/LanguageSettingsTour.tsx b/app/src/tour/LanguageSettingsTour.tsx
@@ -39,15 +39,6 @@ const steps = [
     target: '.downloaded',
     body: <Text>Congratulations 🎉. You now have a transcription model...</Text>,
   },
-  {
-    target: '#punctuation_table',
-    body: (
-      <Text>
-        For some languages you can also download a punctuation model. These help the computer to
-        guess which punctuation was present in the spoken text.
-      </Text>
-    ),
-  },
   {
     target: '#back',
     body: <Text>Now you can go back and transcribe your media file.</Text>,

diff --git a/server/app/main.py b/server/app/main.py
@@ -59,7 +59,6 @@ def startup_event():
 async def start_transcription(
     background_tasks: BackgroundTasks,
     transcription_model: str,
-    punctuation_model: Optional[str] = None,
     diarize_max_speakers: Optional[int] = None,
     diarize: bool = False,
     file: UploadFile = File(...),
@@ -70,13 +69,11 @@ async def start_transcription(
         TranscriptionTask(
             file.filename,
             TranscriptionState.QUEUED,
-            punctuate=punctuation_model is not None,
         )
     )
     background_tasks.add_task(
         process_audio,
         transcription_model,
-        punctuation_model,
         file.file,
         fileName,
         task.uuid,

diff --git a/server/app/models.py b/server/app/models.py
@@ -10,7 +10,6 @@
 
 import requests
 import yaml
-from punctuator import Punctuator
 from vosk import Model
 
 from .config import CACHE_DIR, DATA_DIR
@@ -59,10 +58,9 @@ def is_downloaded(self) -> bool:
 class Language:
     lang: str
     transcription_models: List[ModelDescription] = field(default_factory=list)
-    punctuation_models: List[ModelDescription] = field(default_factory=list)
 
     def all_models(self):
-        return self.transcription_models + self.punctuation_models
+        return self.transcription_models
 
 
 class ModelDefaultDict(defaultdict):
@@ -83,8 +81,6 @@ def __init__(self):
                     models[model_description.model_id] = model_description
                     if model["type"] == "transcription":
                         languages[lang].transcription_models.append(model_description)
-                    elif model["type"] == "punctuation":
-                        languages[lang].punctuation_models.append(model_description)
         self.available = dict(languages)
         self.model_descriptions = models
 
@@ -111,12 +107,10 @@ def get_model_description(self, model_id) -> ModelDescription:
     def _load_model(self, model):
         if model.type == "transcription":
             return Model(str(model.path()))
-        elif model.type == "punctuation":
-            return Punctuator(str(model.path()))
         else:
             raise ModelTypeNotSupported()
 
-    def get(self, model_id: str) -> Union[Model, Punctuator]:
+    def get(self, model_id: str) -> Union[Model]:
         model = self.get_model_description(model_id)
         if not model.is_downloaded():
             raise ModelNotDownloaded()

diff --git a/server/app/models.yml b/server/app/models.yml
@@ -1,41 +1,7 @@
 # this file is autogenerated by the ../scripts/generate_models_list.py script.
 # do not edit manually!
 
-German:
-- name: punctuator2-subtitle2go
-  url: https://github.com/bugbakery/audapolis-model-hub/releases/download/2/Model_subs_norm1_filt_5M_tageschau_euparl_h256_lr0.02.zip
-  description: "Punctuation Model trained by the Language Technology Group of the\
-    \ Universit\xE4t Hamburg (UHH)"
-  size: 213M
-  type: punctuation
-  compressed: false
-- name: big
-  url: https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip
-  description: Big German model for telephony and server
-  size: 1.9G
-  type: transcription
-  compressed: true
-- name: big-2
-  url: https://alphacephei.com/vosk/models/vosk-model-de-tuda-0.6-900k.zip
-  description: Latest big wideband model from <a href="https://github.com/uhh-lt/kaldi-tuda-de">Tuda-DE</a>
-    project
-  size: 4.4G
-  type: transcription
-  compressed: true
-- name: small
-  url: https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip
-  description: Lightweight wideband model for Android and RPi
-  size: 45M
-  type: transcription
-  compressed: true
 English:
-- name: punctuator2-europarl
-  url: https://github.com/bugbakery/audapolis-model-hub/releases/download/2/Demo-EUROPARL-EN.zip
-  description: "Model trained on the Europarl dataset by Ottokar Tilk and Tanel Alum\xE4\
-    e of the Language Technology Laboratory at Tallinn University of Technology"
-  size: 138M
-  type: punctuation
-  compressed: false
 - name: small
   url: https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
   description: Lightweight wideband model for Android and RPi
@@ -142,6 +108,26 @@ French Other:
   size: 1.5G
   type: transcription
   compressed: true
+German:
+- name: big
+  url: https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip
+  description: Big German model for telephony and server
+  size: 1.9G
+  type: transcription
+  compressed: true
+- name: big-2
+  url: https://alphacephei.com/vosk/models/vosk-model-de-tuda-0.6-900k.zip
+  description: Latest big wideband model from <a href="https://github.com/uhh-lt/kaldi-tuda-de">Tuda-DE</a>
+    project
+  size: 4.4G
+  type: transcription
+  compressed: true
+- name: small
+  url: https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip
+  description: Lightweight wideband model for Android and RPi
+  size: 45M
+  type: transcription
+  compressed: true
 Spanish:
 - name: small
   url: https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip
@@ -375,7 +361,7 @@ Korean:
   compressed: true
 Breton:
 - name: big
-  url: https://alphacephei.com/vosk/models/vosk-model-br-0.7.zip
+  url: https://alphacephei.com/vosk/models/vosk-model-br-0.8.zip
   description: Breton model from <a href="https://github.com/gweltou/vosk-br">vosk-br</a>
     project
   size: 70M