Skip to content

Commit

Permalink
🔥 Remove punctuation reconstruction
Browse files Browse the repository at this point in the history
The current implementation doesn't work properly and leads to audapolis crashing on most platforms

Fixes #457
Fixes #458
Fixes #460
  • Loading branch information
pajowu committed Sep 20, 2023
1 parent 5738a57 commit 3ebd48e
Show file tree
Hide file tree
Showing 15 changed files with 32 additions and 519 deletions.
10 changes: 0 additions & 10 deletions app/src/pages/LanguageSettings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -272,16 +272,6 @@ export function LanguageSettingsPage(): JSX.Element {
id={'transcription_table'}
/>

<Heading marginTop={majorScale(3)} marginBottom={majorScale(2)} paddingLeft={majorScale(1)}>
Punctuation Models for {language.lang}
</Heading>
<ModelTable
models={language.punctuation_models}
lang={language.lang}
type={'punctuation'}
id={'punctuation_table'}
/>

<BackButton marginY={majorScale(2)} />
</MainMaxWidthContainer>
</AppContainer>
Expand Down
6 changes: 0 additions & 6 deletions app/src/pages/ModelManager.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ export function ModelManagerPage(): JSX.Element {
<Table.Head padding={0}>
<Table.TextHeaderCell {...firstColumnProps}>Language</Table.TextHeaderCell>
<Table.TextHeaderCell>Transcription Models</Table.TextHeaderCell>
<Table.TextHeaderCell>Punctuation Models</Table.TextHeaderCell>
<Table.TextHeaderCell {...lastColumnProps} />
</Table.Head>

Expand All @@ -62,11 +61,6 @@ export function ModelManagerPage(): JSX.Element {
lang={lang.lang}
downloaded={downloaded}
/>
<ModelNumberTextCell
models={lang.punctuation_models}
lang={lang.lang}
downloaded={downloaded}
/>
<Table.Cell {...lastColumnProps}>
<Tooltip content={'manage language'}>
<Icon color={theme.colors.default} icon={ChevronRightIcon} />
Expand Down
50 changes: 0 additions & 50 deletions app/src/pages/Transcribe.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import { RootState } from '../state';
import { openLanding, openModelManager } from '../state/nav';
import {
Button,
Checkbox,
Combobox,
Dialog,
FormField,
Expand Down Expand Up @@ -68,9 +67,6 @@ export function TranscribePage(): JSX.Element {
.map((lang) => {
return {
...lang,
punctuation_models: lang.punctuation_models.filter(
(x) => x.model_id in state.models.downloaded
),
transcription_models: lang.transcription_models.filter(
(x) => x.model_id in state.models.downloaded
),
Expand All @@ -95,24 +91,6 @@ export function TranscribePage(): JSX.Element {
)
);
}, [selectedLanguage]);
const [punctuate, setPunctuate] = useState(selectedLanguage.punctuation_models.length > 0);
const [selectedPunctuationModel, setSelectedPunctuationModel] = useState(
getDefaultModelInstance(
selectedLanguage.punctuation_models,
selectedLanguage.lang,
'punctuation'
)
);
useEffect(() => {
setSelectedPunctuationModel(
getDefaultModelInstance(
selectedLanguage.punctuation_models,
selectedLanguage.lang,
'punctuation'
)
);
setPunctuate(selectedLanguage.punctuation_models.length > 0);
}, [selectedLanguage]);
const [diarizationMode, setDiarizationMode] = useState('on' as 'off' | 'on' | 'advanced');
const [diarizationSpeakers, setDiarizationSpeakers] = useState('4');
const [animationDone, setAnimationDone] = useState(false);
Expand Down Expand Up @@ -150,7 +128,6 @@ export function TranscribePage(): JSX.Element {
dispatch(
startTranscription({
transcription_model: selectedTranscriptionModel,
punctuation_model: punctuate ? selectedPunctuationModel : null,
diarize: diarizationMode != 'off',
diarize_max_speakers:
diarizationMode == 'advanced' ? parsedSpeakers - 1 : null,
Expand Down Expand Up @@ -209,33 +186,6 @@ export function TranscribePage(): JSX.Element {
setSelectedModel={setSelectedTranscriptionModel}
/>
</FormField>
{selectedLanguage.punctuation_models.length > 0 ? (
<FormField
marginTop={majorScale(2)}
label={'Punctuation Reconstruction'}
description={
'Audapolis can try to automatically guess the punctuation. This requires a punctuation model, which is only supported for a few languages'
}
>
<Checkbox
label={'Enable Punctuation Reconstruction'}
checked={punctuate}
disabled={selectedLanguage.punctuation_models.length == 0}
onChange={(e) => setPunctuate(e.target.checked)}
/>
{punctuate ? (
<ModelSelector
selectedModel={selectedPunctuationModel}
models={selectedLanguage.punctuation_models}
setSelectedModel={setSelectedPunctuationModel}
/>
) : (
<></>
)}
</FormField>
) : (
<></>
)}
</Pane>
</details>
</FormField>
Expand Down
4 changes: 0 additions & 4 deletions app/src/server_api/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ export interface DownloadModelTask extends Task {
export function startTranscription(
server: ServerConfig,
transcription_model: string,
punctuation_model: string | null,
diarize: boolean,
diarize_max_speakers: number | null,
file: File,
Expand All @@ -49,9 +48,6 @@ export function startTranscription(
if (diarize_max_speakers !== null) {
opts['diarize_max_speakers'] = diarize_max_speakers;
}
if (punctuation_model !== null) {
opts['punctuation_model'] = punctuation_model;
}
return fetchFromServer(server, 'POST', 'tasks/start_transcription', opts, {
form: { file, fileName },
})
Expand Down
5 changes: 2 additions & 3 deletions app/src/state/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@ export interface Model {
url: string;
description: string;
size: string;
type: 'transcription' | 'punctuation';
type: 'transcription';
model_id: string;
}

export interface Language {
lang: string;
transcription_models: Model[];
punctuation_models: Model[];
}

export type DownloadingModel = Model & {
Expand Down Expand Up @@ -54,7 +53,7 @@ export const fetchModelState = createAsyncThunk<

const flattenLanguages = (x: Record<string, Language>) =>
Object.values(x).flatMap((x) => {
return x.transcription_models.concat(x.punctuation_models);
return x.transcription_models;
});

return { all: flattenLanguages(all), downloaded: downloaded, languages: all };
Expand Down
7 changes: 1 addition & 6 deletions app/src/state/transcribe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,13 @@ export const startTranscription = createAsyncThunk<
void,
{
transcription_model: Model;
punctuation_model: Model | null;
diarize: boolean;
diarize_max_speakers: number | null;
},
{ state: RootState }
>(
'transcribing/upload',
async (
{ transcription_model, punctuation_model, diarize, diarize_max_speakers },
{ dispatch, getState }
) => {
async ({ transcription_model, diarize, diarize_max_speakers }, { dispatch, getState }) => {
const state = getState();
const server = getServer(state);
const path = state?.transcribe?.file;
Expand Down Expand Up @@ -110,7 +106,6 @@ export const startTranscription = createAsyncThunk<
const task = await startTranscriptionApiCall(
server,
transcription_model.model_id,
punctuation_model !== null ? punctuation_model.model_id : null,
diarize,
diarize_max_speakers,
file,
Expand Down
9 changes: 0 additions & 9 deletions app/src/tour/LanguageSettingsTour.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,6 @@ const steps = [
target: '.downloaded',
body: <Text>Congratulations 🎉. You now have a transcription model...</Text>,
},
{
target: '#punctuation_table',
body: (
<Text>
For some languages you can also download a punctuation model. These help the computer to
guess which punctuation was present in the spoken text.
</Text>
),
},
{
target: '#back',
body: <Text>Now you can go back and transcribe your media file.</Text>,
Expand Down
3 changes: 0 additions & 3 deletions server/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ def startup_event():
async def start_transcription(
background_tasks: BackgroundTasks,
transcription_model: str,
punctuation_model: Optional[str] = None,
diarize_max_speakers: Optional[int] = None,
diarize: bool = False,
file: UploadFile = File(...),
Expand All @@ -70,13 +69,11 @@ async def start_transcription(
TranscriptionTask(
file.filename,
TranscriptionState.QUEUED,
punctuate=punctuation_model is not None,
)
)
background_tasks.add_task(
process_audio,
transcription_model,
punctuation_model,
file.file,
fileName,
task.uuid,
Expand Down
10 changes: 2 additions & 8 deletions server/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

import requests
import yaml
from punctuator import Punctuator
from vosk import Model

from .config import CACHE_DIR, DATA_DIR
Expand Down Expand Up @@ -59,10 +58,9 @@ def is_downloaded(self) -> bool:
class Language:
lang: str
transcription_models: List[ModelDescription] = field(default_factory=list)
punctuation_models: List[ModelDescription] = field(default_factory=list)

def all_models(self):
return self.transcription_models + self.punctuation_models
return self.transcription_models


class ModelDefaultDict(defaultdict):
Expand All @@ -83,8 +81,6 @@ def __init__(self):
models[model_description.model_id] = model_description
if model["type"] == "transcription":
languages[lang].transcription_models.append(model_description)
elif model["type"] == "punctuation":
languages[lang].punctuation_models.append(model_description)
self.available = dict(languages)
self.model_descriptions = models

Expand All @@ -111,12 +107,10 @@ def get_model_description(self, model_id) -> ModelDescription:
def _load_model(self, model):
if model.type == "transcription":
return Model(str(model.path()))
elif model.type == "punctuation":
return Punctuator(str(model.path()))
else:
raise ModelTypeNotSupported()

def get(self, model_id: str) -> Union[Model, Punctuator]:
def get(self, model_id: str) -> Union[Model]:
model = self.get_model_description(model_id)
if not model.is_downloaded():
raise ModelNotDownloaded()
Expand Down
56 changes: 21 additions & 35 deletions server/app/models.yml
Original file line number Diff line number Diff line change
@@ -1,41 +1,7 @@
# this file is autogenerated by the ../scripts/generate_models_list.py script.
# do not edit manually!

German:
- name: punctuator2-subtitle2go
url: https://github.com/bugbakery/audapolis-model-hub/releases/download/2/Model_subs_norm1_filt_5M_tageschau_euparl_h256_lr0.02.zip
description: "Punctuation Model trained by the Language Technology Group of the\
\ Universit\xE4t Hamburg (UHH)"
size: 213M
type: punctuation
compressed: false
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip
description: Big German model for telephony and server
size: 1.9G
type: transcription
compressed: true
- name: big-2
url: https://alphacephei.com/vosk/models/vosk-model-de-tuda-0.6-900k.zip
description: Latest big wideband model from <a href="https://github.com/uhh-lt/kaldi-tuda-de">Tuda-DE</a>
project
size: 4.4G
type: transcription
compressed: true
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip
description: Lightweight wideband model for Android and RPi
size: 45M
type: transcription
compressed: true
English:
- name: punctuator2-europarl
url: https://github.com/bugbakery/audapolis-model-hub/releases/download/2/Demo-EUROPARL-EN.zip
description: "Model trained on the Europarl dataset by Ottokar Tilk and Tanel Alum\xE4\
e of the Language Technology Laboratory at Tallinn University of Technology"
size: 138M
type: punctuation
compressed: false
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
description: Lightweight wideband model for Android and RPi
Expand Down Expand Up @@ -142,6 +108,26 @@ French Other:
size: 1.5G
type: transcription
compressed: true
German:
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip
description: Big German model for telephony and server
size: 1.9G
type: transcription
compressed: true
- name: big-2
url: https://alphacephei.com/vosk/models/vosk-model-de-tuda-0.6-900k.zip
description: Latest big wideband model from <a href="https://github.com/uhh-lt/kaldi-tuda-de">Tuda-DE</a>
project
size: 4.4G
type: transcription
compressed: true
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip
description: Lightweight wideband model for Android and RPi
size: 45M
type: transcription
compressed: true
Spanish:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip
Expand Down Expand Up @@ -375,7 +361,7 @@ Korean:
compressed: true
Breton:
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-br-0.7.zip
url: https://alphacephei.com/vosk/models/vosk-model-br-0.8.zip
description: Breton model from <a href="https://github.com/gweltou/vosk-br">vosk-br</a>
project
size: 70M
Expand Down
Loading

0 comments on commit 3ebd48e

Please sign in to comment.