Rename the VAD speech-probability option from `vad_onset` back to
`vad_threshold`: the CLI flag, the TranscriptionOptions field, and the
argument plumbing in get_transcription_options.

The value is forwarded in the VAD parameters dictionary under the key
"threshold", matching the bare field names used by the sibling entries
(e.g. "min_speech_duration_ms").
NOTE(review): assumes the pinned faster-whisper VadOptions names this field
`threshold` -- confirm against the required faster-whisper version.

diff --git a/src/whisper_ctranslate2/commandline.py b/src/whisper_ctranslate2/commandline.py
index 9b8dc12..e242e48 100644
--- a/src/whisper_ctranslate2/commandline.py
+++ b/src/whisper_ctranslate2/commandline.py
@@ -381,10 +381,10 @@ def read_command_line():
     )
 
     vad_args.add_argument(
-        "--vad_onset",
+        "--vad_threshold",
         type=float,
         default=None,
-        help="when `vad_filter` is enabled, probabilities above this value are considered as speech. This parameter was called `vad_threshold` before",
+        help="when `vad_filter` is enabled, probabilities above this value are considered as speech",
     )
 
     vad_args.add_argument(
diff --git a/src/whisper_ctranslate2/transcribe.py b/src/whisper_ctranslate2/transcribe.py
index 6392bb8..4c15deb 100644
--- a/src/whisper_ctranslate2/transcribe.py
+++ b/src/whisper_ctranslate2/transcribe.py
@@ -48,7 +48,7 @@ class TranscriptionOptions(NamedTuple):
     append_punctuations: str
     hallucination_silence_threshold: Optional[float]
     vad_filter: bool
-    vad_onset: Optional[float]
+    vad_threshold: Optional[float]
     vad_min_speech_duration_ms: Optional[int]
     vad_max_speech_duration_s: Optional[int]
     vad_min_silence_duration_ms: Optional[int]
@@ -84,8 +84,8 @@ def _get_colored_text(self, words):
     def _get_vad_parameters_dictionary(self, options):
         vad_parameters = {}
 
-        if options.vad_onset:
-            vad_parameters["onset"] = options.vad_onset
+        if options.vad_threshold:
+            vad_parameters["threshold"] = options.vad_threshold
 
         if options.vad_min_speech_duration_ms:
             vad_parameters["min_speech_duration_ms"] = (
diff --git a/src/whisper_ctranslate2/whisper_ctranslate2.py b/src/whisper_ctranslate2/whisper_ctranslate2.py
index bd5bf70..219f415 100644
--- a/src/whisper_ctranslate2/whisper_ctranslate2.py
+++ b/src/whisper_ctranslate2/whisper_ctranslate2.py
@@ -70,7 +70,7 @@ def get_transcription_options(args):
         print_colors=args.pop("print_colors"),
         hallucination_silence_threshold=args.pop("hallucination_silence_threshold"),
         vad_filter=args.pop("vad_filter"),
-        vad_onset=args.pop("vad_onset"),
+        vad_threshold=args.pop("vad_threshold"),
         vad_min_speech_duration_ms=args.pop("vad_min_speech_duration_ms"),
         vad_max_speech_duration_s=args.pop("vad_max_speech_duration_s"),
         vad_min_silence_duration_ms=args.pop("vad_min_silence_duration_ms"),