From 542eda04971d97a3d308600dc0d0415fa22fd6cc Mon Sep 17 00:00:00 2001 From: Abhiroop Talasila Date: Tue, 11 Jan 2022 22:39:17 +0530 Subject: [PATCH] Refactor main.py, add utils.py, and update README --- README.md | 89 ++++++--------- autosub/main.py | 213 ++++++++---------------------------- autosub/segmentAudio.py | 11 +- autosub/trainAudio.py | 4 +- autosub/utils.py | 103 +++++++++++++++++ autosub/writeToFile.py | 1 + getmodel.sh => getmodels.sh | 0 7 files changed, 190 insertions(+), 231 deletions(-) create mode 100644 autosub/utils.py rename getmodel.sh => getmodels.sh (100%) diff --git a/README.md b/README.md index 5f95d5f..43e5be6 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ # AutoSub -- [About](#about) -- [Motivation](#motivation) -- [Installation](#installation) -- [Docker](#docker) -- [How-to example](#how-to-example) -- [How it works](#how-it-works) -- [TO-DO](#to-do) -- [Contributing](#contributing) -- [References](#references) +- [AutoSub](#autosub) + - [About](#about) + - [Installation](#installation) + - [Docker](#docker) + - [How-to example](#how-to-example) + - [How it works](#how-it-works) + - [Motivation](#motivation) + - [Contributing](#contributing) + - [References](#references) ## About @@ -16,109 +16,92 @@ AutoSub is a CLI application to generate subtitle files (.srt, .vtt, and .txt tr ⭐ Featured in [DeepSpeech Examples](https://github.com/mozilla/DeepSpeech-examples) by Mozilla -## Motivation - -In the age of OTT platforms, there are still some who prefer to download movies/videos from YouTube/Facebook or even torrents rather than stream. I am one of them and on one such occasion, I couldn't find the subtitle file for a particular movie I had downloaded. Then the idea for AutoSub struck me and since I had worked with DeepSpeech previously, I decided to use it. - - ## Installation -* Clone the repo. All further steps should be performed while in the `AutoSub/` directory +* Clone the repo ```bash $ git clone https://github.com/abhirooptalasila/AutoSub $ cd AutoSub ``` -* Create a pip virtual environment to install the required packages +* Create a virtual environment to install the required packages. All further steps should be performed while in the `AutoSub/` directory ```bash - $ python3 -m venv sub + $ python3 -m pip install --user virtualenv + $ virtualenv sub $ source sub/bin/activate - $ pip3 install -r requirements.txt ``` -* Download the model and scorer files from DeepSpeech repo. The scorer file is optional, but it greatly improves inference results. +* Use the corresponding requirements file depending on whether you have a GPU or not. Make sure you have the appropriate [CUDA](https://deepspeech.readthedocs.io/en/v0.9.3/USING.html#cuda-dependency-inference) version ```bash - # Model file (~190 MB) - $ wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.pbmm - # Scorer file (~950 MB) - $ wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.scorer + $ pip3 install -r requirements.txt + OR + $ pip3 install -r requirements-gpu.txt ``` -* Create two folders `audio/` and `output/` to store audio segments and final SRT and VTT file +* Use `getmodels.sh` to download the model and scorer files with the version number as argument ```bash - $ mkdir audio output + $ ./getmodels.sh 0.9.3 ``` -* Install FFMPEG. If you're running Ubuntu, this should work fine. +* Install FFMPEG. 
If you're on Ubuntu, this should work fine
  ```bash
  $ sudo apt-get install ffmpeg
  $ ffmpeg -version # I'm running 4.1.4
  ```
-
-* [OPTIONAL] If you would like the subtitles to be generated faster, you can use the GPU package instead. Make sure to install the appropriate [CUDA](https://deepspeech.readthedocs.io/en/v0.9.3/USING.html#cuda-dependency-inference) version.
-  ```bash
-  $ source sub/bin/activate
-  $ pip3 install deepspeech-gpu
-  ```
 
 ## Docker
 
-* Installation using Docker is pretty straight-forward.
-  * First start by downloading training models by specifying which version you want:
-    * if you have your own, then skip this step and just ensure they are placed in project directory with .pbmm and .scorer extensions
+* If you don't have the model files, get them
   ```bash
-  $ ./getmodel.sh 0.9.3
+  $ ./getmodels.sh 0.9.3
   ```
-
-  * Then for a CPU build, run:
+* For a CPU build
   ```bash
   $ docker build -t autosub .
   $ docker run --volume=`pwd`/input:/input --name autosub autosub --file /input/video.mp4
   $ docker cp autosub:/output/ .
   ```
-
-  * For a GPU build that is reusable (saving time on instantiating the program):
+* For a GPU build that is reusable (saving time on instantiating the program)
   ```bash
   $ docker build --build-arg BASEIMAGE=nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 --build-arg DEPSLIST=requirements-gpu.txt -t autosub-base . && \
   docker run --gpus all --name autosub-base autosub-base --dry-run || \
   docker commit --change 'CMD []' autosub-base autosub-instance
   ```
-  * Then
+* Finally
   ```bash
-  $ docker run --volume=`pwd`/input:/input --name autosub autosub-instance --file video.mp4
+  $ docker run --volume=`pwd`/input:/input --name autosub autosub-instance --file /input/video.mp4
   $ docker cp autosub:/output/ .
   ```
 
 ## How-to example
 
-* Make sure the model and scorer files are in the root directory. They are automatically loaded
-* After following the installation instructions, you can run `autosub/main.py` as given below. The `--file` argument is the video file for which SRT file is to be generated
+* The model files should be in the repo root directory and will be loaded automatically. But in case you have multiple versions, use the `--model` and `--scorer` args when executing
+* After following the installation instructions, you can run `autosub/main.py` as given below. The `--file` argument is the video file for which subtitles are to be generated
   ```bash
   $ python3 autosub/main.py --file ~/movie.mp4
   ```
 * After the script finishes, the SRT file is saved in `output/`
-* Open the video file and add this SRT file as a subtitle, or you can just drag and drop in VLC.
 * The optional `--split-duration` argument allows customization of the maximum number of seconds any given subtitle is displayed for. The default is 5 seconds
   ```bash
   $ python3 autosub/main.py --file ~/movie.mp4 --split-duration 8
   ```
-* By default, AutoSub outputs in a number of formats. To only produce the file formats you want use the `--format` argument:
+* By default, AutoSub outputs SRT, VTT and TXT files. To only produce the file formats you want, use the `--format` argument
   ```bash
   $ python3 autosub/main.py --file ~/movie.mp4 --format srt txt
   ```
+* Open the video file and add this SRT file as a subtitle. You can just drag and drop in VLC.
 
 ## How it works
 
-Mozilla DeepSpeech is an amazing open-source speech-to-text engine with support for fine-tuning using custom datasets, external language models, exporting memory-mapped models and a lot more. You should definitely check it out for STT tasks. So, when you first run the script, I use FFMPEG to **extract the audio** from the video and save it in `audio/`. By default DeepSpeech is configured to accept 16kHz audio samples for inference, hence while extracting I make FFMPEG use 16kHz sampling rate.
+Mozilla DeepSpeech is an open-source speech-to-text engine with support for fine-tuning using custom datasets, external language models, exporting memory-mapped models and a lot more. You should definitely check it out for STT tasks. So, when you run the script, I use FFMPEG to **extract the audio** from the video and save it in `audio/` (a rough sketch of that call appears after the Motivation section below). By default DeepSpeech is configured to accept 16kHz audio samples for inference, hence while extracting I make FFMPEG use a 16kHz sampling rate.
 
-Then, I use [pyAudioAnalysis](https://github.com/tyiannak/pyAudioAnalysis) for silence removal - which basically takes the large audio file initially extracted, and splits it wherever silent regions are encountered, resulting in smaller audio segments which are much easier to process. I haven't used the whole library, instead I've integrated parts of it in `autosub/featureExtraction.py` and `autosub/trainAudio.py` All these audio files are stored in `audio/`. Then for each audio segment, I perform DeepSpeech inference on it, and write the inferred text in a SRT file. After all files are processed, the final SRT file is stored in `output/`.
+Then, I use [pyAudioAnalysis](https://github.com/tyiannak/pyAudioAnalysis) for silence removal - it takes the large audio file extracted initially and splits it wherever silent regions are encountered, resulting in smaller audio segments which are much easier to process. I haven't used the whole library; instead, I've integrated parts of it in `autosub/featureExtraction.py` and `autosub/trainAudio.py`. All these audio files are stored in `audio/`. Then for each audio segment, I perform DeepSpeech inference on it, and write the inferred text to an SRT file. After all files are processed, the final SRT file is stored in `output/`.
 
-When I tested the script on my laptop, it took about **40 minutes to generate the SRT file for a 70 minutes video file**. My config is an i5 dual-core @ 2.5 Ghz and 8 gigs of RAM. Ideally, the whole process shouldn't take more than 60% of the duration of original video file.
+When I tested the script on my laptop, it took about **40 minutes to generate the SRT file for a 70-minute video file**. My config is an i5 dual-core @ 2.5 GHz and 8 GB of RAM. Ideally, the whole process shouldn't take more than 60% of the duration of the original video file.
 
-## TO-DO
+## Motivation
 
-* Pre-process inferred text before writing to file (prettify)
-* Add progress bar to `extract_audio()`
-* GUI support (?)
+In the age of OTT platforms, there are still some who prefer to download movies/videos from YouTube/Facebook or even torrents rather than stream. I am one of them and on one such occasion, I couldn't find the subtitle file for a particular movie I had downloaded. Then the idea for AutoSub struck me and since I had worked with DeepSpeech previously, I decided to use it. 
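+
+For the curious, the audio-extraction step described under "How it works" boils down to an FFmpeg call along these lines (illustrative only; the real invocation is built in `autosub/audioProcessing.py`):
+
+```bash
+# mono (-ac 1), 16kHz (-ar 16000) WAV with the video stream dropped (-vn),
+# which is the input format DeepSpeech expects
+$ ffmpeg -i ~/movie.mp4 -vn -ac 1 -ar 16000 audio/movie.wav
+```
+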
 ## Contributing
 
diff --git a/autosub/main.py b/autosub/main.py
index 92d39a8..b9e6562 100644
--- a/autosub/main.py
+++ b/autosub/main.py
@@ -1,42 +1,28 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import argparse
 import os
 import re
-import shutil
 import sys
 import wave
+import argparse
 import numpy as np
-from deepspeech import Model
 from tqdm import tqdm
 
-from audioProcessing import extract_audio, convert_samplerate
-from segmentAudio import silenceRemoval
+from utils import *
 from writeToFile import write_to_file
+from audioProcessing import extract_audio
+from segmentAudio import remove_silent_segments
+
 
 # Line count for SRT file
 line_count = 1
 
-
-def sort_alphanumeric(data):
-    """Sort function to sort os.listdir() alphanumerically
-    Helps to process audio files sequentially after splitting
-
-    Args:
-        data : file name
-    """
-
-    convert = lambda text: int(text) if text.isdigit() else text.lower()
-    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
-
-    return sorted(data, key=alphanum_key)
-
-
 def ds_process_audio(ds, audio_file, output_file_handle_dict, split_duration):
-    """Run DeepSpeech inference on each audio file generated after silenceRemoval
-    and write to file pointed by file_handle
+    """sttWithMetadata() will run DeepSpeech inference on each audio file
+    generated after remove_silent_segments. These files contain start and end
+    timings in their filenames, which we use in the SRT file.
 
     Args:
         ds : DeepSpeech Model
@@ -47,24 +33,10 @@ def ds_process_audio(ds, audio_file, output_file_handle_dict, split_duration):
     global line_count
     fin = wave.open(audio_file, 'rb')
-    fs_orig = fin.getframerate()
-    desired_sample_rate = ds.sampleRate()
-
-    # Check if sampling rate is required rate (16000)
-    # won't be carried out as FFmpeg already converts to 16kHz
-    if fs_orig != desired_sample_rate:
-        print("Warning: original sample rate ({}) is different than {}hz. Resampling might \
-            produce erratic speech recognition".format(fs_orig, desired_sample_rate), file=sys.stderr)
-        audio = convert_samplerate(audio_file, desired_sample_rate)
-    else:
-        audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
-
+    audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
     fin.close()
 
-    # Perform inference on audio segment
     metadata = ds.sttWithMetadata(audio)
-
-    # File name contains start and end times in seconds. Extract that
     limits = audio_file.split(os.sep)[-1][:-4].split("_")[-1].split("-")
 
     # Run-on sentences are inferred as a single block, so write the sentence out as multiple separate lines
@@ -99,130 +71,52 @@ def ds_process_audio(ds, audio_file, output_file_handle_dict, split_duration):
             line_count += 1
             current_token_index += 1
 
-    # For the text transcript output, after each audio segment write newlines for readability. 
-    if 'txt' in output_file_handle_dict.keys():
-        output_file_handle_dict['txt'].write("\n\n")
+    if "txt" in output_file_handle_dict.keys():
+        output_file_handle_dict["txt"].write("\n\n")
 
 
 def main():
     global line_count
-    print("AutoSub\n")
-
-    #Parsing Arguments Section
-    supported_output_formats = ['srt', 'vtt', 'txt']
+    supported_output_formats = ["srt", "vtt", "txt"]
+
     parser = argparse.ArgumentParser(description="AutoSub")
-
-    #Runtime Options
-    parser.add_argument('--format', choices=supported_output_formats, nargs='+',
-                        help='Create only certain output formats rather than all formats',
+    parser.add_argument("--format", choices=supported_output_formats, nargs="+",
+                        help="Create only certain output formats rather than all formats",
                         default=supported_output_formats)
-    parser.add_argument('--split-duration', type=float,
-                        help='Split run-on sentences exceededing this duration (in seconds) into multiple subtitles',
+    parser.add_argument("--split-duration", dest="split_duration", type=float,
+                        help="Split run-on sentences exceeding this duration (in seconds) into multiple subtitles",
                         default=5)
-    parser.add_argument('--dry-run', dest="dry_run", action="store_true",
-                        help="Perform dry-run to verify options prior to running. Also useful to instantiate cuda/tensorflow cache prior to running multiple times.")
-
-    #Files that should be supplied
-    #File no longer required here, but will check manually later
-    #Basically EITHER file OR dry-run is sufficient
-    parser.add_argument('--file', required=False,
-                        help='Input video file')
-    parser.add_argument('--model', required=False,
-                        help='Input *.pbmm model file')
-    parser.add_argument('--scorer', required=False,
-                        help='Input *.scorer file')
+    parser.add_argument("--dry-run", dest="dry_run", action="store_true",
+                        help="Perform dry-run to verify options prior to running. Also useful to instantiate \
+                        cuda/tensorflow cache prior to running multiple times")
+    parser.add_argument("--file", required=False, help="Input video file")
+    parser.add_argument("--model", required=False, help="Input *.pbmm model file")
+    parser.add_argument("--scorer", required=False, help="Input *.scorer file")
     args = parser.parse_args()
 
-    #Please keep the following because I need it for verifying dockerfiles.
-    print(sys.argv[0:])
+    #print(sys.argv[0:])
     print("ARGS:", args)
 
-    def getmodel(args, arg_name):
-        #prioritize supplied argument
-
-        if arg_name == 'model':
-            arg_extension = '.pbmm'
-        elif arg_name == 'scorer':
-            arg_extension = '.scorer'
-        else:
-            print("Coding Error. This function only accepts model or scorer for arg_name.")
-            sys.exit(1)
-
-        arg = args.__getattribute__(arg_name)
-
-        if arg is not None:
-            model = os.path.abspath(arg)
-            if not os.path.isfile(model):
-                print(f"Error. Supplied file {arg} doesn't exist. Please supply a valid {arg_name} file via the --{arg_name} flag.")
-                sys.exit(1)
-        else:
-            #try to find local models
-            models_ = os.listdir()
-            models = []
-            for file in models_:
-                if file.endswith(arg_extension):
-                    models.append(file)
-            del(models_)
-
-            num_models = len(models)
-
-            if num_models == 0:
-                print(f"Warning no {arg_name}s specified via --{arg_name} and none found in local directory. Please run getmodel.sh convenience script from autosub repo to get some.")
-                if arg_name == 'model':
-                    print("Error: Must have pbmm model. Exiting")
-                    sys.exit(1)
-                else:
-                    model = ''
-            elif num_models != 1:
-                print(f"Warning. Detected multiple *{arg_extension} files in local dir. You must specify which one you wish to use via the --{arg_name} field. Details: \n {num_models} {models}")
-                if arg_name == 'model':
-                    print("Error: Must specify pbmm model. Exiting")
-                    sys.exit(1)
-                else:
-                    print("Since I cannot know which scorer you wish to use, I just won't use any and try to run inference without it.")
-                    model = ''
-            else:
-                model = os.path.abspath(models[0])
-
-        print(f"{arg_name}: ", model)
-        return(model)
-
-    def InstantiateModel(model, scorer):
-        # Load DeepSpeech model
-        try:
-            ds = Model(model)
-        except:
-            print("Invalid model file. Exiting\n")
-            sys.exit(1)
-
-        try:
-            ds.enableExternalScorer(scorer)
-        except:
-            print("Invalid scorer file. Running inference using only model file\n")
-        return(ds)
-
-
-    ds_model = getmodel(args, 'model')
-    ds_scorer = getmodel(args, 'scorer')
+    ds_model = get_model(args, "model")
+    ds_scorer = get_model(args, "scorer")
 
     if args.dry_run:
-        InstantiateModel(ds_model, ds_scorer)
+        create_model(ds_model, ds_scorer)
         if args.file is not None:
             if not os.path.isfile(args.file):
-                print(f"Error: {args.file}: No such file exists")
+                print(f"Invalid file: {args.file}")
             sys.exit(0)
 
-    #Not a dry-run
     if args.file is not None:
         if os.path.isfile(args.file):
            input_file = args.file
-            print("\nInput file:", input_file)
+            print(f"Input file: {args.file}")
         else:
-            print(args.file, ": No such file exists")
+            print(f"Invalid file: {args.file}")
            sys.exit(1)
     else:
-        print("Error. You must supply a file with --file or to instantiate cuda cache you must supply a --dry-run.")
+        print("Error. Either --file or --dry-run is required.")
         sys.exit(1)
 
     base_directory = os.getcwd()
@@ -230,61 +124,40 @@ def InstantiateModel(model, scorer):
     output_directory = os.path.join(base_directory, "output")
     audio_directory = os.path.join(base_directory, "audio")
     video_prefix = os.path.splitext(os.path.basename(input_file))[0]
     audio_file_name = os.path.join(audio_directory, video_prefix + ".wav")
-    
+
+    os.makedirs(output_directory, exist_ok=True)
+    os.makedirs(audio_directory, exist_ok=True)
     output_file_handle_dict = {}
+
     for format in args.format:
         output_filename = os.path.join(output_directory, video_prefix + "." + format)
-        print("Creating file: " + output_filename)
+        # print("Creating file: " + output_filename)
         output_file_handle_dict[format] = open(output_filename, "w")
         # For VTT format, write header
         if format == "vtt":
             output_file_handle_dict[format].write("WEBVTT\n")
             output_file_handle_dict[format].write("Kind: captions\n\n")
 
-    # Clean audio/ directory
-    for filename in os.listdir(audio_directory):
-        if filename.lower().endswith(".wav") and filename.startswith(video_prefix):
-            file_path = os.path.join(audio_directory, filename)
-            try:
-                if os.path.isfile(file_path) or os.path.islink(file_path):
-                    os.unlink(file_path)
-                elif os.path.isdir(file_path):
-                    shutil.rmtree(file_path)
-            except Exception as e:
-                print('Failed to delete %s. 
Reason: %s' % (file_path, e)) - - # Extract audio from input video file + clean_folder(audio_directory) extract_audio(input_file, audio_file_name) print("Splitting on silent parts in audio file") - silenceRemoval(audio_file_name) + remove_silent_segments(audio_file_name) - print("\nRunning inference:") - - #Remove master audio file - audiofiles=sort_alphanumeric(os.listdir(audio_directory)) + audiofiles = [file for file in os.listdir(audio_directory) if file.startswith(video_prefix)] + audiofiles = sort_alphanumeric(audiofiles) audiofiles.remove(os.path.basename(audio_file_name)) - #Remove non related audiofiles potentially from other instances of autosub - audiofiles_ = [] - for filename in audiofiles: - if filename.startswith(video_prefix): - audiofiles_.append(filename) - audiofiles = audiofiles_ - del(audiofiles_) - - #Process Segments - - ds = InstantiateModel(ds_model, ds_scorer) + print("\nRunning inference:") + ds = create_model(ds_model, ds_scorer) for filename in tqdm(audiofiles): audio_segment_path = os.path.join(audio_directory, filename) ds_process_audio(ds, audio_segment_path, output_file_handle_dict, split_duration=args.split_duration) - print("\n") for format in output_file_handle_dict: file_handle = output_file_handle_dict[format] - print(format.upper() + " file saved to", file_handle.name) + print(format.upper(), "file saved to", file_handle.name) file_handle.close() diff --git a/autosub/segmentAudio.py b/autosub/segmentAudio.py index c1d8e8d..55f25f6 100644 --- a/autosub/segmentAudio.py +++ b/autosub/segmentAudio.py @@ -3,10 +3,11 @@ import os import numpy as np + +import trainAudio as TA from pydub import AudioSegment -import scipy.io.wavfile as wavfile import featureExtraction as FE -import trainAudio as TA +import scipy.io.wavfile as wavfile def read_audio_file(input_file): @@ -185,8 +186,8 @@ def silence_removal(signal, sampling_rate, st_win, st_step, smooth_window=0.5, return seg_limits -def silenceRemoval(input_file, smoothing_window=1.0, weight=0.2): - """Remove silence segments from an audio file and split on those segments +def remove_silent_segments(input_file, smoothing_window=1.0, weight=0.2): + """Remove silent segments from an audio file and split on those segments Args: input_file : audio from input video file @@ -204,5 +205,3 @@ def silenceRemoval(input_file, smoothing_window=1.0, weight=0.2): strOut = "{0:s}_{1:.3f}-{2:.3f}.wav".format(input_file[0:-4], s[0], s[1]) wavfile.write(strOut, fs, x[int(fs * s[0]):int(fs * s[1])]) -# if __name__ == "__main__": -# silenceRemoval("video.wav") diff --git a/autosub/trainAudio.py b/autosub/trainAudio.py index 1b514b0..108c497 100644 --- a/autosub/trainAudio.py +++ b/autosub/trainAudio.py @@ -4,9 +4,9 @@ import numpy as np import sklearn.svm -shortTermWindow = 0.050 -shortTermStep = 0.050 eps = 0.00000001 +shortTermStep = 0.050 +shortTermWindow = 0.050 def train_svm(features, c_param, kernel='linear'): diff --git a/autosub/utils.py b/autosub/utils.py new file mode 100644 index 0000000..07e1819 --- /dev/null +++ b/autosub/utils.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import re +import os +import sys +import shutil +from deepspeech import Model + +def sort_alphanumeric(data): + """Sort function to sort os.listdir() alphanumerically + Helps to process audio files sequentially after splitting + + Args: + data : file name + """ + + convert = lambda text: int(text) if text.isdigit() else text.lower() + alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] + + return 
sorted(data, key=alphanum_key)
+
+def clean_folder(folder):
+    """Delete everything inside a folder
+
+    Args:
+        folder : target folder
+    """
+
+    for filename in os.listdir(folder):
+        file_path = os.path.join(folder, filename)
+        try:
+            if os.path.isfile(file_path) or os.path.islink(file_path):
+                os.unlink(file_path)
+            elif os.path.isdir(file_path):
+                shutil.rmtree(file_path)
+        except Exception as e:
+            print(f"Failed to delete {file_path}. Reason: {e}")
+
+def get_model(args, arg_name):
+    """Prioritize the supplied argument; if none is given, try to find matching files locally
+
+    Args:
+        args : run-time arguments
+        arg_name : either model or scorer file
+    """
+
+    if arg_name == 'model':
+        arg_extension = '.pbmm'
+    elif arg_name == 'scorer':
+        arg_extension = '.scorer'
+
+    arg = args.__getattribute__(arg_name)
+
+    if arg is not None:
+        model = os.path.abspath(arg)
+        if not os.path.isfile(model):
+            print(f"Error. Supplied file {arg} doesn't exist. Please supply a valid {arg_name} file via the --{arg_name} flag.")
+            sys.exit(1)
+    else:
+        models = [file for file in os.listdir() if file.endswith(arg_extension)]
+        num_models = len(models)
+
+        if num_models == 0:
+            print(f"Warning: no {arg_name} specified via --{arg_name} and none found in the local directory. Please run getmodels.sh to get some.")
+            if arg_name == 'model':
+                print("Error: Must have pbmm model. Exiting")
+                sys.exit(1)
+            else:
+                model = ''
+        elif num_models != 1:
+            print(f"Warning. Detected {num_models} {arg_name} files in local dir")
+            if arg_name == 'model':
+                print("Must specify pbmm model. Exiting")
+                sys.exit(1)
+            else:
+                print("Please specify scorer using --scorer")
+                model = ''
+        else:
+            model = os.path.abspath(models[0])
+
+    print(f"{arg_name.capitalize()}: {model}")
+    return(model)
+
+def create_model(model, scorer):
+    """Instantiate model and scorer
+
+    Args:
+        model : .pbmm model file
+        scorer : .scorer file
+    """
+
+    try:
+        ds = Model(model)
+    except:
+        print("Invalid model file. Exiting")
+        sys.exit(1)
+
+    try:
+        ds.enableExternalScorer(scorer)
+    except:
+        print("Invalid scorer file. Running inference using only model file")
+    return(ds)
\ No newline at end of file
diff --git a/autosub/writeToFile.py b/autosub/writeToFile.py
index dfca750..f087749 100644
--- a/autosub/writeToFile.py
+++ b/autosub/writeToFile.py
@@ -11,6 +11,7 @@ def get_timestamp_string(timedelta, format):
         timedelta : timedelta timestamp
         format : subtitle format
     """
+
     sep = '.' if format == "vtt" else ','
     # timedelta may be eg, '0:00:14'
     if '.' in str(timedelta):
diff --git a/getmodel.sh b/getmodels.sh
similarity index 100%
rename from getmodel.sh
rename to getmodels.sh
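
As a quick sanity check of how the new `utils.py` helpers compose, here is a hypothetical driver (not part of the patch; it assumes the `.pbmm`/`.scorer` files already sit in the working directory and that `audio/` holds split segments):

```python
# Hypothetical usage sketch of the refactored helpers; it mirrors what the
# patched main.py does, condensed for illustration.
import argparse
import os

from utils import create_model, get_model, sort_alphanumeric

# With no --model/--scorer values supplied, get_model() falls back to
# searching the current directory for *.pbmm / *.scorer files.
args = argparse.Namespace(model=None, scorer=None)
ds_model = get_model(args, "model")
ds_scorer = get_model(args, "scorer")

# create_model() loads the model; if the scorer is missing or invalid it
# prints a warning and runs inference with the model alone.
ds = create_model(ds_model, ds_scorer)

# Segment filenames sort in natural order: seg_2.wav before seg_10.wav.
for segment in sort_alphanumeric(os.listdir("audio")):
    print(segment)
```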