Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ dependencies:
- scikit-learn==1.0.2
- seaborn==0.11.2
- tqdm==4.62.3
- apiaudio==0.15.0
1,899 changes: 1,899 additions & 0 deletions eval_config/big_utts.csv

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions eval_config/speakers.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Bernd
Ralf
Mauro
Conrad
Birgit
Erika
Greta
Tanja
224 changes: 224 additions & 0 deletions run_meta_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
import csv
import pandas as pd
import numpy as np
import os
import shutil
import subprocess
import argparse
import apiaudio
import os
import requests
import shutil
from urllib.request import urlretrieve

def generate_filepath(speaker, n):
    """Build the output path ``<cwd>/<speaker>/<n>.wav`` for utterance *n*."""
    return os.path.join(os.getcwd(), speaker, f"{n}.wav")

def generate_sample(script, speaker, audience):
    """Render one utterance through the Aflorithmic TTS API.

    Parameters:
        script: apiaudio script object (must expose ``.get("scriptId")``).
        speaker: voice name for the synthesis.
        audience: list with a single ``{'utt': text}`` dict substituted
            into the ``{{utt}}`` placeholder of the script.

    Returns:
        The URL of the rendered 'main' section, or None when the API
        response does not contain one.
    """
    r = apiaudio.Speech().create(
        scriptId=script.get("scriptId"),
        voice=speaker,
        silence_padding=0,
        audience=audience,
    )
    try:
        return r['main']['url']
    except (KeyError, TypeError):
        # BUG FIX: the original referenced an undefined name `utt` here,
        # so a failed synthesis raised NameError instead of being reported.
        # Also narrowed the bare `except` to the lookup errors that can
        # actually occur, and made the failure return an explicit None.
        print(f"Error with the following utterance: {audience}")
        return None

def download_audio_file(url, filename):
    """Stream the audio at *url* to *filename*, 1 KiB at a time."""
    response = requests.get(url, stream=True)
    with response, open(filename, 'wb') as out:
        for piece in response.iter_content(chunk_size=1024):
            # skip keep-alive chunks, which arrive as empty bytes
            if piece:
                out.write(piece)

def return_targets(filename, column_index, range_min, range_max):
    """Load a NISQA results CSV and keep the rows whose score in
    column *column_index* lies inside [range_min, range_max]."""
    frame = pd.read_csv(filename, header='infer')
    scores = frame.iloc[:, column_index]
    in_range = (scores >= range_min) & (scores <= range_max)
    return frame.loc[in_range]

def copy_files(data, file_dir, out_dir, speaker, n_files):
    """Copy the first *n_files* wav files named in column 0 of *data*
    from ``file_dir/speaker/`` to ``out_dir/speaker/``."""
    selected = data.iloc[0:n_files, 0]
    for name in selected:
        src = os.path.join(file_dir, speaker, name)
        dst = os.path.join(out_dir, speaker, name)
        shutil.copyfile(src, dst)

def inrange_write_to_csv(filename, data, speaker, n_rows):
    """Record the first *n_rows* in-range results for *speaker* in *filename*.

    Adds a 'Voice' column, moves it to the front, truncates to *n_rows*
    rows and appends the frame to *filename* (header only on first write).

    Returns:
        The number of rows written, so the caller can accumulate a total.

    Raises:
        IndexError: when *data* holds fewer than *n_rows* rows — the caller
            catches this to report speakers without enough files in range.
    """
    if len(data) < n_rows:
        # Make the caller's contract explicit: the original only raised
        # IndexError by accident (via `data.iloc[1, ...]` on short frames).
        raise IndexError(f"only {len(data)} rows available, {n_rows} requested")
    # Work on a copy so the caller's DataFrame is not mutated.
    data = data.copy()
    data['Voice'] = speaker
    # Reorder columns so the speaker name comes first.
    reordered = [-1] + list(range(data.shape[1] - 1))
    data = data.iloc[0:n_rows, reordered]
    # BUG FIX: the original called write_to_csv with a missing third
    # argument (TypeError) and returned None, which broke the caller's
    # `n_in_range += inrange_write_to_csv(...)` accumulation.
    write_to_csv(filename, data, not os.path.exists(filename))
    return len(data)

def calculate_stats(filename, column_index):
    """Return (mean, population std-dev, median) of one score column of a
    NISQA results CSV, each rounded to 2 decimal places."""
    scores = pd.read_csv(filename, header='infer').iloc[:, column_index]
    mean_score = np.round(np.mean(scores), 2)
    sd_score = np.round(np.std(scores), 2)
    med_score = np.round(np.median(scores), 2)
    return mean_score, sd_score, med_score

def stats_write_to_csv(filename, data, col_names):
    """Append per-speaker stats to *filename*, best (highest mean MOS) first."""
    ranked = data.sort_values('Mean MOS', ascending=False)
    write_to_csv(filename, ranked, col_names)

def write_to_csv(filename, data, col_names):
    """Write *data* to *filename*, appending when the file already exists.

    *col_names* is forwarded to pandas as ``header`` (a list of names,
    True, or False)."""
    mode = 'a' if os.path.exists(filename) else 'w+'
    data.to_csv(filename, mode=mode, header=col_names)

def make_df(data, col_names):
    """Wrap *data* in a DataFrame whose columns are labelled *col_names*."""
    frame = pd.DataFrame(data=data, columns=col_names)
    return frame


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Run automated NISQA evaluation')

    parser.add_argument("--fullEval", action='store_true', help="Run all steps of evaluation")
    parser.add_argument("--preprocess", action='store_true', help="Select subset of sentences without corrupted characters.")
    parser.add_argument("--generate", action='store_true', help="Generate samples using Aflorithmic API.")
    parser.add_argument("--apiKey", help="Your Aflorithmic API key.")
    parser.add_argument("--predict", action='store_true', help="Run NISQA to predict scores.")
    parser.add_argument("--quality", action='store_true', help="Predict quality scores")
    parser.add_argument("--naturalness", action='store_true', help="Predict naturalness scores")  # if neither arg provided, do both
    parser.add_argument("--stats", action='store_true', help="Compute mean & s.d. of NISQA scores for each voice.")
    parser.add_argument("--inRange", action='store_true', help="Get synthesis files within specified NISQA quality score range and copy files to new folder.")
    parser.add_argument("--min", type=float, help="Minimum value for NISQA range.")
    parser.add_argument("--max", type=float, help="Maximum value for NISQA range.")
    parser.add_argument("--nFiles", type=int, help="Number of files in range to copy to folder.")
    args = parser.parse_args()

    # --fullEval implies every individual pipeline stage.
    if args.fullEval:
        args.preprocess = True
        args.generate = True
        args.predict = True
        args.stats = True
        args.inRange = True

    # Stage 1: filter the raw sentence list and thin it out.
    if args.preprocess:
        to_discard = 'Ã'  # marker of mis-encoded (mojibake) text; drop affected sentences
        kept = 1
        discarded = 0
        written = 0
        keep_every = 4  # keep only every 4th clean sentence — the corpus is too large otherwise
        with open('eval_config/big_utts.csv', 'r') as inf:
            with open('eval_config/processed_utts.txt', 'w+') as outf:
                for line in csv.reader(inf, delimiter='\t'):
                    # each row looks like "<id> <sentence>"; keep the sentence part
                    utt = line[0].split(' ', 1)[1]
                    if to_discard not in utt:
                        kept += 1
                        if kept % keep_every == 0:
                            outf.write(f'{utt}\n')
                            written += 1
                    else:
                        discarded += 1
        print(f"There were {kept-1+discarded} sentences in your sentence file.")
        print(f"{kept-1} sentences were kept and {discarded} sentences were discarded because they contained the character(s) {to_discard}.")
        print(f"{written} sentences were written to the final sentence file, because you opted to write every 1/{keep_every} sentences.")

    # Stage 2: synthesize one wav per (speaker, sentence) via the API.
    if args.generate:
        # BUG FIX: argparse yields None (not False) for a missing --apiKey,
        # so the original `args.apiKey == False` check could never fire.
        if not args.apiKey:
            print("API Key missing: you need to pass your Aflorithmic API Key in order to generate audio.")
        else:
            apiaudio.api_key = args.apiKey
            with open("eval_config/speakers.txt", "r") as o:
                speakers = o.read().splitlines()
            with open("eval_config/processed_utts.txt", "r") as o:
                utts_list = o.read().splitlines()
            utts = [{'utt': utt} for utt in utts_list]
            text = """
<<sectionName::main>>
{{utt}}
"""
            script = apiaudio.Script().create(scriptText=text,
                                              scriptName="German_sample",
                                              moduleName="German_module",
                                              projectName="German")
            # Create speech. Choose a voice! https://library.api.audio/voices
            for speaker in speakers:
                speaker_dir = os.path.join(os.getcwd(), speaker)
                if not os.path.exists(speaker_dir):
                    os.mkdir(speaker_dir)
                for n, utt in enumerate(utts):
                    url = generate_sample(script, speaker, [utt])
                    filename = generate_filepath(speaker, n)
                    download_audio_file(url, filename)

    # Stage 3: run the NISQA prediction shell script with -q / -n flags.
    if args.predict:
        cl_args = []
        if not args.quality and not args.naturalness:
            # Neither mode requested: default to running both.
            args.quality = True
            args.naturalness = True
        if args.quality:
            cl_args.append('-q')
        if args.naturalness:
            cl_args.append('-n')
        cl_str = "./run_predict_batch.sh" + ' ' + ' '.join(cl_args)
        rc = subprocess.call(cl_str, shell=True)

    # Map each NISQA factor to its column index in NISQA_results.csv;
    # used by both the --stats and --inRange stages.
    factors = {}
    if args.quality:
        factors['quality'] = 1
        factors['noise'] = 2
        factors['discontinuity'] = 3
        factors['colouration'] = 4
        factors['loudness'] = 5
    if args.naturalness:
        factors['naturalness'] = 1

    # Stage 4: aggregate per-speaker statistics for each factor.
    if args.stats:
        for factor, col_index in factors.items():
            print(factor)
            # all quality sub-factors live in the 'quality' results folder
            csv_file = 'naturalness' if factor == 'naturalness' else 'quality'
            base_dir = os.path.join(os.getcwd(), f'to_evaluate/results/{csv_file}')
            speaker_stats = []
            for speaker in os.listdir(base_dir):
                if speaker == '.DS_Store':
                    continue
                try:
                    mean_score, sd_score, med_score = calculate_stats(os.path.join(base_dir, speaker, 'NISQA_results.csv'), col_index)
                except FileNotFoundError:
                    # BUG FIX: the original fell through after this message and
                    # appended stale (or undefined) scores for the missing speaker.
                    print(f'{speaker} missing')
                    continue
                speaker_stats.append([speaker, factor, mean_score, sd_score, med_score])
                print(speaker, factor, mean_score, sd_score)
            speaker_stats = make_df(speaker_stats, ['Speaker', factor, 'Mean MOS', 'S.D', 'Median MOS'])
            # only write the header once, on the first factor's block
            col_names = ['Speaker', factor, 'Mean MOS', 'S.D', 'Median MOS'] if col_index == 1 else False
            stats_write_to_csv('mean_NISQA_results.csv', speaker_stats, col_names)

    # Stage 5: copy files whose scores fall inside the requested range.
    if args.inRange:
        range_min = args.min if args.min is not None else 4.6
        range_max = args.max if args.max is not None else 4.9
        n_files = args.nFiles if args.nFiles is not None else 15  # default files copied per speaker

        csv_filename = f'{n_files}_samples_in_range.csv'
        wav_dir = os.path.join(os.getcwd(), 'to_evaluate/input')
        out_dir = os.path.join(os.getcwd(), 'eval_in_range')

        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        # start from a clean summary file so reruns don't append duplicates
        if os.path.exists(csv_filename):
            os.remove(csv_filename)

        n_in_range = 0
        for factor, col_index in factors.items():
            base_dir = f"/home/jovyan/NISQA/to_evaluate/results/{factor}"  # TODO: make this a CLI argument
            for speaker in os.listdir(base_dir):
                if not os.path.exists(os.path.join(out_dir, speaker)):
                    os.mkdir(os.path.join(out_dir, speaker))
                df = return_targets(os.path.join(base_dir, speaker, 'NISQA_results.csv'), col_index, range_min, range_max)
                try:
                    copy_files(df, wav_dir, out_dir, speaker, n_files)
                    n_in_range += inrange_write_to_csv(csv_filename, df, speaker, n_files)
                except IndexError:
                    print(f"Speaker {speaker} doesn't have {n_files} wav files in the specified range.")
                    continue

43 changes: 43 additions & 0 deletions run_predict_batch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
# Run NISQA batch prediction for every speaker directory under
# to_evaluate/input/.  Flags: -q = quality model, -n = naturalness (TTS) model.

path_to_conda_sh=/home/ubuntu/anaconda3/etc/profile.d/conda.sh
# differentiate between running on kubeflow vs ec2
if [[ -f "$path_to_conda_sh" ]]; then
    . "$path_to_conda_sh" && conda deactivate && conda activate nisqa
else
    . /opt/conda/etc/profile.d/conda.sh && conda deactivate && conda activate nisqa
fi


print_usage() {
    printf "Usage: %s [-q] [-n]\n  -q  predict quality scores\n  -n  predict naturalness scores\n" "$0"
}

# BUG FIX: initialise both flags. The original left them unset, and
# `if $q_flag` on an unset variable expands to an empty command whose
# exit status is 0 — so BOTH passes ran even when no flag was given.
n_flag='false'
q_flag='false'

while getopts 'nq' flag; do
    case "${flag}" in
        n) n_flag='true' ;;
        q) q_flag='true' ;;
        *) print_usage
           exit 1 ;;
    esac
done


if [[ "$q_flag" == 'true' ]]; then
    for speaker_dir in to_evaluate/input/*/; do
        speaker=$(basename "$speaker_dir")
        echo "$speaker" "$speaker_dir"
        mkdir -p ./to_evaluate/results/quality/"$speaker"
        python ./run_predict.py --mode predict_dir --pretrained_model weights/nisqa.tar --data_dir "${speaker_dir}" --num_workers 0 --bs 10 --output_dir to_evaluate/results/quality/"${speaker}"
    done
fi

if [[ "$n_flag" == 'true' ]]; then
    for speaker_dir in to_evaluate/input/*/; do
        speaker=$(basename "$speaker_dir")
        echo "$speaker" "$speaker_dir"
        mkdir -p ./to_evaluate/results/naturalness/"$speaker"
        python ./run_predict.py --mode predict_dir --pretrained_model weights/nisqa_tts.tar --data_dir "${speaker_dir}" --num_workers 0 --bs 10 --output_dir to_evaluate/results/naturalness/"${speaker}"
    done
fi
11 changes: 11 additions & 0 deletions to_evaluate/results/naturalness/Bernd/NISQA_results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
deg,mos_pred,model
9.wav,3.863346576690674,NISQA_TTS_v1
0.wav,3.8886377811431885,NISQA_TTS_v1
4.wav,4.163519382476807,NISQA_TTS_v1
1.wav,4.275750160217285,NISQA_TTS_v1
2.wav,3.4831244945526123,NISQA_TTS_v1
6.wav,4.349552154541016,NISQA_TTS_v1
8.wav,4.271938323974609,NISQA_TTS_v1
3.wav,3.6796951293945312,NISQA_TTS_v1
5.wav,4.345401287078857,NISQA_TTS_v1
7.wav,4.298731327056885,NISQA_TTS_v1
11 changes: 11 additions & 0 deletions to_evaluate/results/quality/Bernd/NISQA_results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
deg,mos_pred,noi_pred,dis_pred,col_pred,loud_pred,model
9.wav,4.627307,4.5496407,4.528632,4.233586,4.4571004,NISQAv2
0.wav,4.585354,4.2865133,4.5136375,4.01594,4.357506,NISQAv2
4.wav,4.6173377,4.4061966,4.442736,4.232208,4.40322,NISQAv2
1.wav,4.127182,4.437383,4.157149,4.031782,4.106723,NISQAv2
2.wav,3.392549,3.592441,3.974288,3.5049887,4.029476,NISQAv2
6.wav,3.817934,3.794438,4.272844,3.6566107,4.235783,NISQAv2
8.wav,3.8613153,3.988343,4.3777413,3.922384,4.303879,NISQAv2
3.wav,4.3247924,4.354585,4.319347,4.112709,4.303415,NISQAv2
5.wav,2.836534,2.9724293,4.1104445,3.4023385,4.058653,NISQAv2
7.wav,4.684951,4.498423,4.6252823,4.397271,4.4555926,NISQAv2