Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ dependencies:
- scikit-learn==1.0.2
- seaborn==0.11.2
- tqdm==4.62.3
- apiaudio==0.15.0
1,899 changes: 1,899 additions & 0 deletions eval_config/big_utts.csv

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions eval_config/speakers.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Bernd
Ralf
Mauro
Conrad
Birgit
Erika
Greta
Tanja
224 changes: 224 additions & 0 deletions run_meta_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
import csv
import pandas as pd
import numpy as np
import os
import shutil
import subprocess
import argparse
import apiaudio
import os
import requests
import shutil
from urllib.request import urlretrieve

def generate_filepath(speaker, n):
    """Build the output path ``<cwd>/<speaker>/<n>.wav`` for utterance *n*."""
    return os.path.join(os.getcwd(), speaker, f"{n}.wav")

def generate_sample(script, speaker, audience):
    """Render one utterance through the Aflorithmic TTS API.

    Parameters:
        script: apiaudio script object (must expose ``.get("scriptId")``).
        speaker: voice name for the synthesis.
        audience: list with a single ``{'utt': text}`` dict substituted
            into the ``{{utt}}`` placeholder of the script.

    Returns:
        The URL of the rendered 'main' section, or None when the API
        response does not contain one.
    """
    r = apiaudio.Speech().create(
        scriptId=script.get("scriptId"),
        voice=speaker,
        silence_padding=0,
        audience=audience,
    )
    try:
        return r['main']['url']
    except (KeyError, TypeError):
        # BUG FIX: the original referenced an undefined name `utt` here,
        # so a failed synthesis raised NameError instead of being reported.
        # Also narrowed the bare `except` to the lookup errors that can
        # actually occur, and made the failure return an explicit None.
        print(f"Error with the following utterance: {audience}")
        return None

def download_audio_file(url, filename):
    """Stream the audio at *url* to *filename*, 1 KiB at a time."""
    response = requests.get(url, stream=True)
    with response, open(filename, 'wb') as out:
        for piece in response.iter_content(chunk_size=1024):
            # skip keep-alive chunks, which arrive as empty bytes
            if piece:
                out.write(piece)

def return_targets(filename, column_index, range_min, range_max):
    """Load a NISQA results CSV and keep the rows whose score in
    column *column_index* lies inside [range_min, range_max]."""
    frame = pd.read_csv(filename, header='infer')
    scores = frame.iloc[:, column_index]
    in_range = (scores >= range_min) & (scores <= range_max)
    return frame.loc[in_range]

def copy_files(data, file_dir, out_dir, speaker, n_files):
    """Copy the first *n_files* wav files named in column 0 of *data*
    from ``file_dir/speaker/`` to ``out_dir/speaker/``."""
    selected = data.iloc[0:n_files, 0]
    for name in selected:
        src = os.path.join(file_dir, speaker, name)
        dst = os.path.join(out_dir, speaker, name)
        shutil.copyfile(src, dst)

def inrange_write_to_csv(filename, data, speaker, n_rows):
    """Record the first *n_rows* in-range results for *speaker* in *filename*.

    Adds a 'Voice' column, moves it to the front, truncates to *n_rows*
    rows and appends the frame to *filename* (header only on first write).

    Returns:
        The number of rows written, so the caller can accumulate a total.

    Raises:
        IndexError: when *data* holds fewer than *n_rows* rows — the caller
            catches this to report speakers without enough files in range.
    """
    if len(data) < n_rows:
        # Make the caller's contract explicit: the original only raised
        # IndexError by accident (via `data.iloc[1, ...]` on short frames).
        raise IndexError(f"only {len(data)} rows available, {n_rows} requested")
    # Work on a copy so the caller's DataFrame is not mutated.
    data = data.copy()
    data['Voice'] = speaker
    # Reorder columns so the speaker name comes first.
    reordered = [-1] + list(range(data.shape[1] - 1))
    data = data.iloc[0:n_rows, reordered]
    # BUG FIX: the original called write_to_csv with a missing third
    # argument (TypeError) and returned None, which broke the caller's
    # `n_in_range += inrange_write_to_csv(...)` accumulation.
    write_to_csv(filename, data, not os.path.exists(filename))
    return len(data)

def calculate_stats(filename, column_index):
    """Return (mean, population std-dev, median) of one score column of a
    NISQA results CSV, each rounded to 2 decimal places."""
    scores = pd.read_csv(filename, header='infer').iloc[:, column_index]
    mean_score = np.round(np.mean(scores), 2)
    sd_score = np.round(np.std(scores), 2)
    med_score = np.round(np.median(scores), 2)
    return mean_score, sd_score, med_score

def stats_write_to_csv(filename, data, col_names):
    """Append per-speaker stats to *filename*, best (highest mean MOS) first."""
    ranked = data.sort_values('Mean MOS', ascending=False)
    write_to_csv(filename, ranked, col_names)

def write_to_csv(filename, data, col_names):
    """Write *data* to *filename*, appending when the file already exists.

    *col_names* is forwarded to pandas as ``header`` (a list of names,
    True, or False)."""
    mode = 'a' if os.path.exists(filename) else 'w+'
    data.to_csv(filename, mode=mode, header=col_names)

def make_df(data, col_names):
    """Wrap *data* in a DataFrame whose columns are labelled *col_names*."""
    frame = pd.DataFrame(data=data, columns=col_names)
    return frame


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Run automated NISQA evaluation')

    parser.add_argument("--fullEval", action='store_true', help="Run all steps of evaluation")
    parser.add_argument("--preprocess", action='store_true', help="Select subset of sentences without corrupted characters.")
    parser.add_argument("--generate", action='store_true', help="Generate samples using Aflorithmic API.")
    parser.add_argument("--apiKey", help="Your Aflorithmic API key.")
    parser.add_argument("--predict", action='store_true', help="Run NISQA to predict scores.")
    parser.add_argument("--quality", action='store_true', help="Predict quality scores")
    parser.add_argument("--naturalness", action='store_true', help="Predict naturalness scores")  # if neither arg provided, do both
    parser.add_argument("--stats", action='store_true', help="Compute mean & s.d. of NISQA scores for each voice.")
    parser.add_argument("--inRange", action='store_true', help="Get synthesis files within specified NISQA quality score range and copy files to new folder.")
    parser.add_argument("--min", type=float, help="Minimum value for NISQA range.")
    parser.add_argument("--max", type=float, help="Maximum value for NISQA range.")
    parser.add_argument("--nFiles", type=int, help="Number of files in range to copy to folder.")
    args = parser.parse_args()

    # --fullEval implies every individual pipeline stage.
    if args.fullEval:
        args.preprocess = True
        args.generate = True
        args.predict = True
        args.stats = True
        args.inRange = True

    # Stage 1: filter the raw sentence list and thin it out.
    if args.preprocess:
        to_discard = 'Ã'  # marker of mis-encoded (mojibake) text; drop affected sentences
        kept = 1
        discarded = 0
        written = 0
        keep_every = 4  # keep only every 4th clean sentence — the corpus is too large otherwise
        with open('eval_config/big_utts.csv', 'r') as inf:
            with open('eval_config/processed_utts.txt', 'w+') as outf:
                for line in csv.reader(inf, delimiter='\t'):
                    # each row looks like "<id> <sentence>"; keep the sentence part
                    utt = line[0].split(' ', 1)[1]
                    if to_discard not in utt:
                        kept += 1
                        if kept % keep_every == 0:
                            outf.write(f'{utt}\n')
                            written += 1
                    else:
                        discarded += 1
        print(f"There were {kept-1+discarded} sentences in your sentence file.")
        print(f"{kept-1} sentences were kept and {discarded} sentences were discarded because they contained the character(s) {to_discard}.")
        print(f"{written} sentences were written to the final sentence file, because you opted to write every 1/{keep_every} sentences.")

    # Stage 2: synthesize one wav per (speaker, sentence) via the API.
    if args.generate:
        # BUG FIX: argparse yields None (not False) for a missing --apiKey,
        # so the original `args.apiKey == False` check could never fire.
        if not args.apiKey:
            print("API Key missing: you need to pass your Aflorithmic API Key in order to generate audio.")
        else:
            apiaudio.api_key = args.apiKey
            with open("eval_config/speakers.txt", "r") as o:
                speakers = o.read().splitlines()
            with open("eval_config/processed_utts.txt", "r") as o:
                utts_list = o.read().splitlines()
            utts = [{'utt': utt} for utt in utts_list]
            text = """
<<sectionName::main>>
{{utt}}
"""
            script = apiaudio.Script().create(scriptText=text,
                                              scriptName="German_sample",
                                              moduleName="German_module",
                                              projectName="German")
            # Create speech. Choose a voice! https://library.api.audio/voices
            for speaker in speakers:
                speaker_dir = os.path.join(os.getcwd(), speaker)
                if not os.path.exists(speaker_dir):
                    os.mkdir(speaker_dir)
                for n, utt in enumerate(utts):
                    url = generate_sample(script, speaker, [utt])
                    filename = generate_filepath(speaker, n)
                    download_audio_file(url, filename)

    # Stage 3: run the NISQA prediction shell script with -q / -n flags.
    if args.predict:
        cl_args = []
        if not args.quality and not args.naturalness:
            # Neither mode requested: default to running both.
            args.quality = True
            args.naturalness = True
        if args.quality:
            cl_args.append('-q')
        if args.naturalness:
            cl_args.append('-n')
        cl_str = "./run_predict_batch.sh" + ' ' + ' '.join(cl_args)
        rc = subprocess.call(cl_str, shell=True)

    # Map each NISQA factor to its column index in NISQA_results.csv;
    # used by both the --stats and --inRange stages.
    factors = {}
    if args.quality:
        factors['quality'] = 1
        factors['noise'] = 2
        factors['discontinuity'] = 3
        factors['colouration'] = 4
        factors['loudness'] = 5
    if args.naturalness:
        factors['naturalness'] = 1

    # Stage 4: aggregate per-speaker statistics for each factor.
    if args.stats:
        for factor, col_index in factors.items():
            print(factor)
            # all quality sub-factors live in the 'quality' results folder
            csv_file = 'naturalness' if factor == 'naturalness' else 'quality'
            base_dir = os.path.join(os.getcwd(), f'to_evaluate/results/{csv_file}')
            speaker_stats = []
            for speaker in os.listdir(base_dir):
                if speaker == '.DS_Store':
                    continue
                try:
                    mean_score, sd_score, med_score = calculate_stats(os.path.join(base_dir, speaker, 'NISQA_results.csv'), col_index)
                except FileNotFoundError:
                    # BUG FIX: the original fell through after this message and
                    # appended stale (or undefined) scores for the missing speaker.
                    print(f'{speaker} missing')
                    continue
                speaker_stats.append([speaker, factor, mean_score, sd_score, med_score])
                print(speaker, factor, mean_score, sd_score)
            speaker_stats = make_df(speaker_stats, ['Speaker', factor, 'Mean MOS', 'S.D', 'Median MOS'])
            # only write the header once, on the first factor's block
            col_names = ['Speaker', factor, 'Mean MOS', 'S.D', 'Median MOS'] if col_index == 1 else False
            stats_write_to_csv('mean_NISQA_results.csv', speaker_stats, col_names)

    # Stage 5: copy files whose scores fall inside the requested range.
    if args.inRange:
        range_min = args.min if args.min is not None else 4.6
        range_max = args.max if args.max is not None else 4.9
        n_files = args.nFiles if args.nFiles is not None else 15  # default files copied per speaker

        csv_filename = f'{n_files}_samples_in_range.csv'
        wav_dir = os.path.join(os.getcwd(), 'to_evaluate/input')
        out_dir = os.path.join(os.getcwd(), 'eval_in_range')

        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        # start from a clean summary file so reruns don't append duplicates
        if os.path.exists(csv_filename):
            os.remove(csv_filename)

        n_in_range = 0
        for factor, col_index in factors.items():
            base_dir = f"/home/jovyan/NISQA/to_evaluate/results/{factor}"  # TODO: make this a CLI argument
            for speaker in os.listdir(base_dir):
                if not os.path.exists(os.path.join(out_dir, speaker)):
                    os.mkdir(os.path.join(out_dir, speaker))
                df = return_targets(os.path.join(base_dir, speaker, 'NISQA_results.csv'), col_index, range_min, range_max)
                try:
                    copy_files(df, wav_dir, out_dir, speaker, n_files)
                    n_in_range += inrange_write_to_csv(csv_filename, df, speaker, n_files)
                except IndexError:
                    print(f"Speaker {speaker} doesn't have {n_files} wav files in the specified range.")
                    continue

43 changes: 43 additions & 0 deletions run_predict_batch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
# Run NISQA batch prediction for every speaker directory under
# to_evaluate/input/.  Flags: -q = quality model, -n = naturalness (TTS) model.

path_to_conda_sh=/home/ubuntu/anaconda3/etc/profile.d/conda.sh
# differentiate between running on kubeflow vs ec2
if [[ -f "$path_to_conda_sh" ]]; then
    . "$path_to_conda_sh" && conda deactivate && conda activate nisqa
else
    . /opt/conda/etc/profile.d/conda.sh && conda deactivate && conda activate nisqa
fi


print_usage() {
    printf "Usage: %s [-q] [-n]\n  -q  predict quality scores\n  -n  predict naturalness scores\n" "$0"
}

# BUG FIX: initialise both flags. The original left them unset, and
# `if $q_flag` on an unset variable expands to an empty command whose
# exit status is 0 — so BOTH passes ran even when no flag was given.
n_flag='false'
q_flag='false'

while getopts 'nq' flag; do
    case "${flag}" in
        n) n_flag='true' ;;
        q) q_flag='true' ;;
        *) print_usage
           exit 1 ;;
    esac
done


if [[ "$q_flag" == 'true' ]]; then
    for speaker_dir in to_evaluate/input/*/; do
        speaker=$(basename "$speaker_dir")
        echo "$speaker" "$speaker_dir"
        mkdir -p ./to_evaluate/results/quality/"$speaker"
        python ./run_predict.py --mode predict_dir --pretrained_model weights/nisqa.tar --data_dir "${speaker_dir}" --num_workers 0 --bs 10 --output_dir to_evaluate/results/quality/"${speaker}"
    done
fi

if [[ "$n_flag" == 'true' ]]; then
    for speaker_dir in to_evaluate/input/*/; do
        speaker=$(basename "$speaker_dir")
        echo "$speaker" "$speaker_dir"
        mkdir -p ./to_evaluate/results/naturalness/"$speaker"
        python ./run_predict.py --mode predict_dir --pretrained_model weights/nisqa_tts.tar --data_dir "${speaker_dir}" --num_workers 0 --bs 10 --output_dir to_evaluate/results/naturalness/"${speaker}"
    done
fi
11 changes: 11 additions & 0 deletions to_evaluate/results/naturalness/Bernd/NISQA_results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
deg,mos_pred,model
9.wav,3.863346576690674,NISQA_TTS_v1
0.wav,3.8886377811431885,NISQA_TTS_v1
4.wav,4.163519382476807,NISQA_TTS_v1
1.wav,4.275750160217285,NISQA_TTS_v1
2.wav,3.4831244945526123,NISQA_TTS_v1
6.wav,4.349552154541016,NISQA_TTS_v1
8.wav,4.271938323974609,NISQA_TTS_v1
3.wav,3.6796951293945312,NISQA_TTS_v1
5.wav,4.345401287078857,NISQA_TTS_v1
7.wav,4.298731327056885,NISQA_TTS_v1
11 changes: 11 additions & 0 deletions to_evaluate/results/quality/Bernd/NISQA_results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
deg,mos_pred,noi_pred,dis_pred,col_pred,loud_pred,model
9.wav,4.627307,4.5496407,4.528632,4.233586,4.4571004,NISQAv2
0.wav,4.585354,4.2865133,4.5136375,4.01594,4.357506,NISQAv2
4.wav,4.6173377,4.4061966,4.442736,4.232208,4.40322,NISQAv2
1.wav,4.127182,4.437383,4.157149,4.031782,4.106723,NISQAv2
2.wav,3.392549,3.592441,3.974288,3.5049887,4.029476,NISQAv2
6.wav,3.817934,3.794438,4.272844,3.6566107,4.235783,NISQAv2
8.wav,3.8613153,3.988343,4.3777413,3.922384,4.303879,NISQAv2
3.wav,4.3247924,4.354585,4.319347,4.112709,4.303415,NISQAv2
5.wav,2.836534,2.9724293,4.1104445,3.4023385,4.058653,NISQAv2
7.wav,4.684951,4.498423,4.6252823,4.397271,4.4555926,NISQAv2