forked from ivanvovk/WaveGrad
-
Notifications
You must be signed in to change notification settings - Fork 3
/
train_test_tool.py
39 lines (34 loc) · 1.26 KB
/
train_test_tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import math
import random
def get_audiofile_paths(dataset_path):
audiofile_paths = []
print(f'\nReading audio files from {dataset_path}...')
for root, dirs, files in os.walk(dataset_path):
for name in files:
audiofile_paths.append(os.path.join(root, name))
return audiofile_paths
def get_train_test(dataset_path):
audiofile_paths = get_audiofile_paths(dataset_path)
return audiofile_paths
def write_out_file(train, test, out_path):
trainout = open(f'{out_path}/train.txt', "w")
for sfx in train:
trainout.write(sfx + "\n")
testout = open(f'{out_path}/test.txt', "w")
for sfx in test:
testout.write(sfx + "\n")
if __name__ == '__main__':
#Put your dataset path here
DATASET_PATH = ''
TRAIN_TEST_SPLIT = 0.8
OUT_PATH = 'filelists'
audiofile_paths = get_train_test(DATASET_PATH)
if os.path.exists(OUT_PATH) == False:
os.makedirs(OUT_PATH)
#Randomise to get random train/test split
random.shuffle(audiofile_paths)
total_files = len(audiofile_paths)
train_split = audiofile_paths[0:math.ceil(total_files*TRAIN_TEST_SPLIT)]
test_split = audiofile_paths[math.ceil(total_files*TRAIN_TEST_SPLIT):-1]
write_out_file(train_split, test_split, OUT_PATH)