Feature/add phonemizer metadata #125
base: main
Conversation
…Rollback to this commit if the config file inputs don't work.
…do some comment cleanup.
I think the script could be simplified.
test_metadata_path = cm.valid_metadata_path
print(f'\nReading metadata from {metadatareader.metadata_path}')
print(f'\nFound {len(metadatareader.filenames)} lines.')
def get_short_files(phonemized=False):
type hints missing.
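For example, a minimal sketch of what the annotated signature could look like; the symbol sets and metadata dict below are hypothetical stand-ins for the repo's actual objects, and the body only illustrates one way the symbol filter from the suggestion below could be applied:

from typing import Dict

# Hypothetical stand-ins for the repo's symbol sets and metadata dict.
_alphabet = set('abcdefghijklmnopqrstuvwxyz ')
all_phonemes = set('aeiouptkmn ')
metadata: Dict[str, str] = {'sample_001': 'hello world', 'sample_002': 'xyzzy!'}

def get_short_files(phonemized: bool = False) -> Dict[str, str]:
    """Keep only entries whose text uses the expected symbol set."""
    symbol_list = all_phonemes if phonemized else _alphabet
    return {k: v for k, v in metadata.items() if set(v) <= symbol_list}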
print(f'\nReading metadata from {metadatareader.metadata_path}')
print(f'\nFound {len(metadatareader.filenames)} lines.')
def get_short_files(phonemized=False):
if not phonemized:
Suggested change:
- if not phonemized:
+ symbol_list = all_phonemes if phonemized else _alphabet
return filter_metadata
remove_files = pickle.load(open(cm.data_dir / 'under-over_sized_mels.pkl', 'rb')) |
unclosed file I/O
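A sketch of the usual fix: open the pickle through a context manager so the handle is closed even if loading fails. data_dir below stands in for cm.data_dir.

import pickle
from pathlib import Path

data_dir = Path('.')  # stand-in for cm.data_dir

# The with-block closes the file even if pickle.load raises.
with open(data_dir / 'under-over_sized_mels.pkl', 'rb') as f:
    remove_files = pickle.load(f)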
remove_files = pickle.load(open(cm.data_dir / 'under-over_sized_mels.pkl', 'rb'))
phonemized_metadata_path = cm.phonemized_metadata_path
train_metadata_path = cm.train_metadata_path
test_metadata_path = cm.valid_metadata_path
inconsistent naming of train and validation
new_metadata = [f'{k}|{v}\n' for k, v in phonemized_data.items()]
shuffled_metadata = np.random.permutation(new_metadata)
train_metadata = shuffled_metadata[0:train_len]
Suggested change:
- train_metadata = shuffled_metadata[0:train_len]
+ train_metadata = shuffled_metadata[0:-test_len]
I think it's safer to just use test_len; it also saves a couple of lines.
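A self-contained sketch of the split expressed only in terms of test_len; shuffled_metadata here is a toy stand-in for the shuffled metadata lines in the diff:

import numpy as np

# Toy stand-in for the shuffled metadata lines.
shuffled_metadata = np.random.permutation([f'file_{i}|text {i}\n' for i in range(100)])

test_len = 10
train_metadata = shuffled_metadata[:-test_len]  # everything except the last test_len lines
test_metadata = shuffled_metadata[-test_len:]   # the last test_len lines

assert len(train_metadata) + len(test_metadata) == len(shuffled_metadata)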
file.writelines(test_metadata)
# some checks
assert metadata_len == len(set(list(phonemized_data.keys()))), \
Suggested change:
- assert metadata_len == len(set(list(phonemized_data.keys()))), \
+ assert metadata_len == len(phonemized_data)
Same for the other dict keys.
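A quick check of why the simplification is safe: dict keys are unique by construction, so wrapping them in set() adds nothing.

phonemized_data = {'a': 1, 'b': 2}
assert len(set(list(phonemized_data.keys()))) == len(phonemized_data)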
Added phoneme_language flag in training_config.yaml.
Removed skip_phonemization flag since it was unused.