Skip to content

Commit

Permalink
fix: better handling of empty filelist
Browse files (browse the repository at this point in the history)
  • Loading branch information
MENGZHEGENG authored and roedoejet committed Jul 24, 2024
1 parent 3e6a819 commit 189e338
Showing 1 changed file with 20 additions and 12 deletions.
32 changes: 20 additions & 12 deletions everyvoice/wizard/dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import random
import re
import sys
from pathlib import Path
from typing import Sequence

Expand Down Expand Up @@ -174,7 +175,14 @@ def looks_like_sv(self, file_type, separator) -> bool:
filelist_path, delimiter=separator, record_limit=10
)

column_count = len(initial_records[0])
if len(initial_records) > 0:
column_count = len(initial_records[0])
else:
print(
f"ERROR: The filelist you specify ({filelist_path}) is empty. Please double check."
)
sys.exit(1)

if column_count < 2:
print(
f"File '{filelist_path}' does not look like a '{file_type}' file: no record separator found on header line."
Expand Down Expand Up @@ -439,11 +447,11 @@ def effect(self):
if self.state[StepNames.data_has_speaker_value_step] == "no":
add_missing_speaker(self.state)
# apply automatic conversions
self.state[
"model_target_training_text_representation"
] = apply_automatic_text_conversions(
self.state["filelist_data"],
self.state[StepNames.filelist_text_representation_step],
self.state["model_target_training_text_representation"] = (
apply_automatic_text_conversions(
self.state["filelist_data"],
self.state[StepNames.filelist_text_representation_step],
)
)


Expand Down Expand Up @@ -645,12 +653,12 @@ def effect(self):
# Apply the language code:
isocode = get_iso_code(self.response)
# Apply text conversions and get target training representation
self.state[
"model_target_training_text_representation"
] = apply_automatic_text_conversions(
self.state["filelist_data"],
self.state[StepNames.filelist_text_representation_step],
global_isocode=isocode,
self.state["model_target_training_text_representation"] = (
apply_automatic_text_conversions(
self.state["filelist_data"],
self.state[StepNames.filelist_text_representation_step],
global_isocode=isocode,
)
)


Expand Down

0 comments on commit 189e338

Please sign in to comment.