diff --git a/nemo/collections/tts/data/dataset.py b/nemo/collections/tts/data/dataset.py index 901b4168130f..e1b010a00030 100644 --- a/nemo/collections/tts/data/dataset.py +++ b/nemo/collections/tts/data/dataset.py @@ -190,10 +190,10 @@ def __init__( self.phoneme_probability = getattr(self.text_tokenizer, "phoneme_probability", None) else: if text_tokenizer_pad_id is None: - raise ValueError(f"text_tokenizer_pad_id must be specified if text_tokenizer is not BaseTokenizer") + raise ValueError("text_tokenizer_pad_id must be specified if text_tokenizer is not BaseTokenizer") if tokens is None: - raise ValueError(f"tokens must be specified if text_tokenizer is not BaseTokenizer") + raise ValueError("tokens must be specified if text_tokenizer is not BaseTokenizer") self.text_tokenizer_pad_id = text_tokenizer_pad_id self.cache_text = True if self.phoneme_probability is None else False @@ -496,7 +496,7 @@ def add_reference_audio(self, **kwargs): speaker_to_index_map[d["speaker_id"]].add(i) # Random sample a reference audio from the same speaker self.get_reference_for_sample = lambda sample: self.data[ - random.sample(speaker_to_index_map[sample["speaker_id"]], 1)[0] + random.choice(tuple(speaker_to_index_map[sample["speaker_id"]])) ] elif reference_audio_type == "ground-truth": # Use ground truth audio as reference audio @@ -679,7 +679,7 @@ def __getitem__(self, index): sample_pitch_mean = pitch_stats["pitch_mean"] sample_pitch_std = pitch_stats["pitch_std"] else: - raise ValueError(f"Missing statistics for pitch normalization.") + raise ValueError("Missing statistics for pitch normalization.") pitch -= sample_pitch_mean pitch[pitch == -sample_pitch_mean] = 0.0 # Zero out values that were previously zero diff --git a/nemo/collections/tts/parts/utils/helpers.py b/nemo/collections/tts/parts/utils/helpers.py index 28be259502c5..f0ca0d0133f6 100644 --- a/nemo/collections/tts/parts/utils/helpers.py +++ b/nemo/collections/tts/parts/utils/helpers.py @@ -632,10 +632,8 @@ def 
plot_gate_outputs_to_numpy(gate_targets, gate_outputs): def save_figure_to_numpy(fig): - # save it to a numpy array. - data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='') - data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)) - return data + img_array = np.array(fig.canvas.renderer.buffer_rgba())[..., :3] + return img_array @rank_zero_only @@ -802,8 +800,7 @@ def clip_grad_value_(parameters, clip_value, norm_type=2): def convert_pad_shape(pad_shape): - l = pad_shape[::-1] - pad_shape = [item for sublist in l for item in sublist] + pad_shape = [item for sublist in pad_shape[::-1] for item in sublist] return pad_shape