-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathencoder.py
84 lines (75 loc) · 3.21 KB
/
encoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import tensorflow as tf
import numpy as np
from hparams import hparams
from feeder import Feeder
import io
class Encoder(tf.keras.Model):
def __init__(self, hparams, is_training, scope):
"""Nachotron Encoder"""
super(Encoder, self).__init__()
self.batch_size = hparams['batch_size']
self.lstm_units = hparams['enc_lstm_units']
scope = 'enc_conv_layers' if scope is None else scope # Names are given when calling them
drop_rate = hparams['enc_dropout_rate'] if is_training else 0
filters = hparams['enc_conv_filters']
kernel_size = hparams['enc_conv_kernel_size']
activation = hparams['enc_conv_activation']
input_shape = (self.batch_size, None, None) # (Batch_size, time_steps, input vector length)
# Create the different layers of the encoder
self.character_embedding = tf.keras.layers.Embedding(
hparams['char_size'],
hparams['char_embedding_dim'],
mask_zero=True)
self.conv_1 = tf.keras.layers.Conv1D(
filters = filters,
kernel_size = kernel_size,
activation = activation,
padding = 'same',
input_shape = input_shape)
self.drop_out_1 = tf.keras.layers.Dropout(drop_rate)
self.conv_2 = tf.keras.layers.Conv1D(
filters = filters,
kernel_size = kernel_size,
activation = activation,
padding = 'same')
self.drop_out_2 = tf.keras.layers.Dropout(drop_rate)
self.conv_3 = tf.keras.layers.Conv1D(
filters = filters,
kernel_size = kernel_size,
activation = activation,
padding = 'same')
self.batch_normalization = tf.keras.layers.BatchNormalization()
self.bidirectional_LSTM = tf.keras.layers.Bidirectional(
tf.keras.layers.LSTM(
units = self.lstm_units,
activation = hparams["enc_lstm_activation"],
return_sequences = True,
return_state = True))
def call(self, inputs, hidden=None):
x = self.character_embedding(inputs)
x = self.conv_1(x)
x = self.drop_out_1(x)
x = self.conv_2(x)
x = self.drop_out_2(x)
x = self.conv_3(x)
x = self.batch_normalization(x)
output, forward_h, forward_c, backward_h, backward_c = self.bidirectional_LSTM(x, initial_state=hidden)
return output, [forward_h, forward_c, backward_h, backward_c]
def initialize_hidden_state(self):
return [tf.zeros((self.batch_size, self.lstm_units)) for i in range(4)]
if __name__ == "__main__":
print("Create Feeder")
feeder = Feeder()
sentences, audio_tittles = feeder.create_dataset()
print("Nachotron Encoder test:")
encoder = Encoder(hparams, True, "Test")
input_batch, _ = feeder.get_batch(encoder.batch_size, (sentences, audio_tittles))
print(f"\nInput batch shape: {input_batch.shape}")
sample_hidden = encoder.initialize_hidden_state()
# print (f'Encoder Hidden state shape: (batch size, units) {sample_hidden.shape}')
output, _ = encoder(input_batch, sample_hidden)
print(f'\nOutput shape: {output.shape}')
print(encoder.summary())
# sample_output, [forward_h, forward_c, backward_h, backward_c] = encoder(example_input_batch, sample_hidden)
# print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))
# print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))