train.py
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # suppress TensorFlow C++ log noise
import tensorflow as tf
from pickle import dump
from buildModel import build_rnn_model
from dataPreprocessing import process_data, create_training_data
from tokenize_sentence import tokenize_sentence
def train(model, batch_size, epochs, learning_rate, X, y):
    """Compile and fit the model, then save the trained weights to disk."""
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    history = model.fit(X, y, batch_size=batch_size, epochs=epochs)
    print("--" * 10)
    print("Model Training Complete")
    print("--" * 10)
    print("Saving model...")
    filename = 'model_RNN_Shakespeare.h5'
    model.save(filename)
    print("Model saved as:", filename)
    print("--" * 10)
    return history, model
### TODO: Take all inputs from command-line arguments (see the sketch below)
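# A minimal sketch of how the TODO above could be addressed with argparse
# (hypothetical flags; not yet wired into the script):
#
#   import argparse
#   parser = argparse.ArgumentParser(description='Train the Shakespeare RNN')
#   parser.add_argument('--path', default='t8.shakespeare.txt')
#   parser.add_argument('--batch-size', type=int, default=128)
#   parser.add_argument('--epochs', type=int, default=150)
#   parser.add_argument('--learning-rate', type=float, default=0.0009)
#   args = parser.parse_args()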
print("We are here")
# Processing data
path = 't8.shakespeare.txt'
sentences = process_data(path)
tokenizer, numeric_sentences = tokenize_sentence(sentences)
X,y,vocabulary_size,input_length = create_training_data(tokenizer, numeric_sentences)
# Build the RNN and launch training
model = build_rnn_model(vocabulary_size, input_length)
print("--" * 10)
print("Starting the training of the model:")
print("--" * 10)
batch_size = 128
epochs = 150
learning_rate = 0.0009
train(model, batch_size, epochs, learning_rate, X, y)
# Dump the tokenizer so the same word-index mapping can be reused at inference time
print("--" * 10)
print("Saving tokenizer...")
with open('tokenizer.pkl', 'wb') as f:
    dump(tokenizer, f)
print("Tokenizer saved!")
print("--" * 10)