-
Notifications
You must be signed in to change notification settings - Fork 3
/
translate.py
109 lines (85 loc) · 2.63 KB
/
translate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
import os
import numpy as np
import random
from tinygrad.tensor import Device
from tinygrad.nn.state import get_parameters
from extra.training import train, evaluate
from extra.models.transformer import Transformer
from tinygrad.nn.optim import Adam
# Based on https://github.com/geohot/tinygrad/blob/master/examples/transformer.py
# Fixed sequence length: every source/target line is space-padded or
# truncated to this many characters, and it is passed to Transformer.
MAX_LEN = 10
# Characters whose code point is >= this value are replaced by "X" in
# int_of_char; also passed to Transformer and evaluate as the class count.
NUM_CLASSES = 255
# Model hyperparameters, handed positionally to the Transformer constructor.
LAYERS = 16
FF_DIM = 64
NUM_HEADS = 4
# Tied to NUM_HEADS (16 * NUM_HEADS = 64 with the values above).
EMBED_DIM = NUM_HEADS * 16
# Steps passed to each train() call.
TRAIN_STEPS = 100
# Number of train/evaluate rounds run by the script entry point.
TRAIN_LOOPS = 100
def int_of_char(c):
    """Return the integer code for the single character ``c``.

    Code points at or above ``NUM_CLASSES`` are replaced by 88, the code
    point of "X".
    """
    code_point = ord(c)
    return code_point if code_point < NUM_CLASSES else 88  # "X" character
def ints_of_str(s):
    """Encode ``s`` as a list of integer codes, one per character."""
    return list(map(int_of_char, s))
def str_of_ints(ints):
    """Decode an iterable of integer code points into a string.

    Each element is converted with ``chr`` and the characters are joined in
    iteration order. An empty iterable yields the empty string.
    """
    # Build the result in one pass instead of growing a string with `+=`.
    return "".join(map(chr, ints))
def _read_fixed_width_lines(path):
    """Return every line of ``path`` padded/truncated to ``MAX_LEN`` chars."""
    with open(path) as handle:
        # Each line keeps its trailing newline (when it has one); the space
        # padding is appended after it and the result is cut to MAX_LEN.
        return [line.ljust(MAX_LEN)[:MAX_LEN] for line in handle]


def make_translation_dataset(
    source_path="run/source", target_path="run/target", test_size=2000
):
    """Load the parallel source/target files as integer-encoded arrays.

    Line ``i`` of ``source_path`` is paired with line ``i`` of
    ``target_path``. Every line is space-padded or truncated to ``MAX_LEN``
    characters and encoded with ``ints_of_str``. The first ``test_size``
    pairs become the test split and the remaining pairs the training split.

    Args:
        source_path: Text file with one source sequence per line.
        target_path: Text file with one target sequence per line.
        test_size: Number of leading pairs held out for testing.

    Returns:
        A tuple ``(X_train, Y_train, X_test, Y_test)`` of numpy arrays.

    Raises:
        ValueError: If the two files do not have the same number of lines.
    """
    sources = _read_fixed_width_lines(source_path)
    targets = _read_fixed_width_lines(target_path)

    # Pairs are matched by position, so a length mismatch would silently
    # misalign (or fail on) the tail of the data; reject it up front.
    if len(sources) != len(targets):
        raise ValueError(
            f"{source_path} has {len(sources)} lines but "
            f"{target_path} has {len(targets)}"
        )

    x = [ints_of_str(line) for line in sources]
    y = [ints_of_str(line) for line in targets]

    ds_X_train = np.array(x[test_size:])
    ds_Y_train = np.array(y[test_size:])
    ds_X_test = np.array(x[:test_size])
    ds_Y_test = np.array(y[:test_size])
    return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test
if __name__ == "__main__":
    # Train the transformer on the translation data for TRAIN_LOOPS rounds,
    # decaying the learning rate and printing one random test sample after
    # each round.
    model = Transformer(NUM_CLASSES, MAX_LEN, LAYERS, EMBED_DIM, NUM_HEADS, FF_DIM)
    X_train, Y_train, X_test, Y_test = make_translation_dataset()
    lr = 0.003
    for _ in range(TRAIN_LOOPS):
        # The optimizer is rebuilt each round with the current learning rate.
        optim = Adam(get_parameters(model), lr=lr)
        train(model, X_train, Y_train, optim, TRAIN_STEPS, BS=64)
        acc, Y_test_preds = evaluate(
            model, X_test, Y_test, num_classes=NUM_CLASSES, return_predict=True
        )
        lr /= 1.2
        print(f"reducing lr to {lr:.4f}")

        # NOTE(review): assumes the sample print runs once per round - confirm.
        # randrange excludes the upper bound; randint(0, len(...)) could
        # return len(...) itself and index past the end of the predictions.
        k = random.randrange(len(Y_test_preds))
        print("Source: " + str_of_ints(X_test[k]))
        print("Target: " + str_of_ints(Y_test[k]))
        print("Pred: " + str_of_ints(Y_test_preds[k]))