-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
113 lines (84 loc) · 3.18 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python3
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch import optim
from torch.utils.tensorboard import SummaryWriter
# Run tag shared by the whole script: it selects the input archive
# (processed/{name}_1M.npz) and, lower-cased, the output weights file
# (nets/{name.lower()}.pth).
name = "Ding"
class ChessValueDataset(Dataset):
    """In-memory dataset of ``(input, target)`` pairs read from an ``.npz`` file.

    The archive's first two positional arrays are used: ``arr_0`` becomes the
    inputs ``X`` and ``arr_1`` becomes the targets ``Y``.

    Args:
        path: Archive to load. When omitted, defaults to
            ``processed/{name}_1M.npz`` built from the module-level ``name``.
    """

    def __init__(self, path=None):
        if path is None:
            path = f"processed/{name}_1M.npz"
        # np.load on an .npz returns a lazily-read NpzFile that holds the file
        # open; indexing materialises each array, so closing right after the
        # two reads is safe and avoids leaking the handle.
        with np.load(path) as dat:
            self.X = dat['arr_0']
            self.Y = dat['arr_1']
        print("loaded", self.X.shape, self.Y.shape)

    def __len__(self):
        """Return the number of samples (length of the first axis of ``X``)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return (self.X[idx], self.Y[idx])
class Net(nn.Module):
    """Convolutional value network producing one tanh-squashed scalar per sample.

    Twelve ReLU-activated convolutions (groups ``a``-``d``, growing from 5
    input channels to 128) feed a single linear layer. The forward pass
    flattens to rows of 128 features, so the conv stack must have reduced
    the spatial size to 1x1 by then.
    NOTE(review): that holds for 8x8 spatial inputs -- confirm against the data.
    """

    def __init__(self):
        super().__init__()

        # Group a: 5 -> 16 -> 16 -> 32 channels, last conv strided.
        self.a1 = nn.Conv2d(5, 16, kernel_size=3, padding=1)
        self.a2 = nn.Conv2d(16, 16, kernel_size=3, padding=1)
        self.a3 = nn.Conv2d(16, 32, kernel_size=3, stride=2)

        # Group b: 32 -> 32 -> 32 -> 64 channels, last conv strided.
        self.b1 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.b2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.b3 = nn.Conv2d(32, 64, kernel_size=3, stride=2)

        # Group c: 2x2 kernels, 64 -> 64 -> 64 -> 128 channels.
        self.c1 = nn.Conv2d(64, 64, kernel_size=2, padding=1)
        self.c2 = nn.Conv2d(64, 64, kernel_size=2, padding=1)
        self.c3 = nn.Conv2d(64, 128, kernel_size=2, stride=2)

        # Group d: pointwise (1x1) convolutions at 128 channels.
        self.d1 = nn.Conv2d(128, 128, kernel_size=1)
        self.d2 = nn.Conv2d(128, 128, kernel_size=1)
        self.d3 = nn.Conv2d(128, 128, kernel_size=1)

        self.last = nn.Linear(128, 1)

    def forward(self, x):
        """Run ``x`` through the conv stack and return a ``(rows, 1)`` tensor in [-1, 1]."""
        conv_stack = (
            self.a1, self.a2, self.a3,
            self.b1, self.b2, self.b3,
            self.c1, self.c2, self.c3,
            self.d1, self.d2, self.d3,
        )
        for conv in conv_stack:
            x = F.relu(conv(x))

        flat = x.view(-1, 128)
        return torch.tanh(self.last(flat))
def main():
    """Train ``Net`` on ``ChessValueDataset`` and write its weights to ``nets/``.

    Runs 100 epochs of Adam + MSE loss with batches of 256, logs the mean
    per-batch loss of every epoch to stdout and TensorBoard
    (``Loss/train``), and saves the model's ``state_dict`` to
    ``nets/{name.lower()}.pth``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    chess_dataset = ChessValueDataset()
    train_loader = DataLoader(chess_dataset, batch_size=256, shuffle=True)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters())
    floss = nn.MSELoss()

    # Create the output directory up front (race-free) so a permissions
    # problem surfaces before any training time is spent.
    os.makedirs("nets", exist_ok=True)
    checkpoint_path = f"nets/{name.lower()}.pth"

    writer = SummaryWriter()
    try:
        model.train()
        for epoch in range(100):
            all_loss = 0
            num_loss = 0
            for data, target in train_loader:
                # Targets arrive as (batch,); the model emits (batch, 1).
                target = target.unsqueeze(-1)
                data, target = data.to(device), target.to(device)
                # The conv layers and MSELoss need float32 tensors.
                data = data.float()
                target = target.float()

                optimizer.zero_grad()
                output = model(data)
                loss = floss(output, target)
                loss.backward()
                optimizer.step()

                all_loss += loss.item()
                num_loss += 1

            # Guard against a loader that yielded no batches.
            avg_loss = all_loss / num_loss if num_loss else float("nan")
            print(f"Epoch {epoch:03d}: Loss = {avg_loss:.6f}")
            writer.add_scalar('Loss/train', avg_loss, epoch)

            # Overwrite the checkpoint every epoch so an interrupted run
            # still leaves usable weights behind.
            torch.save(model.state_dict(), checkpoint_path)
    finally:
        # Flush and release the event file even if training raised.
        writer.close()


if __name__ == "__main__":
    main()