Train.py
# coding: utf-8
# Reference: http://efavdb.com/battleship/
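"""Train a Battleship-playing network through self-play.

Each game yields a trajectory of (board state, move, hit) tuples; each move is
then reinforced in proportion to its discounted, baseline-adjusted future hits,
a REINFORCE-style policy-gradient update in the spirit of the post above.
"""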
from Game import Game
from GameConfig import *
from Network import Network


class TrainGame:
    def __init__(self, model_file=None):
        self.gamma = 0.5   # reward discount factor
        self.alpha = 0.01  # learning-rate scale applied to each move's reward
        self.network = Network(BOARD_WIDTH, BOARD_HEIGHT, len(SHIPS))
        if model_file is not None:
            self.network.restoreModel(model_file)
        self.game = Game(BOARD_WIDTH, BOARD_HEIGHT, SHIPS, network=self.network)
        self.total_ships_lengths = sum(ship['length'] for ship in self.game.board.ships)
        self.board_size = self.game.board.board_height * self.game.board.board_width
        self.max_train_step = 300000
    def selfPlayOneGame(self):
        """Play one game to completion and return its trajectory."""
        all_input_states = []
        all_moves = []
        all_hits = []
        self.game.resetBoard()
        (input_dimensions, move, is_hit) = self.game.takeAMove()
        while input_dimensions is not None:
            all_input_states.append(input_dimensions)
            all_moves.append(move)
            all_hits.append(is_hit)
            (input_dimensions, move, is_hit) = self.game.takeAMove()
        all_discounted_reward = self.rewardsCalculator(all_hits)
        return (all_input_states, all_moves, all_hits, all_discounted_reward)
    def rewardsCalculator(self, hit_log, gamma=0.5):
        """Discounted sum of future hits over the trajectory.

        Each move's hit/miss is baseline-adjusted by the probability that a
        uniformly random shot would hit (remaining ship cells / remaining
        squares), then future adjusted values are discounted back to it.
        """
        hit_log_weighted = [
            (item - float(self.total_ships_lengths - sum(hit_log[:index])) / float(self.board_size - index)) *
            (gamma ** index)
            for index, item in enumerate(hit_log)]
        return [(gamma ** (-i)) * sum(hit_log_weighted[i:]) for i in range(len(hit_log))]
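    # Illustrative worked example (hypothetical numbers, not from GameConfig):
    # with board_size=25, total_ships_lengths=5, hit_log=[1, 0], gamma=0.5:
    #   weighted[0] = (1 - 5/25) * 0.5**0 =  0.8
    #   weighted[1] = (0 - 4/24) * 0.5**1 ~ -0.0833
    #   rewards     = [0.8 - 0.0833, -0.0833 / 0.5] ~ [0.7167, -0.1667]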
    def trainWithSelfPlay(self):
        all_entropy = 0
        all_num_move = 0
        batch_size = 50
        for i in range(self.max_train_step):
            (all_input_states, all_moves, all_hits, all_discounted_reward) = self.selfPlayOneGame()
            all_num_move += len(all_hits)
            for input_states, moves, discounted_reward in zip(all_input_states, all_moves, all_discounted_reward):
                # Scale the learning rate by each move's discounted reward so
                # rewarded moves are reinforced and penalized ones discouraged.
                all_entropy += self.network.runTrainStep(input_states, [moves], self.alpha * discounted_reward)
            if i % batch_size == 0 and i != 0:
                print('Total games: {}  Avg moves: {:.2f}  Avg entropy: {:.4f}'.format(
                    i, all_num_move * 1.0 / batch_size, all_entropy * 1.0 / batch_size))
                all_entropy = 0
                all_num_move = 0
            if i % (batch_size * 20) == 0:
                self.game.network.saveModel('./models/mymodel')
                print('SAVE MODEL # {}'.format(i))
if __name__ == '__main__':
    train_game = TrainGame(model_file='./models/mymodel')
    train_game.trainWithSelfPlay()