-
Notifications
You must be signed in to change notification settings - Fork 0
/
conf.py
107 lines (99 loc) · 4.77 KB
/
conf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from math import sqrt
conf = {
'MODEL_DIR': 'sp_models',
'EVAL_DIR': 'sp_eval_games',
'SELF_PLAY_DIR': 'sp_self_play_data',
'EXTRA_TRAINING_DATA_DIR': ['/home/miruware/go/sejonggo/sp_self_play_data'],
'LOG_DIR': 'logs',
'TMP_DIR': 'temp',
'BEST_MODEL': 'best_model.h5',
'SHOW_EACH_MOVE': True,
'SHOW_END_GAME': True,
'REPORT_PERIOD': 200,
'GAMES_DIR': 'sp_eval_games',
'GPUs': [0,1,2,3,4,5,6,7],
'SIMULATION_MODE': "ASYNC",
'THREAD_SIMULATION': True,
'PREDICTING_BATCH_SIZE': 32,
'KGS_DATA_DIR': 'kgs_data',
'KGS_ZIP_FOLDER': 'kgs_data_zip',
### MODEL ###
'N_RESIDUAL_BLOCKS': 20, # Size of the tower of residual blocks, 20 for small model, 40 for alphagozero full size
'L2_EPSILON': 1e-4, # The epsilon coefficient in the loss value function, 1e-4 in paper
### SELF-PLAY ###
'N_GAMES': 5000, # Number of games of self play generated by best_model, 25k in paper
'GAME_RANGE': [0,5000],
'MCTS_SIMULATIONS': 1600, # 1.6k in paper
'N_GAME_PROCESS': 32, # number of game processes per server
'ENERGY': 8, # energy for async simulation
'SLEEP_SECONDS': 120, # number of seconds to sleep to wait for other jobs. For ex ifthere is no new best model to self-play
'SIZE': 19, # board size 19 in paper
'KOMI': 5.5, # The komi points given to white player
'STOP_EXPLORATION': 30, # Number of plays after which temperature goes to 0 , 30 in paper
'MCTS_BATCH_SIZE': 100, # Size of the prediction batch while exploring mcts
'DIRICHLET_ALPHA': .03, # The value of dirichlet coefficient in the nois of root_node of mcts simulation
'DIRICHLET_EPSILON': .25, # How much the noise is accounted for
'RESIGNATION_PERCENT': .10, # 10% of the time we don't use resignation to assess resignation value
'RESIGNATION_ALLOWED_ERROR': .05, # 5% of the time we resign a game we could have won
### TRAIN ###
'EPOCHS_PER_SAVE': 300, # A model will be saved to be evaluated this amount of epochs, 1000 in paper
'EPOCHS_PER_BACKUP': 2, # auto-save model to 'backup.h5' (overwrite if existing) after this number of epoch
'TRAIN_BATCH_SIZE': 32, # Batch size in the training phase, 32 in paper
'NUM_WORKERS': 64, # We use this many GPU workers so split the task, 64 in paper. This is actually num of iteration, should be set so as NUM_WORKER * BATCH_SIZE = NUM OF SAMPLES IN A DATASET = 25k * num of move of each game
'HISTOGRAM_FREQ': 0, # Shows the histograms in Tensorboard. For debugging
'VALIDATION_SPLIT': 0, # Needed if you want histograms in Tensorboard.
'N_MOST_RECENT_GAMES': 500000, # Number of most recent self-play games to retrieve for training. 500,000 in the paper. Data outside this range will be purged
### EVALUATOR ###
'EVALUATE_N_GAMES': 100, # The number of games to test on to elect new best model, 400 in paper
'EVALUATE_MARGIN': .55, # Model has to win by that margin to be elected, 55% in paper
'SGF_ENABLED': False,
### DISTRIBUTED SYSTEM CONFIG
'SELF_PLAY_SERVER': [
{
'user':'miruware',
'host': '211.180.114.9',
'creds':'miruware!',
'dest': '/home/miruware/go/sejonggo'
},
{
'user':'miruware',
'host': '211.180.114.146',
'creds':'miruware!',
'dest': '/home/miruware/go/sejonggo'
},
{
'user':'miruware',
'host': '211.180.114.147',
'creds':'miruware!',
'dest': '/home/miruware/go/sejonggo'
},
],
'TRAINING_SERVER': {
'user':'miruware',
'host': '211.180.114.12',
'creds':'miruware!',
'dest': '/home/miruware/sdb/go/sejonggo'
},
### MCTS
'MSTC_PATTERN_FILE': 'patterns.spat',
'LARGE_MCTS_PATTERN_FILE': 'patterns.prob',
'PRIOR_EVEN': 10,
'RESIGN_THRES': 0.2,
'N_SIMS': 1400,
'EXPAND_VISITS': 8,
'FASTPLAY20_THRES': 0.8, # if at 20% playouts winrate is >this, stop reading
'FASTPLAY5_THRES': 0.95, # if at 5% playouts winrate is >this, stop reading
'PROB_HEURISTIC': {'capture': 0.9, 'pat3': 0.95}, # probability of heuristic suggestions being taken in playout
'PROB_SSAREJECT': 0.9, # probability of rejecting suggested self-atari in playout
'PROB_RSAREJECT': 0.5,
# probability of rejecting random self-atari in playout; this is lower than above to allow nakade
'PRIOR_EVEN': 10, # should be even number; 0.5 prior
'PRIOR_SELFATARI': 10, # negative prior
'PRIOR_CAPTURE_ONE': 15,
'PRIOR_CAPTURE_MANY': 30,
'PRIOR_PAT3': 10,
'PRIOR_LARGEPATTERN': 100, # most moves have relatively small probability
'PRIOR_CFG': [24, 22, 8], # priors for moves in cfg dist. 1, 2, 3
'PRIOR_EMPTYAREA': 10,
'RAVE_EQUIV': 3500,
}