"""
Defining Grid Search functionality
"""
# Project imports (train.py is expected to provide QNetwork, ReplayMemory, run_episodes and train)
from train import *
# Standard library
import itertools
import operator
import functools
# Third party
import numpy as np
import gym

# CONSTANTS
EPS = float(np.finfo(np.float32).eps)
def grid_search(ENVIRONMENTS, hyperparameter_options, num_episodes):
    memory_size = 10000
    # Initialise the environments
    envs = {name: gym.envs.make(name) for name in ENVIRONMENTS}
    for env in envs.values():
        # Perform grid search over 5 random seeds
        for seed in range(5):
            print("Seed:", seed)
            best_sum_rewards = -np.inf
            best_parameters = None
            # Perform grid search over every hyperparameter combination
            n_combinations = functools.reduce(
                operator.mul, [len(options) for options in hyperparameter_options.values()]
            )
            for i, hyperparams in enumerate(itertools.product(*hyperparameter_options.values())):
                current_model_params = dict(zip(hyperparameter_options.keys(), hyperparams))
                # print(
                #     "\rTrying out combination {}/{}: {}".format(
                #         i + 1, n_combinations, str(current_model_params)
                #     ), flush=True, end=""
                # )
                num_hidden = current_model_params['num_hidden']
                n_out = env.action_space.n
                n_in = len(env.observation_space.low)

                # Single DQN
                memory = ReplayMemory(memory_size)
                model = QNetwork(n_in, n_out, num_hidden)
                episode_durations_single, cum_reward_single = run_episodes(
                    train, model, memory, env, num_episodes, **current_model_params
                )

                # Double DQN (second network passed as model_2)
                memory = ReplayMemory(memory_size)
                model = QNetwork(n_in, n_out, num_hidden)
                model_2 = QNetwork(n_in, n_out, num_hidden)
                episode_durations_double, cum_reward_double = run_episodes(
                    train, model, memory, env, num_episodes, model_2=model_2, **current_model_params
                )

                # Score used to select the best hyperparameters:
                # sum of cumulative rewards over all episodes, for DQN and Double DQN combined
                sum_rewards = cum_reward_double + cum_reward_single
                if sum_rewards > best_sum_rewards:
                    print("\nNew highest score found ({:.4f})".format(sum_rewards))
                    best_sum_rewards = sum_rewards
                    best_parameters = current_model_params

            print("\nFound best parameters")
            print(str(best_parameters))
            print("Score:", best_sum_rewards)
if __name__ == "__main__":
    # Other candidate environments: 'MountainCar-v0', 'MountainCarContinuous-v0', 'CartPole-v1'
    ENVIRONMENTS = ['Acrobot-v1']
    hyperparameter_opt_mountain_car = {
        "batch_size": [128],
        "discount_factor": [0.9, 0.99],
        "learn_rate": [0.01, 0.001],
        "num_hidden": [128],
        "update_target": [5, 10, 20],
    }
    grid_search(ENVIRONMENTS, hyperparameter_opt_mountain_car, num_episodes=200)
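
# A minimal sketch of how the grid is enumerated (independent of train.py; the
# names below are illustrative only): itertools.product over the option lists
# yields every combination, and dict(zip(...)) maps each combination back to its
# hyperparameter names.
#
#   options = {"discount_factor": [0.9, 0.99], "learn_rate": [0.01, 0.001]}
#   for combo in itertools.product(*options.values()):
#       params = dict(zip(options.keys(), combo))
#   # -> {'discount_factor': 0.9, 'learn_rate': 0.01},
#   #    {'discount_factor': 0.9, 'learn_rate': 0.001}, ...
#
# With the grid above this gives 1 * 2 * 2 * 1 * 3 = 12 combinations, each trained
# as both DQN and Double DQN and repeated over the 5 seeds.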