# utils.py
from problem import bbob
import numpy as np
from numpy import random
import os
from Population import Population
import torch
from tensorboardX import SummaryWriter
# def construct_problem(problem, dim, upperbound, train_batch_size, test_batch_size, difficulty, instance_seed = 3849):
# if problem in ['bbob', 'bbob-noisy']:
# return bbob.BBOB_Dataset.get_datasets(suit = problem,
# dim = dim,
# upperbound = upperbound,
# train_batch_size = train_batch_size,
# test_batch_size = test_batch_size,
# difficulty = difficulty,
# instance_seed = instance_seed)
def construct_problem(config, seed):
    if config.problem in ['bbob', 'bbob-noisy']:
        return bbob.BBOB_Dataset.get_datasets(suit = config.problem,
                                               dim = config.dim,
                                               upperbound = config.upperbound,
                                               train_batch_size = config.train_batch_size,
                                               test_batch_size = config.test_batch_size,
                                               difficulty = config.difficulty,
                                               instance_seed = seed,
                                               mix_dim = config.mix_dim,
                                               test_all = config.test_all)
    else:
        raise ValueError(config.problem + ' is not defined!')
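# A minimal usage sketch (an assumption, not part of this module): `config` is expected to be
# an argparse.Namespace-like object exposing the attributes referenced above, e.g.
#     dataset = construct_problem(config, seed=3849)
# What get_datasets returns (e.g. a train/test split matching the two batch sizes) is
# determined by bbob.BBOB_Dataset and is assumed here, not defined in this file.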
# save the training logs (return curve and per-problem cost curves) as .npy files
def save_log(config, train_set, epochs, steps, cost, returns, normalizer):
    if config.no_saving:
        return
    log_dir = config.log_dir + f'/train/{config.run_time}/log/'
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    return_save = np.stack((steps, returns), 0)
    np.save(log_dir + 'return', return_save)
    for problem in train_set:
        name = f"{problem.__str__()}_{problem.dim}" if config.mix_dim else problem.__str__()
        if len(cost[name]) == 0:
            continue
        while len(cost[name]) < len(epochs):
            cost[name].append(cost[name][-1])  # pad with the last recorded value until lengths match
            normalizer[name].append(normalizer[name][-1])
        cost_save = np.stack((epochs, cost[name], normalizer[name]), 0)
        np.save(log_dir + name + '_cost', cost_save)
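# The saved arrays can be read back with np.load (np.save appends the '.npy' suffix), e.g.:
#     steps, returns = np.load(log_dir + 'return.npy')
#     epochs, costs, normalizers = np.load(log_dir + name + '_cost.npy')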
# TensorBoard (tensorboardX) logging helpers
def log_to_tb_train(tb_logger, agent, Reward, R, Critic_out, ratios, bl_val_detached, total_cost, grad_norms, reward,
                    entropy, approx_kl_divergence, reinforce_loss, baseline_loss, logprobs, show_figs, mini_step):
    tb_logger.add_scalar('learnrate/fe', agent.optimizer.param_groups[0]['lr'], mini_step)
    tb_logger.add_scalar('learnrate/actor', agent.optimizer.param_groups[1]['lr'], mini_step)
    tb_logger.add_scalar('learnrate/critic', agent.optimizer.param_groups[2]['lr'], mini_step)
    tb_logger.add_scalar('train/episode_Return', Reward.item(), mini_step)  # episode return
    tb_logger.add_scalar('train/Target_Return_changed', R.mean().item(), mini_step)  # mean of the changed target returns
    tb_logger.add_scalar('train/Critic_output', Critic_out.mean().item(), mini_step)
    tb_logger.add_scalar('train/ratios', ratios.mean().item(), mini_step)
    avg_reward = torch.stack(reward).mean().item()
    max_reward = torch.stack(reward).max().item()
    # grad_norms is a (raw, clipped) pair, with one norm per module in optimizer order: fe, actor, critic
    grad_norms, grad_norms_clipped = grad_norms
    tb_logger.add_scalar('train/avg_reward', avg_reward, mini_step)
    tb_logger.add_scalar('train/max_reward', max_reward, mini_step)
    tb_logger.add_scalar('loss/actor_loss', reinforce_loss.item(), mini_step)
    tb_logger.add_scalar('loss/-logprobs', -logprobs.mean().item(), mini_step)
    tb_logger.add_scalar('train/entropy', entropy.mean().item(), mini_step)
    tb_logger.add_scalar('train/approx_kl_divergence', approx_kl_divergence.item(), mini_step)
    tb_logger.add_histogram('train/bl_val', bl_val_detached.cpu(), mini_step)
    tb_logger.add_scalar('train/total_cost', total_cost, mini_step)
    tb_logger.add_scalar('grad/fe', grad_norms[0], mini_step)
    tb_logger.add_scalar('grad_clipped/fe', grad_norms_clipped[0], mini_step)
    tb_logger.add_scalar('grad/actor', grad_norms[1], mini_step)
    tb_logger.add_scalar('grad_clipped/actor', grad_norms_clipped[1], mini_step)
    tb_logger.add_scalar('grad/critic', grad_norms[2], mini_step)
    tb_logger.add_scalar('grad_clipped/critic', grad_norms_clipped[2], mini_step)
    tb_logger.add_scalar('loss/critic_loss', baseline_loss.item(), mini_step)
    tb_logger.add_scalar('loss/total_loss', (reinforce_loss + baseline_loss).item(), mini_step)
    if mini_step % 1000 == 0 and show_figs:
        # plot_grad_flow is assumed to be defined/imported elsewhere in the project
        tb_logger.add_images('grad/fe', [plot_grad_flow(agent.Fe)], mini_step)
        tb_logger.add_images('grad/actor', [plot_grad_flow(agent.Actor)], mini_step)
        tb_logger.add_images('grad/critic', [plot_grad_flow(agent.Critic)], mini_step)
def log_to_tb_rollout(tb_logger, problem, gbest, fes, R, T0, T1, T2, mini_step):
    tb_logger.add_scalar('rollout_T0/' + problem, T0, mini_step)
    tb_logger.add_scalar('rollout_T1/' + problem, T1, mini_step)
    tb_logger.add_scalar('rollout_T2/' + problem, T2, mini_step)
    tb_logger.add_scalar('rollout_cost/' + problem, gbest, mini_step)
    tb_logger.add_scalar('rollout_fes/' + problem, fes, mini_step)
    tb_logger.add_scalar('rollout_return/' + problem, R, mini_step)
# problems = rollout_result['cost'].keys()
#
# tb_logger.add_scalar('rollout/T0', rollout_result['T0'], epoch)
# tb_logger.add_scalar('rollout/T1', rollout_result['T1'], epoch)
# tb_logger.add_scalar('rollout/T2', rollout_result['T2'], epoch)
#
# for problem in problems:
# tb_logger.add_scalar('rollout_cost/' + problem, rollout_result['cost'][problem], epoch)
# tb_logger.add_scalar('rollout_fes/' + problem, rollout_result['fes'][problem], epoch)
# tb_logger.add_scalar('rollout_return/' + problem, rollout_result['return'][problem], epoch)
def log_to_tb_operator(tb_logger, mutation_op, crossover_op, mutation_action, crossover_action, mini_step):
    # log how often each mutation / crossover operator was selected in this step
    for i, m_op in enumerate(mutation_op):
        m_cnt = len(np.where(mutation_action == i)[0])
        tb_logger.add_scalar('mutation/' + m_op, m_cnt, mini_step)
    for i, c_op in enumerate(crossover_op):
        c_cnt = len(np.where(crossover_action == i)[0])
        tb_logger.add_scalar('crossover/' + c_op, c_cnt, mini_step)
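# Illustrative example (operator names are hypothetical): with
#     mutation_op = ['rand/1', 'best/2'] and mutation_action = np.array([0, 1, 0, 0]),
# this logs mutation/rand/1 = 3 and mutation/best/2 = 1 at the given mini_step.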
def log_to_tb_epoch(tb_logger, epoch_record, mini_step):
    for name in epoch_record.keys():
        tb_logger.add_scalar('Find_best/' + name, epoch_record[name], mini_step)
def log_to_tb_rollout_experiment(tb_logger, results, mini_step):
    cost = results['cost']
    problem_name = cost.keys()
    for problem in problem_name:
        problem_cost = np.stack(cost[problem])[:, -1]
        tb_logger.add_scalar('rollout_avgcost/' + problem, np.mean(problem_cost), mini_step)
        tb_logger.add_scalar('rollout_gbest/' + problem, np.min(problem_cost), mini_step)
        tb_logger.add_scalar('rollout_avgfes/' + problem, np.mean(results['fes'][problem]), mini_step)
        tb_logger.add_scalar('rollout_minfes/' + problem, np.min(results['fes'][problem]), mini_step)
        tb_logger.add_scalar('rollout_avgreturn/' + problem, np.mean(results['return'][problem]), mini_step)
        tb_logger.add_scalar('rollout_maxreturn/' + problem, np.max(results['return'][problem]), mini_step)
    tb_logger.add_scalar('rollout_time/T0', results['T0'], mini_step)
    tb_logger.add_scalar('rollout_time/T1', results['T1'], mini_step)
    tb_logger.add_scalar('rollout_time/T2', results['T2'], mini_step)
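# Note: np.stack(cost[problem])[:, -1] takes the final entry of each run's cost trajectory,
# so rollout_avgcost / rollout_gbest are the mean and best *final* cost over runs
# (assuming results['cost'][problem] is a list of per-run cost curves).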
def log_gen_operator(mutation_dict, crossover_dict, mutation_op, crossover_op, mutation_action, crossover_action):
    for i, m_op in enumerate(mutation_op):
        m_cnt = len(np.where(mutation_action == i)[0])
        mutation_dict[m_op].append(m_cnt)
    for i, c_op in enumerate(crossover_op):
        c_cnt = len(np.where(crossover_action == i)[0])
        crossover_dict[c_op].append(c_cnt)
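# log_gen_operator assumes both dicts are pre-initialized with one empty list per operator
# name (otherwise .append raises a KeyError), e.g.:
#     mutation_dict = {op: [] for op in mutation_op}
#     crossover_dict = {op: [] for op in crossover_op}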