-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmaster.py
192 lines (152 loc) · 6.65 KB
/
master.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import time
from main_ddpg import start
import yaml
import tensorflow as tf
from train_model import training
from train_model import prediction
import pickle
import numpy as np
import os.path
import sys
from collections import OrderedDict
import global_params
import multiprocessing
from difference_model import DifferenceModel
def foo():
    """Drive the iterative policy / difference-model training pipeline.

    For every policy iteration ``ii`` in ``range(1, 10)`` the routine rewrites
    ``config.yaml`` before each stage and then:

    1. runs ``start`` on a per-iteration copy of ``leo_rbdl_zmq_drl_2.yaml``,
       saving transitions as ``saved_data-perturbed-<ii>``;
    2. runs ``start`` on ``leo_rbdl_zmq_drl_3.yaml``, loading those
       transitions and saving ``saved_data-original-<ii>``;
    3. calls ``training`` on a fresh ``DifferenceModel`` with every
       perturbed/original file produced so far;
    4. runs ``start`` on a per-iteration copy of ``leo_rbdl_zmq_drl.yaml``
       and then on a copy of ``leo_rbdl_zmq_drl_real.yaml`` with
       ``global_params.test_run_on_model`` set to 1.

    All YAML paths are resolved relative to this file's directory;
    ``config.yaml`` is written relative to the current working directory.

    NOTE(review): the text this block was recovered from had lost all leading
    whitespace. The nesting below (extent of the ``for i`` loop, of the
    ``while`` loop, and which loop the final ``break`` leaves) is a best-effort
    reconstruction -- confirm against the original file before relying on it.
    """
    path = os.path.dirname(os.path.abspath(__file__))

    # Run the training on the ideal model phase (currently disabled)
    # cfg = "{}/leo_rbdl_zmq_drl.yaml".format(path)
    # a = start(cfg)
    # time.sleep(5)

    # No of policy iterations
    for ii in range(1, 10):

        # No of runs of one policy
        for i in range(1):

            # Run the trained policy on a real model
            global_params.ou_sigma = 0.1
            global_params.ou_theta = 0.15
            d = {'transitions': {'load': 0, 'save': 1, 'save_filename': 'saved_data-perturbed-{}'.format(ii), 'buffer_size': 5000},
                 'difference_model': 0}
            with open('config.yaml', 'w') as yaml_file:
                yaml.dump(d, yaml_file, default_flow_style=False)
            cfg = "{}/leo_rbdl_zmq_drl_2.yaml".format(path)
            # Per-iteration copy of the config, suffixed with the previous
            # iteration index (ii - 1).
            new_cfg = rl_run_rbdl_agent(cfg, ii - 1)
            start(new_cfg)
            time.sleep(2)

            # Run the transitions on the original model
            d = {'transitions': {'load': 1, 'load_filename': 'saved_data-perturbed-{}'.format(ii), 'save': 1,
                                 'save_filename': 'saved_data-original-{}'.format(ii), 'buffer_size': 5000},
                 'difference_model': 0}
            with open('config.yaml', 'w') as yaml_file:
                yaml.dump(d, yaml_file, default_flow_style=False)
            cfg = "{}/leo_rbdl_zmq_drl_3.yaml".format(path)
            start(cfg)

            # Train a new difference model or update one
            with tf.Graph().as_default() as diff_model:
                model = DifferenceModel(24, 18)
                with tf.Session(graph=diff_model) as sess:
                    # Only the very first run of the first iteration writes
                    # 'difference_model': 0; every later run writes 1.
                    if i == 0 and ii == 1:
                        d = {'difference_model': 0}
                    else:
                        d = {'difference_model': 1}
                    with open('config.yaml', 'w') as yaml_file:
                        yaml.dump(d, yaml_file, default_flow_style=False)
                    # Use the transition files of every iteration up to and
                    # including the current one.
                    perturbed_files = ['saved_data-perturbed-{}'.format(b) for b in range(1, ii+1)]
                    ideal_files = ['saved_data-original-{}'.format(b) for b in range(1, ii+1)]
                    # NOTE(review): Python 2 print statement (the shebang
                    # targets python2); this line is a syntax error on 3.
                    print perturbed_files
                    model = training(sess, model, perturbed_files, ideal_files, 24, 18, 300)

        # Training the policy with the difference model included
        global_params.ou_sigma = 0.12
        global_params.ou_theta = 0.15
        global_params.actor_learning_rate = 0.0001
        global_params.critic_learning_rate = 0.001
        iterations = 0
        # ``iterations != 1`` caps this loop at a single pass.
        while not global_params.learning_success and iterations != 1:
            if ii == 1:
                # First iteration: start with an empty replay buffer and
                # save it.
                d = {'replay_buffer': {'load': 0, 'save': 1, 'save_filename': 'saved_replay_buffer',
                                       'buffer_size': 100000}, 'difference_model': 0}
                with open('config.yaml', 'w') as yaml_file:
                    yaml.dump(d, yaml_file, default_flow_style=False)
            else:
                # Later iterations: load the saved replay buffer, do not
                # save it again.
                d = {'replay_buffer': {'load': 1, 'load_filename': 'saved_replay_buffer', 'save': 0, 'save_filename': 'saved_replay_buffer',
                                       'buffer_size': 100000}, 'difference_model': 0}
                with open('config.yaml', 'w') as yaml_file:
                    yaml.dump(d, yaml_file, default_flow_style=False)
            cfg = "{}/leo_rbdl_zmq_drl.yaml".format(path)
            new_cfg = rl_run_rbdl_agent(cfg, ii - 1)
            start(new_cfg, ii)
            time.sleep(5)

            # Running the learned policy on the difference model on the perturbed system to see if it works
            global_params.test_run_on_model = 1
            d = {'replay_buffer': {'load': 0, 'save': 0, 'buffer_size': 2000}, 'difference_model': 0}
            with open('config.yaml', 'w') as yaml_file:
                yaml.dump(d, yaml_file, default_flow_style=False)
            cfg = "{}/leo_rbdl_zmq_drl_real.yaml".format(path)
            # Here the copy is suffixed with the current iteration (ii), not
            # ii - 1 as above.
            new_cfg = rl_run_rbdl_agent(cfg, ii)
            start(new_cfg)
            global_params.test_run_on_model = 0
            iterations += 1

        # NOTE(review): as reconstructed, the flag is cleared immediately
        # before it is tested, so the success branch below cannot fire and
        # all nine iterations always run -- confirm the intended placement.
        global_params.learning_success = 0
        if global_params.learning_success:
            print ("Entire training was successful")
            break
def rl_run_rbdl_agent(config, iteration=0, c=0):
    """Create an iteration-specific copy of a YAML experiment config.

    The copy is written next to ``config`` as ``<config stem>-<iteration>.yaml``.
    Inside its ``experiment`` section:

    * ``output`` (when present) gets ``-<iteration>`` appended;
    * ``load_file`` (when present and ``iteration`` is non-zero) is replaced
      by ``model-leo-rbdl-with-diff-<iteration>.ckpt``.

    Args:
        config: Path of the template YAML configuration.
        iteration: Index used for the file suffix and the rewritten fields.
        c: Unused; kept so existing call signatures stay valid.

    Returns:
        Path of the newly written configuration file.
    """
    stem, _extension = os.path.splitext(config)
    new_config = "{}-{}.yaml".format(stem, iteration)

    conf = read_cfg_divyam(config)
    experiment = conf['experiment']

    # Give every iteration its own output name.
    if "output" in experiment:
        experiment['output'] = "{}-{}".format(experiment['output'], iteration)

    # Iteration 0 keeps whatever checkpoint the template points at.
    if "load_file" in experiment and iteration != 0:
        experiment["load_file"] = "model-leo-rbdl-with-diff-{}.ckpt".format(iteration)

    write_cfg(new_config, conf)
    return new_config
def write_cfg(outCfg, conf):
    """Write configuration file.

    Serialises ``conf`` to ``outCfg`` through ``ordered_dump`` using
    ``yaml.SafeDumper``. The file is opened in ``'w'`` mode, so an existing
    file at that path is overwritten.

    Args:
        outCfg: Path of the YAML file to create.
        conf: Configuration object to dump.
    """
    # ``open`` inside ``with`` replaces the Python 2-only ``file()`` builtin
    # and guarantees the handle is closed even if dumping raises.
    with open(outCfg, 'w') as outfile:
        ordered_dump(conf, outfile, yaml.SafeDumper)
def read_cfg_divyam(cfg):
    """Read configuration file.

    Args:
        cfg: Path of the YAML configuration file. It is formatted with
            ``'%s'``, so any object whose string form is a path is accepted.

    Returns:
        The object produced by ``ordered_load`` with ``yaml.SafeLoader``.

    Raises:
        SystemExit: With exit status 1 when ``cfg`` is not an existing file.
    """
    yfile = '%s' % cfg

    # Check that the file exists before trying to parse it.
    if not os.path.isfile(yfile):
        # Parenthesised single-argument form prints identically on Python 2
        # and Python 3.
        print('File %s not found' % yfile)
        # Non-zero status so shells and callers see the failure; a bare
        # sys.exit() would report success.
        sys.exit(1)

    # ``with open`` replaces the Python 2-only ``file()`` builtin and closes
    # the stream even if parsing raises.
    with open(yfile, 'r') as stream:
        return ordered_load(stream, yaml.SafeLoader)
def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
    """Load YAML from ``stream``, building mappings with ``object_pairs_hook``.

    A throw-away subclass of ``Loader`` is created on every call so that the
    custom mapping constructor is registered on the subclass rather than on
    ``Loader`` itself.

    Args:
        stream: YAML text or an open file object, handed to ``yaml.load``.
        Loader: PyYAML loader class to derive from.
        object_pairs_hook: Callable that receives the list of ``(key, value)``
            pairs of each mapping node; defaults to ``OrderedDict``.

    Returns:
        Whatever ``yaml.load`` returns for the document.

    NOTE(review): the default ``yaml.Loader`` is not meant for untrusted
    input; ``read_cfg_divyam`` in this file passes ``yaml.SafeLoader``
    explicitly -- other callers should do the same.
    """
    class _OrderPreservingLoader(Loader):
        """Private loader subclass that carries the mapping constructor."""

    def _build_mapping(loader, node):
        loader.flatten_mapping(node)
        pairs = loader.construct_pairs(node)
        return object_pairs_hook(pairs)

    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    _OrderPreservingLoader.add_constructor(mapping_tag, _build_mapping)
    return yaml.load(stream, _OrderPreservingLoader)
def ordered_dump(data, stream=None, Dumper=yaml.Dumper, **kwds):
    """Dump ``data`` as YAML, emitting ``OrderedDict`` as a plain mapping.

    A throw-away subclass of ``Dumper`` is created on every call so that the
    ``OrderedDict`` representer is registered on the subclass rather than on
    ``Dumper`` itself.

    Args:
        data: Object to serialise.
        stream: Optional output stream, passed straight to ``yaml.dump``.
        Dumper: PyYAML dumper class to derive from.
        **kwds: Extra keyword arguments forwarded to ``yaml.dump``.

    Returns:
        Whatever ``yaml.dump`` returns.
    """
    class _OrderPreservingDumper(Dumper):
        """Private dumper subclass that carries the OrderedDict representer."""

    def _represent_ordered(dumper, mapping):
        # Hand the items over in the mapping's own iteration order, under
        # the default mapping tag.
        tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
        return dumper.represent_mapping(tag, mapping.items())

    _OrderPreservingDumper.add_representer(OrderedDict, _represent_ordered)
    return yaml.dump(data, stream, _OrderPreservingDumper, **kwds)
def main():
    """Script entry point: call ``global_params.init()``, then run ``foo()``."""
    # Must run first: foo() reads and writes attributes on global_params.
    global_params.init()
    foo()
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()