-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmaster_diff_rate.py
160 lines (125 loc) · 5.11 KB
/
master_diff_rate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import time
from main_ddpg_diff_rate import start
import yaml
import tensorflow as tf
from train_model import training
from train_model import prediction
import pickle
import numpy as np
import os.path
import sys
from collections import OrderedDict
import global_params
import multiprocessing
from difference_model import DifferenceModel
def foo():
    """Train the policy with the difference model using parallel runs.

    For each policy iteration this sets the exploration-noise and learning-rate
    values on ``global_params``, then, while ``global_params.reward`` is 0:

    * writes ``config.yaml`` (replay-buffer settings and ``difference_model``),
    * generates one per-run configuration file via ``rl_run_zmqagent``,
    * runs ``start`` on every generated configuration in a process pool.

    After the loop the reward is reset, the function sleeps for five seconds
    and the ``test_run_on_model`` / ``learning_success`` flags are updated.

    The ideal-model training phase (``start`` on ``leo_rbdl_zmq_drl.yaml``)
    that used to precede this loop is currently disabled.
    """
    # Number of parallel runs: one generated config and one worker per run.
    RUNS = 5

    # No of policy iterations
    for ii in range(1, 2):
        # Training the policy with the difference model included
        global_params.ou_sigma = 0.2
        global_params.ou_theta = 0.15
        global_params.actor_learning_rate = 0.00012
        global_params.critic_learning_rate = 0.0012

        # NOTE(review): nothing in this loop assigns global_params.reward, and
        # start() runs in worker processes -- confirm how the parent's value is
        # expected to change, otherwise this loop cannot terminate.
        # NOTE(review): the extent of this loop body was reconstructed from a
        # copy of the file without indentation -- confirm against the original.
        while global_params.reward == 0:
            d = {'replay_buffer': {'load': 0, 'save': 0, 'buffer_size': 20000}, 'difference_model': 0}
            with open('config.yaml', 'w') as yaml_file:
                yaml.dump(d, yaml_file, default_flow_style=False)

            cfg = ["/leo_drl_server/leo_rbdl_zmq_drl_diff_rate.yaml"]
            new_cfg = rl_run_zmqagent(cfg, range(RUNS))

            pool = multiprocessing.Pool(RUNS)
            try:
                pool.map(start, new_cfg)
            finally:
                # Always release the workers, even when a run raises, and wait
                # for them to exit so no processes are left behind.
                pool.close()
                pool.join()

        global_params.reward = 0
        time.sleep(5)
        global_params.test_run_on_model = 1
        global_params.learning_success = 0
        # Running the learned policy on the difference model on the perturbed system to see if it works
def rl_run_zmqagent(list_of_cfgs, runs):
    """Create one configuration file per (base config, run) pair.

    Each base configuration is read once with ``read_cfg_divyam``; for every
    run the same loaded configuration is updated in place and written out with
    ``write_cfg`` under a local file name derived from the base path (``/``
    replaced by ``_``) plus ``-mp<run>``.

    Per-run changes to ``conf['experiment']``:

    * ``output`` (and the environment exporter ``file``, when an exporter is
      configured) become ``<name>-mp-rbdl<run>``;
    * when the agent has a ``communicator``, the agent and test-agent
      addresses become ``tcp://localhost:555<run>``;
    * ``learning_rate``, when present, becomes ``0.001 + 0.0002*run``;
    * ``difference_model``, when present, is set to 1.

    Args:
        list_of_cfgs: Iterable of base configuration file paths.
        runs: Iterable of run identifiers used in file names and addresses.

    Returns:
        List of the generated configuration file names, in creation order.
    """
    generated = []
    for base_cfg in list_of_cfgs:
        conf = read_cfg_divyam(base_cfg)
        stem, ext = os.path.splitext(base_cfg.replace("/", "_"))

        for run in runs:
            # Local file name for this run's configuration.
            target = "{}-mp{}{}".format(stem, run, ext)
            generated.append(target)

            experiment = conf['experiment']
            run_tag = "{}-mp-rbdl{}".format(stem, run)

            experiment['output'] = run_tag
            environment = experiment['environment']
            if "exporter" in environment:
                environment['exporter']['file'] = run_tag

            # Give every run its own address so the runs do not collide.
            if "communicator" in experiment['agent']:
                endpoint = "tcp://localhost:555{}".format(run)
                experiment['agent']['communicator']['addr'] = endpoint
                experiment['test_agent']['communicator']['addr'] = endpoint

            if "learning_rate" in experiment:
                experiment['learning_rate'] = 0.001 + 0.0002*run

            if "difference_model" in experiment:
                experiment['difference_model'] = 1

            write_cfg(target, conf)

    return generated
def rl_run_rbdl_agent(config, iteration = 0):
    """Write an iteration-specific copy of a configuration file.

    The copy is named ``<config without extension>-<iteration>.yaml``. In the
    copy, ``experiment.output`` (when present) gets ``-<iteration>`` appended,
    and ``experiment.load_file`` (when present and ``iteration`` is not 0) is
    pointed at ``model-leo-rbdl-with-diff-<iteration>.ckpt``.

    Args:
        config: Path of the base configuration file.
        iteration: Iteration number used in the generated names.

    Returns:
        The file name of the configuration that was written.
    """
    new_config = "{}-{}.yaml".format(os.path.splitext(config)[0], iteration)

    conf = read_cfg_divyam(config)
    experiment = conf['experiment']

    if "output" in experiment:
        experiment['output'] = "{}-{}".format(experiment['output'], iteration)

    # Only later iterations load a previously saved checkpoint.
    if "load_file" in experiment and iteration != 0:
        experiment["load_file"] = "model-leo-rbdl-with-diff-{}.ckpt".format(iteration)

    write_cfg(new_config, conf)
    return new_config
def write_cfg(outCfg, conf):
    """Write a configuration mapping to a local YAML file.

    Args:
        outCfg: Path of the file to create or overwrite.
        conf: Configuration to serialise; written with ``ordered_dump`` and
            ``yaml.SafeDumper``.
    """
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the file even if dumping raises.
    with open(outCfg, 'w') as outfile:
        ordered_dump(conf, outfile, yaml.SafeDumper)
def read_cfg_divyam(cfg):
    """Read a YAML configuration file.

    Args:
        cfg: Path of the configuration file.

    Returns:
        The configuration parsed by ``ordered_load`` with ``yaml.SafeLoader``.

    Raises:
        SystemExit: If ``cfg`` is not an existing file. A message is printed
            first and the exit status is 1, so the failure is visible to the
            calling shell.
    """
    yfile = '%s' % cfg

    # check if file exists
    if not os.path.isfile(yfile):
        # A single parenthesised argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print('File %s not found' % yfile)
        sys.exit(1)

    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the stream even if parsing raises.
    with open(yfile, 'r') as stream:
        return ordered_load(stream, yaml.SafeLoader)
def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
    """Load YAML from ``stream``, building mappings with ``object_pairs_hook``.

    A throwaway subclass of ``Loader`` is created so that the custom mapping
    constructor is registered on the subclass and not on ``Loader`` itself.

    Args:
        stream: YAML text or open file object passed to ``yaml.load``.
        Loader: PyYAML loader class to derive from. The default is
            ``yaml.Loader``; the caller in this file passes ``yaml.SafeLoader``.
        object_pairs_hook: Callable receiving the list of ``(key, value)``
            pairs of each mapping; its result becomes the mapping object.

    Returns:
        The object produced by ``yaml.load``.
    """
    class _OrderPreservingLoader(Loader):
        """Loader subclass that carries the mapping constructor below."""

    def _build_mapping(loader, node):
        # Resolve merge keys first, then hand the pairs to the hook in
        # document order.
        loader.flatten_mapping(node)
        pairs = loader.construct_pairs(node)
        return object_pairs_hook(pairs)

    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    _OrderPreservingLoader.add_constructor(mapping_tag, _build_mapping)
    return yaml.load(stream, _OrderPreservingLoader)
def ordered_dump(data, stream=None, Dumper=yaml.Dumper, **kwds):
    """Dump ``data`` as YAML, emitting ``OrderedDict`` as a plain mapping.

    A throwaway subclass of ``Dumper`` is created so that the ``OrderedDict``
    representer is registered on the subclass and not on ``Dumper`` itself.

    Args:
        data: Object to serialise.
        stream: Optional stream forwarded to ``yaml.dump``.
        Dumper: PyYAML dumper class to derive from.
        **kwds: Extra keyword arguments forwarded to ``yaml.dump``.

    Returns:
        Whatever ``yaml.dump`` returns for the given arguments.
    """
    class _OrderPreservingDumper(Dumper):
        """Dumper subclass that carries the OrderedDict representer below."""

    def _represent_ordered(dumper, mapping):
        # Emit the items in their stored order under the default mapping tag.
        tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
        return dumper.represent_mapping(tag, mapping.items())

    _OrderPreservingDumper.add_representer(OrderedDict, _represent_ordered)
    return yaml.dump(data, stream, _OrderPreservingDumper, **kwds)
def main():
    """Script entry point: call ``global_params.init()`` and then ``foo()``."""
    # presumably init() sets up the module-level values foo() reads and writes
    # (e.g. global_params.reward) -- confirm in global_params.
    global_params.init()
    foo()
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()