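"""Launch chainerrl-visualizer for a pretrained PPO agent on BipedalWalker-v2.

Assumes a trained agent has been saved to a local "parameters" directory
(e.g. via ``agent.save("parameters")``); see ``agent.load`` below.
"""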
import numpy as np
import chainer
from chainer import functions as F
from chainerrl.agents import a3c
from chainerrl.agents import PPO
from chainerrl import links
from chainerrl import misc
from chainerrl import policies
import gym
from chainerrl_visualizer import launch_visualizer

class A3CFFGaussian(chainer.Chain, a3c.A3CModel):
    """An example of A3C feedforward Gaussian policy."""

    def __init__(self, obs_size, action_space,
                 n_hidden_layers=2, n_hidden_channels=64,
                 bound_mean=None, normalize_obs=None):
        assert bound_mean in [False, True]
        assert normalize_obs in [False, True]
        super().__init__()
        hidden_sizes = (n_hidden_channels,) * n_hidden_layers
        self.normalize_obs = normalize_obs
        with self.init_scope():
            # Gaussian policy head with a state-independent diagonal
            # covariance, bounded to the environment's action range.
            self.pi = policies.FCGaussianPolicyWithStateIndependentCovariance(
                obs_size, action_space.low.size,
                n_hidden_layers, n_hidden_channels,
                var_type='diagonal', nonlinearity=F.tanh,
                bound_mean=bound_mean,
                min_action=action_space.low, max_action=action_space.high,
                mean_wscale=1e-2)
            # Value function head.
            self.v = links.MLP(obs_size, 1, hidden_sizes=hidden_sizes)
            if self.normalize_obs:
                self.obs_filter = links.EmpiricalNormalization(
                    shape=obs_size
                )

    def pi_and_v(self, state):
        if self.normalize_obs:
            # Normalize with frozen running statistics, then clip to [-5, 5].
            state = F.clip(self.obs_filter(state, update=False),
                           -5.0, 5.0)
        return self.pi(state), self.v(state)


def phi(obs):
    # Cast observations to float32, the dtype the Chainer model expects.
    return obs.astype(np.float32)


gpu = -1  # -1 runs on CPU; set a CUDA device id to use a GPU
env_name = "BipedalWalker-v2"
seed = 0


def make_env():
    env = gym.make(env_name)
    env.seed(seed)
    # Wrap the env so that it is rendered at every step, which lets the
    # visualizer display the rollout.
    misc.env_modifiers.make_rendered(env)
    return env


env = make_env()
# Episode length limit from the env spec (not used below).
timestep_limit = env.spec.tags.get(
    "wrapper_config.TimeLimit.max_episode_steps")
obs_space = env.observation_space
action_space = env.action_space

model = A3CFFGaussian(obs_space.low.size, action_space,
                      bound_mean=False, normalize_obs=False)
opt = chainer.optimizers.Adam(alpha=3e-4, eps=1e-5)
opt.setup(model)

agent = PPO(model, opt, gpu=gpu, phi=phi, update_interval=2048,
            minibatch_size=64, epochs=10, clip_eps_vf=None,
            entropy_coef=0.0, standardize_advantages=False)
# Load pretrained weights from the "parameters" directory, which is
# expected to have been created with agent.save("parameters").
agent.load("parameters")

# Human-readable labels for the four continuous action dimensions,
# shown in the visualizer UI.
ACTION_MEANINGS = {
    0: 'Hip1(Torque/Velocity)',
    1: 'Knee1(Torque/Velocity)',
    2: 'Hip2(Torque/Velocity)',
    3: 'Knee2(Torque/Velocity)',
}

launch_visualizer(agent, env, ACTION_MEANINGS)
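
# Once launched, the visualizer serves a web UI (by default at
# http://localhost:5002) for stepping the loaded agent through the
# environment and inspecting its action distribution and state value.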