forked from axelbr/dvbf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwrapper.py
113 lines (86 loc) · 3.15 KB
/
wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from gym import ObservationWrapper
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
class PendulumEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
def __init__(self, g=10.0):
self.max_speed = 8
self.max_torque = 2.
self.dt = .05
self.g = g
self.m = 1.
self.l = 1.
self.viewer = None
high = np.array([1., 1., self.max_speed], dtype=np.float32)
self.action_space = spaces.Box(
low=-self.max_torque,
high=self.max_torque, shape=(1,),
dtype=np.float32
)
self.observation_space = spaces.Box(
low=-high,
high=high,
dtype=np.float32
)
self.seed()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self, u):
th, thdot = self.state # th := theta
g = self.g
m = self.m
l = self.l
dt = self.dt
u = np.clip(u, -self.max_torque, self.max_torque)[0]
self.last_u = u # for rendering
costs = angle_normalize(th) ** 2 + .1 * thdot ** 2 + .001 * (u ** 2)
newthdot = thdot + (-3 * g / (2 * l) * np.sin(th + np.pi) + 3. / (m * l ** 2) * u) * dt
newth = th + newthdot * dt
newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
self.state = np.array([newth, newthdot])
return self._get_obs(), -costs, False, {}
def reset(self):
high = np.array([np.pi, 1])
self.state = self.np_random.uniform(low=-high, high=high)
self.last_u = None
return self._get_obs()
def _get_obs(self):
theta, thetadot = self.state
return np.array([np.cos(theta), np.sin(theta), thetadot])
def render(self, mode='human'):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(500, 500)
self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
rod = rendering.make_capsule(1, .2)
rod.set_color(.8, .3, .3)
self.pole_transform = rendering.Transform()
rod.add_attr(self.pole_transform)
self.viewer.add_geom(rod)
axle = rendering.make_circle(.05)
axle.set_color(0, 0, 0)
self.viewer.add_geom(axle)
self.imgtrans = rendering.Transform()
self.pole_transform.set_rotation(self.state[0] + np.pi / 2)
if self.last_u:
self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
def angle_normalize(x):
return (((x+np.pi) % (2*np.pi)) - np.pi)
class PixelDictWrapper(ObservationWrapper):
def __init__(self, env):
super().__init__(env)
self.observation_space = env.observation_space.spaces['pixels']
def observation(self, observation):
return observation['pixels']