# play_car_racing.py
# Forked from clvoloshin/constrained_batch_policy_learning.
from car_racing import ExtendedCarRacing
import numpy as np
if __name__ == "__main__":
    # Interactive keyboard driver: play ExtendedCarRacing by hand and print
    # per-step values plus a per-episode summary to stdout.
    from pyglet.window import key

    # Episodes are cut off once more than this many tiles have been visited.
    # NOTE(review): 139 is carried over unchanged from the original script;
    # its relation to the track length is not visible here -- confirm.
    MAX_TILES_VISITED = 139

    # Action vector shared with the key handlers and passed to env.step().
    # Presumably [steering, gas, brake] as in Gym's CarRacing -- TODO confirm
    # against ExtendedCarRacing.
    a = np.array([0.0, 0.0, 0.0])

    def key_press(k, mod):
        """Update the shared action (or request a restart) on key press."""
        global restart
        if k == 0xff0d:  # Return/Enter keysym: abandon the current episode
            restart = True
        if k == key.LEFT:
            a[0] = -1.0
        if k == key.RIGHT:
            a[0] = +1.0
        if k == key.UP:
            a[1] = +1.0
        if k == key.DOWN:
            a[2] = +0.8  # set 1.0 for wheels to block to zero rotation

    def key_release(k, mod):
        """Reset the action component owned by the key that was released."""
        # Only clear a[0] if it still holds this key's value, so releasing
        # LEFT after RIGHT was pressed does not cancel the RIGHT input.
        if k == key.LEFT and a[0] == -1.0:
            a[0] = 0
        if k == key.RIGHT and a[0] == +1.0:
            a[0] = 0
        if k == key.UP:
            a[1] = 0
        if k == key.DOWN:
            a[2] = 0

    # NOTE(review): the meaning of the positional arguments (0, False, 12) is
    # defined by ExtendedCarRacing in car_racing.py, not visible here.
    env = ExtendedCarRacing(0, False, 12)
    try:
        # Render once up front so env.viewer exists before handlers are bound.
        env.render()
        record_video = False
        if record_video:
            env.monitor.start('/tmp/video-test', force=True)
        env.viewer.window.on_key_press = key_press
        env.viewer.window.on_key_release = key_release

        # One iteration per episode; runs until the process is interrupted.
        while True:
            env.reset()
            total_reward = 0.0
            steps = 0
            restart = False
            while True:
                s, r, done, info = env.step(a)
                # r is indexed as a sequence: r[0] is accumulated as the
                # reward, r[1] holds further per-step values that are printed.
                print("{} {} {}".format(r[1][1], r[1][3], r[1][4]))
                total_reward += r[0]
                if steps % 200 == 0 or done:
                    # Periodic debug output, intentionally disabled.
                    pass
                    # print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
                    # print("step {} total_reward {:+0.2f}".format(steps, total_reward))
                    #import matplotlib.pyplot as plt
                    #plt.imshow(s)
                    #plt.savefig("test.jpeg")
                steps += 1
                if not record_video:  # Faster, but you can as well call env.render() every time to play full window.
                    env.render()
                if done or restart or float(env.tile_visited_count) > MAX_TILES_VISITED:
                    break
            # Episode summary: steps, tiles visited, track length, fraction visited.
            print("{} {} {} {}".format(
                steps,
                float(env.tile_visited_count),
                len(env.track),
                float(env.tile_visited_count) / len(env.track),
            ))
    finally:
        # The episode loop never exits normally, so this is the only path on
        # which the environment is closed (Ctrl-C or an error in the loop).
        env.close()