-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmake_demo.py
More file actions
222 lines (197 loc) · 8.57 KB
/
make_demo.py
File metadata and controls
222 lines (197 loc) · 8.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# This code is based on code from OpenAI gym. (https://github.com/openai/gym/blob/master/gym/utils/play.py)
import gym
import pygame
import matplotlib
import argparse
import os
import pickle
from gym import logger
from run_atari import make_env
import numpy as np
"""
backspace: このエピソードの行動軌跡を保存せずリセット (discard this episode's trajectory and reset)
return: このエピソードの行動軌跡を保存してリセット (save this episode's trajectory, then reset)
esc: このエピソードの行動軌跡を保存せずゲームを終了 (quit the game without saving the current episode's trajectory)
keypad plus: double the game speed (fps)
keypad minus: halve the game speed (fps)
"""
# Optional plotting support: select the TkAgg backend first, then import
# pyplot. If either step raises ImportError, log a warning and leave
# ``plt`` as None so the rest of the module can still be used (PlayPlot
# asserts that ``plt`` is not None before plotting).
try:
    matplotlib.use('TkAgg')
    import matplotlib.pyplot as plt
except ImportError as e:
    logger.warn('failed to set matplotlib backend, plotting will not work: %s' % str(e))
    plt = None
from collections import deque
from pygame.locals import VIDEORESIZE
def display_arr(screen, arr, video_size, transpose):
    """Draw a frame onto a pygame surface, stretched to ``video_size``.

    The frame is min/max normalised to the range [0, 255], optionally has
    its first two axes swapped, is converted to a pygame surface, scaled to
    ``video_size`` and blitted at the top-left corner of ``screen``.

    Arguments
    ---------
    screen:
        Target pygame surface (anything exposing ``blit``).
    arr:
        Frame to display. It must provide ``min``, ``max`` and ``swapaxes``
        (presumably a NumPy image array as returned by ``env.render`` --
        confirm against the caller).
    video_size:
        ``(width, height)`` passed to ``pygame.transform.scale``.
    transpose: bool
        If True, axes 0 and 1 of the frame are swapped before the surface
        is created.
    """
    arr_min, arr_max = arr.min(), arr.max()
    value_range = arr_max - arr_min
    if value_range == 0:
        # A uniform frame (e.g. a fully black screen) has no contrast to
        # stretch; dividing by the zero range would fill the image with NaN.
        arr = np.zeros_like(arr, dtype=np.float64)
    else:
        arr = 255.0 * (arr - arr_min) / value_range
    pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr)
    pyg_img = pygame.transform.scale(pyg_img, video_size)
    screen.blit(pyg_img, (0, 0))
def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None):
    """Let a human play ``env`` with the keyboard and record the transitions.

    To simply play the game use:

        play(gym.make("Pong-v4"))

    This also works if ``env`` is wrapped, so it is useful for verifying
    that frame-level preprocessing does not render the game unplayable.

    To plot real-time statistics while playing, use ``PlayPlot``. Sample
    code plotting the reward for the last 5 seconds of gameplay:

        def callback(obs_t, obs_tp1, action, rew, done, info):
            return [rew,]
        plotter = PlayPlot(callback, 30 * 5, ["reward"])
        env = gym.make("Pong-v4")
        play(env, callback=plotter.callback)

    Control keys (handled only when the key is not part of the action
    mapping):
        backspace:    discard the current episode and reset
        return:       end the current episode, keep it, and reset
        escape:       stop playing; the episode in progress is not recorded
        keypad plus:  double ``fps``
        keypad minus: halve ``fps``

    Arguments
    ---------
    env: gym.Env
        Environment to use for playing.
    transpose: bool
        If True the rendered frame is transposed before display.
        Defaults to True.
    fps: int
        Maximum number of steps of the environment to execute every second.
        Defaults to 30.
    zoom: float
        Make screen edge this many times bigger.
    callback: lambda or None
        If provided, it is executed after every step with:
            obs_t: observation before performing action
            obs_tp1: observation after performing action
            action: action that was executed
            rew: reward that was received
            done: whether the environment is done or not
            info: debug info
    keys_to_action: dict: tuple(int) -> int or None
        Mapping from keys pressed to action performed. The lookup key is
        the sorted tuple of currently pressed key codes, so if pressing
        'w' and space together should trigger action 2 the dict contains:
            {
                # ...
                tuple(sorted((ord('w'), ord(' ')))): 2,
                # ...
            }
        Key combinations missing from the mapping produce action 0.
        If None, the mapping provided by the env (or its unwrapped env)
        via ``get_keys_to_action`` is used.

    Returns
    -------
    list
        One entry per recorded episode. Each episode is a list of
        ``(prev_obs, action, rew, obs, done)`` tuples, where the
        observations are converted with ``np.array`` and the action is an
        ``int64`` array. An episode is recorded at the reset that follows
        it, unless it was discarded with backspace or is empty.

    Raises
    ------
    AssertionError
        If ``keys_to_action`` is None and the env offers no mapping.
    """
    env.reset()
    rendered = env.render(mode='rgb_array')

    if keys_to_action is None:
        if hasattr(env, 'get_keys_to_action'):
            keys_to_action = env.get_keys_to_action()
        elif hasattr(env.unwrapped, 'get_keys_to_action'):
            keys_to_action = env.unwrapped.get_keys_to_action()
        else:
            # Raised explicitly instead of ``assert False`` so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(
                env.spec.id + " does not have explicit key to action mapping, " +
                "please specify one manually"
            )
    # Every key code that takes part in at least one action combination.
    relevant_keys = {key for combo in keys_to_action for key in combo}

    video_size = [rendered.shape[1], rendered.shape[0]]
    if zoom is not None:
        video_size = int(video_size[0] * zoom), int(video_size[1] * zoom)

    pressed_keys = []
    running = True
    # Start "done" so that the first loop iteration performs a reset.
    env_done = True
    save_trajectory = True

    screen = pygame.display.set_mode(video_size)
    clock = pygame.time.Clock()

    trajectories = []
    episode_trajectory = []

    while running:
        if env_done:
            env_done = False
            obs = env.reset()
            if len(episode_trajectory) > 0 and save_trajectory:
                trajectories.append(episode_trajectory)
                print("saved trajectory len", len(episode_trajectory))
            print("reset! episode reward is", sum(epi[2] for epi in episode_trajectory))
            episode_trajectory = []
            save_trajectory = True
        else:
            action = keys_to_action.get(tuple(sorted(pressed_keys)), 0)
            prev_obs = obs
            obs, rew, env_done, info = env.step(action)
            if callback is not None:
                callback(prev_obs, obs, action, rew, env_done, info)
            episode_trajectory.append(
                (np.array(prev_obs), np.array(action, dtype='int64'), rew, np.array(obs), env_done)
            )
        if obs is not None:
            rendered = env.render(mode='rgb_array')
            display_arr(screen, rendered, transpose=transpose, video_size=video_size)

        # process pygame events
        for event in pygame.event.get():
            # test events, set key states
            if event.type == pygame.KEYDOWN:
                if event.key in relevant_keys:
                    # Never store a key twice: a duplicate would make the
                    # sorted tuple miss every entry of keys_to_action.
                    if event.key not in pressed_keys:
                        pressed_keys.append(event.key)
                elif event.key == pygame.K_BACKSPACE:
                    save_trajectory = False
                    env_done = True
                elif event.key == pygame.K_RETURN:
                    save_trajectory = True
                    env_done = True
                elif event.key == pygame.K_ESCAPE:
                    running = False
                elif event.key == pygame.K_KP_PLUS:
                    fps *= 2
                    print(f"fps: {fps}")
                elif event.key == pygame.K_KP_MINUS:
                    fps /= 2
                    print(f"fps: {fps}")
            elif event.type == pygame.KEYUP:
                # A release can arrive without a recorded press (e.g. the
                # key was already held when the window gained focus), so
                # only remove keys that are actually tracked.
                if event.key in pressed_keys:
                    pressed_keys.remove(event.key)
            elif event.type == pygame.QUIT:
                running = False
            elif event.type == VIDEORESIZE:
                video_size = event.size
                screen = pygame.display.set_mode(video_size)
                print(video_size)

        pygame.display.flip()
        clock.tick(fps)
    pygame.quit()
    return trajectories
class PlayPlot(object):
    """Live scatter plots of per-step statistics gathered while playing.

    ``callback`` is a user function that receives the step data
    ``(obs_t, obs_tp1, action, rew, done, info)`` and returns one value per
    plot. Only the most recent ``horizon_timesteps`` values of each series
    are kept and drawn. ``plot_names`` supplies one title per plot.
    """

    def __init__(self, callback, horizon_timesteps, plot_names):
        self.plot_names = plot_names
        self.horizon_timesteps = horizon_timesteps
        self.data_callback = callback

        assert plt is not None, "matplotlib backend failed, plotting will not work"

        plot_count = len(self.plot_names)
        figure, axes = plt.subplots(plot_count)
        self.fig = figure
        # With a single plot, subplots() yields one axes object rather than
        # a sequence; wrap it so the axes can always be indexed.
        self.ax = [axes] if plot_count == 1 else axes
        for title, subplot in zip(plot_names, self.ax):
            subplot.set_title(title)

        # Number of steps observed so far.
        self.t = 0
        # Handle of the scatter currently drawn on each axes (None = nothing yet).
        self.cur_plot = [None] * plot_count
        # One bounded history per plot.
        self.data = [deque(maxlen=horizon_timesteps) for _ in range(plot_count)]

    def callback(self, obs_t, obs_tp1, action, rew, done, info):
        """Record the statistics for one step and redraw every plot."""
        new_values = self.data_callback(obs_t, obs_tp1, action, rew, done, info)
        for value, history in zip(new_values, self.data):
            history.append(value)
        self.t += 1

        right_edge = self.t
        left_edge = max(0, right_edge - self.horizon_timesteps)

        for index, previous in enumerate(self.cur_plot):
            # Remove the previous scatter so that points do not pile up.
            if previous is not None:
                previous.remove()
            subplot = self.ax[index]
            self.cur_plot[index] = subplot.scatter(
                range(left_edge, right_edge), list(self.data[index]), c='blue'
            )
            subplot.set_xlim(left_edge, right_edge)
        # A tiny pause lets matplotlib process the redraw.
        plt.pause(0.000001)
def main():
    """Record human play for one environment and pickle the result.

    Command-line options: ``--env`` (environment id), ``--fps`` (game fps)
    and ``--seed`` (game seed). The recorded episodes are written to
    ``data/demo/human.<env>.pkl``; nothing is written when no episode was
    recorded.
    """
    cli = argparse.ArgumentParser()
    option_specs = (
        ('--env', str, 'MontezumaRevengeNoFrameskip-v4', 'Define Environment'),
        ('--fps', int, 20, 'Game fps'),
        ('--seed', int, None, 'Game seed'),
    )
    for flag, value_type, fallback, description in option_specs:
        cli.add_argument(flag, type=value_type, default=fallback, help=description)
    options = cli.parse_args()

    out_dir = "data/demo"
    out_name = "human." + options.env + ".pkl"

    demo_env = make_env(options.env, seed=options.seed, wrapper_kwargs={'frame_stack': True})
    episodes = play(demo_env, zoom=4, fps=options.fps)
    print("num episodes", len(episodes))

    # Nothing recorded: leave the output directory and file untouched.
    if len(episodes) == 0:
        return

    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, out_name), mode="wb") as sink:
        pickle.dump(episodes, sink)


if __name__ == '__main__':
    main()