-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
63 lines (52 loc) · 2.48 KB
/
test.py
File metadata and controls
63 lines (52 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
from frozen_lake import FrozenLake
from mountain_car import MountainCar
class GymTester(object):
    """Run reinforcement-learning agents over a sweep and visualise rewards.

    Each experiment method (``frozen_lake``, ``mountain_car``) is written as
    "produce the down-sampled reward curve for sweep index ``i``" and is
    turned into a zero-argument runner by the ``reward_visualizer``
    decorator, which calls it for ``i = 0 .. instances - 1`` and shows all
    curves stacked as rows of one image.
    """

    def __init__(self):
        pass

    def reward_visualizer(instances=20):
        """Build a decorator that runs a reward generator ``instances`` times.

        This is intentionally a plain function without ``self``: it is only
        meant to be applied as a decorator inside the class body.

        Args:
            instances: Number of sweep indices to evaluate; the decorated
                method is called with ``i`` in ``range(instances)``.

        Returns:
            A decorator that converts ``method(self, i) -> sequence`` into
            ``method(self) -> None``, which plots one image row per ``i``.
        """
        def _reward_visualizer(gen_instance_rewards):
            def wrapper(self):
                # Select the figure before running the agents, exactly as
                # the original code did.
                plt.figure(0)
                all_rewards = [
                    gen_instance_rewards(self, i) for i in range(instances)
                ]
                self._show_image(all_rewards)

            # Keep the decorated method's identity for debugging and help().
            # The metadata is copied by hand instead of functools.wraps
            # because the wrapper's signature (self) differs from the
            # wrapped one (self, i), and wraps() would advertise the
            # wrapped signature through __wrapped__.
            wrapper.__name__ = gen_instance_rewards.__name__
            wrapper.__qualname__ = gen_instance_rewards.__qualname__
            wrapper.__doc__ = gen_instance_rewards.__doc__
            return wrapper
        return _reward_visualizer

    @staticmethod
    def _show_image(image):
        """Draw ``image`` with ``plt.imshow``, hide both axes and show it."""
        fig = plt.imshow(image)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.show()

    @reward_visualizer(instances=20)
    def frozen_lake(self, i):
        """Train ``FrozenLake`` for sweep index ``i``; return a rolling max.

        The learning rate is ``0.97 ** i`` and the discount ``0.99 ** i``,
        so both shrink as ``i`` grows. After decoration this method is
        called as ``frozen_lake()`` and plots the curves for all indices.
        """
        qrl = FrozenLake(env_name='FrozenLake-v0', learning_rate=0.97 ** i, discount=0.99 ** i, iterations=1000)
        qrl.run()
        num_rewards = len(qrl.rewards)
        return self.rolling_max(qrl.rewards, window=num_rewards // 100, strides=num_rewards // 50)

    @reward_visualizer(instances=5)
    def mountain_car(self, i):
        """Train ``MountainCar`` for sweep index ``i``; return a rolling mean.

        The learning rate is ``0.97 ** i`` and the discount ``0.99 ** i``,
        so both shrink as ``i`` grows. After decoration this method is
        called as ``mountain_car()`` and plots the curves for all indices.
        """
        qrl = MountainCar(env_name='MountainCar-v0', learning_rate=0.97 ** i, discount=0.99 ** i, iterations=1000)
        qrl.run()
        num_rewards = len(qrl.rewards)
        return self.rolling_mean(qrl.rewards, window=num_rewards // 100, strides=num_rewards // 50)

    def _rolling(self, arr, reducer, window, strides):
        """Apply ``reducer`` to a window centred on every ``strides``-th index.

        Args:
            arr: Sliceable sequence of numbers (list or NumPy array).
            reducer: Callable mapping a non-empty slice to one value.
            window: Half-width of the window; capped at ``strides`` and
                raised to at least 1.
            strides: Distance between sampled indices; raised to at least 1
                so that short inputs (where ``len(arr) // k`` is 0) neither
                divide by zero nor produce empty windows.

        Returns:
            A list with one reduced value per sampled index
            ``0, strides, 2 * strides, ...`` below ``len(arr)``.
        """
        strides = max(1, strides)
        window = max(1, min(window, strides))
        # Clamp the left edge at 0: a negative slice start would wrap around
        # to the end of the sequence and drop or corrupt the first window.
        return [
            reducer(arr[max(0, i - window):i + window])
            for i in range(0, len(arr), strides)
        ]

    def rolling_mean(self, arr, window=2, strides=5):
        """Return the mean of ``arr`` around every ``strides``-th index.

        See ``_rolling`` for how ``window`` and ``strides`` are normalised.
        """
        return self._rolling(arr, np.mean, window, strides)

    def rolling_max(self, arr, window=2, strides=5):
        """Return the maximum of ``arr`` around every ``strides``-th index.

        See ``_rolling`` for how ``window`` and ``strides`` are normalised.
        """
        return self._rolling(arr, np.max, window, strides)

    def inspect_rewards(self, rewards):
        """Print the rolling maximum of ``rewards`` at about 100 samples."""
        num_rewards = len(rewards)
        print(self.rolling_max(rewards, window=num_rewards // 200, strides=num_rewards // 100))

    def visualize_rewards(self, rewards):
        """Show the rolling maximum of ``rewards`` as a one-row image."""
        plt.figure(0)
        num_rewards = len(rewards)
        self._show_image(np.atleast_2d(self.rolling_max(rewards, window=num_rewards // 200, strides=num_rewards // 100)))
# Script entry point: these statements execute whenever the module is run or
# imported, so the selected experiment starts immediately.
gym_tester = GymTester()
# Uncomment the next line to also run the FrozenLake sweep (20 instances).
#gym_tester.frozen_lake()
# Run the MountainCar sweep (5 instances) and display the reward image.
gym_tester.mountain_car()