ReinforcementLearning/test.py at master · Ianphorsman/ReinforcementLearning · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from functools import reduce, wraps

import matplotlib.pyplot as plt
import numpy as np

from frozen_lake import FrozenLake
from mountain_car import MountainCar


class GymTester(object):
    """Run environment experiments and display their rewards as images.

    Each experiment method builds an agent, calls its ``run()`` method,
    reduces the resulting ``rewards`` sequence with a rolling aggregate and
    hands the summary to matplotlib.
    """

    def __init__(self):
        pass

    def reward_visualizer(instances=20):
        """Return a decorator that plots the rewards of several runs.

        This is deliberately a plain function in the class body (it takes no
        ``self``): it is used as a decorator while the class is being
        defined, not called on instances.

        The decorated function must accept ``(self, i)`` and return one row
        of values. The resulting method takes only ``self``; it calls the
        original once for every ``i`` in ``range(instances)``, stacks the
        rows and shows them with ``plt.imshow``. Rows must have equal length
        for the image to be rectangular.

        Args:
            instances: Number of runs (image rows) to generate.
        """
        def _reward_visualizer(gen_instance_rewards):
            # Keep the experiment's name and docstring on the wrapper.
            @wraps(gen_instance_rewards)
            def wrapper(self):
                # Select the figure before the runs start, as the plot is
                # drawn into figure 0 once all rows are available.
                plt.figure(0)
                all_rewards = [
                    gen_instance_rewards(self, i) for i in range(instances)
                ]
                self._show_image(all_rewards)
            return wrapper
        return _reward_visualizer

    def _show_image(self, data):
        """Draw *data* with ``plt.imshow``, hide both axes and show it."""
        fig = plt.imshow(data)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.show()

    def _rolling(self, arr, window, strides, aggregate):
        """Apply *aggregate* to windows of *arr* centred on every stride.

        For each index ``i`` in ``0, strides, 2 * strides, ...`` the window
        is ``arr[i - window:i + window]`` with the start clamped to 0.
        Without the clamp a negative start would be interpreted by Python as
        an offset from the end of the sequence, which yields an empty or
        unrelated slice for the first sample.

        ``strides`` and ``window`` are raised to at least 1 so that callers
        deriving them by integer division from a short sequence neither hit
        a modulo-by-zero nor produce empty windows. As in the original
        logic, ``window`` is additionally capped at ``strides``.
        """
        strides = max(1, strides)
        window = max(1, min(window, strides))
        return [
            aggregate(arr[max(0, i - window):i + window])
            for i in range(0, len(arr), strides)
        ]

    @reward_visualizer(instances=20)
    def frozen_lake(self, i):
        """Run one FrozenLake instance and return its rolling-max rewards.

        Args:
            i: Zero-based run index; the learning rate is ``0.97 ** i`` and
                the discount ``0.99 ** i``.

        Returns:
            A list of rolling maxima over ``qrl.rewards``.
        """
        qrl = FrozenLake(env_name='FrozenLake-v0', learning_rate=0.97 ** i, discount=0.99 ** i, iterations=1000)
        qrl.run()
        num_rewards = len(qrl.rewards)
        return self.rolling_max(qrl.rewards, window=num_rewards // 100, strides=num_rewards // 50)

    @reward_visualizer(instances=5)
    def mountain_car(self, i):
        """Run one MountainCar instance and return its rolling-mean rewards.

        Args:
            i: Zero-based run index; the learning rate is ``0.97 ** i`` and
                the discount ``0.99 ** i``.

        Returns:
            A list of rolling means over ``qrl.rewards``.
        """
        qrl = MountainCar(env_name='MountainCar-v0', learning_rate=0.97 ** i, discount=0.99 ** i, iterations=1000)
        qrl.run()
        num_rewards = len(qrl.rewards)
        return self.rolling_mean(qrl.rewards, window=num_rewards // 100, strides=num_rewards // 50)

    def rolling_mean(self, arr, window=2, strides=5):
        """Return the mean of a window around every ``strides``-th element.

        Args:
            arr: Sequence of numbers supporting ``len`` and slicing.
            window: Half-width of each window; capped at ``strides`` and
                raised to at least 1.
            strides: Distance between sampled indices; raised to at least 1.

        Returns:
            A list with one ``np.mean`` result per sampled index, starting
            at index 0. Empty when *arr* is empty.
        """
        return self._rolling(arr, window, strides, np.mean)

    def rolling_max(self, arr, window=2, strides=5):
        """Return the maximum of a window around every ``strides``-th element.

        Args:
            arr: Sequence of numbers supporting ``len`` and slicing.
            window: Half-width of each window; capped at ``strides`` and
                raised to at least 1.
            strides: Distance between sampled indices; raised to at least 1.

        Returns:
            A list with one ``np.max`` result per sampled index, starting
            at index 0. Empty when *arr* is empty.
        """
        return self._rolling(arr, window, strides, np.max)

    def inspect_rewards(self, rewards):
        """Print the rolling maxima of *rewards* (about 100 samples)."""
        num_rewards = len(rewards)
        print(self.rolling_max(rewards, window=num_rewards // 200, strides=num_rewards // 100))

    def visualize_rewards(self, rewards):
        """Show the rolling maxima of *rewards* as a single-row image."""
        plt.figure(0)
        num_rewards = len(rewards)
        summary = self.rolling_max(rewards, window=num_rewards // 200, strides=num_rewards // 100)
        # imshow needs 2-D data, so promote the flat summary to one row.
        self._show_image(np.atleast_2d(summary))

# Script section: these statements execute whenever the file is run or imported.
gym_tester = GymTester()
# The FrozenLake experiment is currently disabled; uncomment to run it.
#gym_tester.frozen_lake()
# Run the MountainCar experiment (5 instances) and display its reward image.
gym_tester.mountain_car()