-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path01-intro.py
More file actions
54 lines (47 loc) · 1.64 KB
/
01-intro.py
File metadata and controls
54 lines (47 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""
An intro file to explore OpenAI gym environments
"""
import numpy as np
import matplotlib.pyplot as plt
import gym
# Snapshot of every environment registered with Gym at import time,
# plus the list of their string ids (e.g. 'CartPole-v0').
available_envs = gym.envs.registry.all()
env_ids = [spec.id for spec in available_envs]
class dotdict(dict):
    """A ``dict`` whose entries are also readable/writable as attributes.

    Reading a missing attribute returns ``None`` (the ``dict.get``
    contract) instead of raising ``AttributeError``; deleting a missing
    attribute raises ``KeyError`` just like ``del d[key]`` would.
    """

    def __getattr__(self, name):
        # Mirror dict.get: missing keys yield None rather than raising.
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]
def print_env_info(env):
    """Print the action space, observation space and reward range of *env*.

    Returns None; output goes to stdout only.
    """
    summary = (
        ("Action space: ", env.action_space),
        ("Observation space: ", env.observation_space),
        ("Reward range: ", env.reward_range),
    )
    for label, value in summary:
        print(label, value)
def main():
    """Random-search baseline for CartPole-v0.

    Samples 500 random linear policies (one weight per observation
    dimension; action = 1 iff w . obs > 0), runs one capped episode per
    policy, reports the best one, then replays it with rendering.
    """
    env = gym.make('CartPole-v0')
    tmax = 1000  # per-episode step budget (safety cap)
    print_env_info(env)

    # Each row is one linear policy over the observation vector.
    random_policies = np.random.uniform(-1, 1, (500, env.observation_space.shape[0]))

    total_reward = []
    for i in range(random_policies.shape[0]):
        obs = env.reset()  # legacy gym API: reset() returns obs only
        done = False
        episode_reward = 0
        steps_left = tmax  # BUGFIX: budget is reset per episode, not shared
        # BUGFIX: was `while tmax > 0 or not done`, which (a) kept stepping
        # an episode after `done` became True and (b) exhausted one global
        # 1000-step budget across all 500 episodes.
        while steps_left > 0 and not done:
            action = int(np.dot(obs, random_policies[i, :]) > 0)
            obs, reward, done, info = env.step(action)  # legacy 4-tuple API
            episode_reward += reward
            steps_left -= 1
        total_reward.append(episode_reward)

    idx = np.argmax(total_reward)
    print("Max reward values: {} \n With policy number {} \n "
          "The following parameters: {}".format(total_reward[idx],
                                                idx,
                                                random_policies[idx, :]))

    # Replay the best-scoring policy with on-screen rendering.
    obs = env.reset()
    done = False
    while not done:
        env.render()
        obs, reward, done, info = env.step(int(np.dot(random_policies[idx, :], obs) > 0))
    env.close()  # BUGFIX: close via the public wrapper, not env.env
# Script entry point: announce which file is running, then launch the demo.
if __name__ == '__main__':
    print("executing ", __file__)
    main()