-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbinQLearning.py
More file actions
145 lines (115 loc) · 4.68 KB
/
binQLearning.py
File metadata and controls
145 lines (115 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# based on: https://github.com/lbarazza/Tile-Coding/blob/master/main.py
import sys
import random
import time
import argparse
from tqdm import tqdm
sys.path.append('Code/TileCoding/')
sys.path.append('Code/envs/')
sys.path.append("CATRL/")
from Code.binQLearning.config import *
from Code.utils import save_log, save_model, load_model
from Code.binQLearning.agent import QLearningAgent
def run_agent(env, agent, nEpisodes, env_name, seed=None, verbose=False, model_save=True, log_save=True, log_folder="results/"):
    """Train ``agent`` on ``env`` for ``nEpisodes`` episodes, logging per-episode stats.

    Parameters
    ----------
    env : environment exposing ``reset() -> state`` and
        ``step(action) -> (state, reward, done, success)``.
    agent : learner exposing ``reset_episode(state) -> action``,
        ``act(state, reward, done) -> action`` and an ``epsilon`` attribute.
    nEpisodes : int, number of training episodes to run.
    env_name : str, environment label recorded in the run metadata.
    seed : int or None, RNG seed; a random one is drawn (and recorded) if None.
    verbose : bool, print a per-episode summary when True.
    model_save : bool, persist the trained agent via ``save_model`` when True.
    log_save : bool, persist the logs via ``save_log`` when True.
    log_folder : str, destination folder passed to ``save_log``.

    Returns
    -------
    The same ``agent`` instance, after training.
    """
    log_data = {
        "episode": [],
        "reward": [],
        "epochs": [],
        "epsilon": [],
        "success": []
    }
    log_info = {
        "seed": [],
        "time": [],
        "agent": [],
        "env": [],
        "episodes": [],
        "epochs": [],
        "success_rate": [],
    }
    if seed is None:
        # Draw a seed so the run is reproducible and the seed can be logged.
        seed = random.randint(0, 1000)
    random.seed(seed)
    start_time = time.time()
    for i in range(nEpisodes):
        state = env.reset()
        action = agent.reset_episode(state)
        total_reward = 0
        epochs = 0
        done = False
        success = False  # defensive default in case the step loop is never entered
        while not done:
            state, reward, done, success = env.step(action)
            total_reward += reward
            epochs += 1
            action = agent.act(state, reward, done)
        if verbose:
            print("_______________________________")
            print(f"Episode: {i}\tReward: {total_reward}\tEpsilon: {agent.epsilon}\tepochs: {epochs}\tsuccess: {success}")
        log_data["episode"].append(i)
        log_data["reward"].append(total_reward)
        log_data["epochs"].append(epochs)
        log_data["epsilon"].append(agent.epsilon)
        log_data["success"].append(success)
    end_time = time.time()
    log_info["seed"].append(seed)
    log_info["time"].append(end_time - start_time)
    log_info["agent"].append("binQ")
    log_info["env"].append(env_name)
    log_info["episodes"].append(nEpisodes)
    log_info["epochs"].append(sum(log_data["epochs"]))
    # Guard against ZeroDivisionError when nEpisodes == 0.
    log_info["success_rate"].append(sum(log_data["success"]) / nEpisodes if nEpisodes else 0.0)
    if model_save:
        save_model(agent, log_info)
    if log_save:
        save_log(log_data, log_info, folder=log_folder)
    return agent
def show_model(env, agent, nEpisodes=1):
    """Run the trained ``agent`` greedily in ``env`` and render each step.

    Parameters
    ----------
    env : environment exposing ``reset()``, ``render()`` and
        ``step(action) -> (state, reward, done, success)``.
    agent : learner exposing ``reset_episode(state)`` and
        ``act(state, mode="test")`` (greedy action selection).
    nEpisodes : int, number of episodes to replay (default 1, matching the
        previously hard-coded single episode).
    """
    for _ in range(nEpisodes):
        state = env.reset()
        done = False
        # reset_episode primes the agent's internal episode state; its return
        # value is immediately superseded by the first act() call below.
        action = agent.reset_episode(state)
        while not done:
            env.render()
            action = agent.act(state, mode="test")
            state, reward, done, success = env.step(action)
if __name__ == "__main__":
    # Command-line entry point: optionally train, then optionally render.
    parser = argparse.ArgumentParser(description='Set options for training and rendering TileCoding')
    parser.add_argument('-t', '--train', choices=['t', 'f'], default='t', help='Train the model')
    parser.add_argument('-r', '--render', choices=['t', 'f'], default='t', help='Render the model')
    parser.add_argument('-s', '--seed', type=int, default=None, help='Seed for the model. If rendering, provide the seed of the model to render')
    parser.add_argument('-v', '--verbose', choices=['t', 'f'], default='t', help='Verbose mode')
    # choose the environment to train and render
    parser.add_argument('-e', '--env', default='MountainCar', choices=['MountainCar', 'MountainCarContinuous', 'CartPole', 'LunarLander', 'Acrobot', 'Pendulum'], help='Choose the environment to train and render')
    args = parser.parse_args()
    # Lazy dispatch table: only the selected config name is ever evaluated,
    # mirroring the original if/elif chain's name-resolution behaviour.
    config_loaders = {
        'MountainCar': lambda: MountainCar,
        'MountainCarContinuous': lambda: MountainCarContinuous,
        'CartPole': lambda: CartPole,
        'LunarLander': lambda: LunarLander,
        'Acrobot': lambda: Acrobot,
        'Pendulum': lambda: Pendulum,
    }
    loader = config_loaders.get(args.env)
    if loader is None:
        print("Invalid environment")
        sys.exit()
    config = loader()
    print("Environment: ", args.env)
    env = config['env']
    verbose = args.verbose == 't'
    training = args.train == 't'
    if training:
        print("Training the model")
        agent = QLearningAgent(env._env, config["bins"], config["alpha"], config["gamma"], config["epsilon"], config["decay"], config["eps_min"], seed=args.seed)
        trained_agent = run_agent(env, agent, config["episodes"], config['map_name'], args.seed, verbose=verbose)
    if args.render == 't':
        print("Rendering the model")
        if not training and args.seed is None:
            # A saved model is keyed by seed; without one we cannot load it.
            print("Please provide a seed to render the model")
            sys.exit()
        if not training:
            trained_agent = load_model("binQ", config['map_name'], args.seed)
        env = config['renderEnv']
        show_model(env, trained_agent)