-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsingle_run.py
More file actions
64 lines (51 loc) · 2.03 KB
/
Copy pathsingle_run.py
File metadata and controls
64 lines (51 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
from run_functions import *
from agent_functions import *
import math
def single_run(game, n_agents, n_states, n_actions, n_iter, epsilon, gamma, alpha, q_initial, qmin, qmax, kwargs):
    """Simulate one run of epsilon-greedy Q-learning agents playing ``game``.

    Every iteration selects actions with ``e_greedy_select_action``, plays
    them through ``game``, applies ``bellman_update_q_table`` and stores a
    snapshot of that step.

    Args:
        game: Callable invoked as ``game(A)`` with the selected actions; it
            must return a pair ``(R, S)``. ``R`` is recorded and passed to the
            Q update, ``S`` is used as the per-agent state index afterwards.
        n_agents: Number of agents.
        n_states: Number of states; the initial state of each agent is drawn
            uniformly from ``range(n_states)``.
        n_actions: Number of actions; also the minimum length of the recorded
            per-action counts ``"nA"``.
        n_iter: Number of iterations to simulate.
        epsilon: Exploration rate. Any value other than the string
            ``"DECAYED"`` is used as a constant rate. ``"DECAYED"`` selects
            ``exp(-t / (n_iter / 8))``, i.e. a decay from 1 towards 0.
        gamma: Forwarded to ``bellman_update_q_table``.
        alpha: Forwarded to ``initialize_learning_rates``; the result is what
            ``bellman_update_q_table`` receives.
        q_initial: Forwarded to ``initialize_q_table``.
        qmin: Forwarded to ``initialize_q_table``.
        qmax: Forwarded to ``initialize_q_table``.
        kwargs: Not used by this function; kept so existing callers keep
            working.

    Returns:
        dict: Maps each iteration index ``t`` to a dict with the keys
        ``"nA"``, ``"R"``, ``"Qmean"``, ``"groups"``, ``"Qvar"``, ``"A"``
        and ``"Q"``.
    """
    Q = initialize_q_table(q_initial, n_agents, n_states, n_actions, qmin, qmax)
    alpha = initialize_learning_rates(alpha, n_agents)

    # The decay time constant follows this run's own length. The previous code
    # read the script-level global N_ITER, which does not exist when this
    # function is imported from another module.
    eps_decay = n_iter / 8
    if epsilon == "DECAYED":
        eps_start, eps_end = 1, 0
    else:
        # Constant exploration: start == end makes the decay term vanish.
        eps_start = eps_end = epsilon

    ind = np.arange(n_agents)
    S = np.random.randint(n_states, size=n_agents)

    data = {}
    for t in range(n_iter):
        # Use a separate name so the `epsilon` argument is not overwritten.
        eps_t = eps_end + (eps_start - eps_end) * math.exp(-1. * t / eps_decay)

        A = e_greedy_select_action(Q, S, eps_t)
        # S is rebound here, so the Q update below receives the state returned
        # by this step rather than the one the actions were selected in.
        R, S = game(A)
        # The second return value (sum of belief updates) is not recorded.
        Q, _ = bellman_update_q_table(Q, S, A, R, alpha, gamma)

        # NOTE(review): "Q" stores a reference, not a copy. If
        # bellman_update_q_table updates Q in place, every snapshot would
        # alias the final table - confirm against its implementation.
        data[t] = {
            # Pad the counts to n_actions so every snapshot has one entry per
            # action, even for actions nobody chose this step.
            "nA": np.bincount(A, minlength=n_actions),
            "R": R,
            "Qmean": Q.mean(axis=1).mean(axis=0),
            "groups": count_groups(Q[ind, S, :], 0.1),
            "Qvar": Q[ind, S, :].var(axis=0),
            "A": A,
            "Q": Q,
        }
    return data
if __name__ == "__main__":
    # Script entry point: configure one run on the initial Braess network,
    # execute it and hand the collected data to the plotting helper.
    from routing_networks import braess_augmented_network, braess_initial_network
    from recommenders import heuristic_recommender, constant_recommender
    from plot_functions import plot_run

    # Problem size.
    N_AGENTS = 100
    N_STATES = 3
    N_ACTIONS = 3
    N_ITER = 10000

    # Learning hyper-parameters.
    EPSILON = 0.01
    GAMMA = 0
    ALPHA = 0.01
    QINIT = "UNIFORM"  # alternative: np.array([-2, -2, -2])

    # NOTE(review): `mask` and `game` are defined but not used further down in
    # this script.
    mask = np.ones(N_AGENTS)
    game = lambda x, y: braess_initial_network(x, y)

    # NOTE(review): single_run declares 12 positional parameters
    # (..., q_initial, qmin, qmax, kwargs) but only 10 arguments are passed
    # here, with constant_recommender landing in the `qmin` slot. As written
    # this call raises TypeError - confirm the intended qmin/qmax/kwargs.
    M = single_run(
        braess_initial_network,
        N_AGENTS,
        N_STATES,
        N_ACTIONS,
        N_ITER,
        EPSILON,
        GAMMA,
        ALPHA,
        QINIT,
        constant_recommender,
    )

    # The run name encodes the configuration.
    NAME = f"run_N{N_AGENTS}_S{N_STATES}_A{N_ACTIONS}_I{N_ITER}_e{EPSILON}_g{GAMMA}_a{ALPHA}_q{QINIT}"
    plot_run(M, NAME, N_AGENTS, N_ACTIONS, N_ITER)