-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
67 lines (47 loc) · 2.27 KB
/
run.py
File metadata and controls
67 lines (47 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import sys
import datetime
import pandas as pd
from ray.rllib.algorithms.algorithm import Algorithm
from learn import create_config
from analyze_res import save_stats
from UnitCell_Environment.unitcell_environment.env.hier_paral_unitcell_environment import HierParalUnitCellEnvironment
from ray.rllib.utils.checkpoints import get_checkpoint_info
def run_sac(env_config, algo_config, input_dir, checkpoint=None):
    """Run a checkpoint-restored algorithm over every ``.cif`` file in a directory.

    The algorithm is rebuilt from the state stored in *checkpoint*, with the
    stored config replaced by ``create_config(env_config, algo_config)``.
    Each CIF file, in sorted filename order, is handed to ``env.reset`` and
    the environment is then stepped with actions computed by the
    ``"2_stepsize"`` policy (with exploration enabled) until either every
    termination flag or every truncation flag is true. Afterwards
    ``opt_traj.csv`` is read from ``env.output_dir`` and passed to
    ``save_stats``.

    Both configs, and the accumulated time spent in ``env.reset``,
    ``algo.compute_actions`` and ``env.step``, are written to stderr.

    Args:
        env_config: Environment configuration. Passed to ``create_config``
            and to the environment constructor; its ``"output_dir"`` entry is
            passed to ``save_stats``.
        algo_config: Algorithm configuration passed to ``create_config``.
        input_dir: Directory whose regular files ending in ``.cif`` are
            processed (no recursion into sub-directories).
        checkpoint: Checkpoint to restore the algorithm from. Required in
            practice; the ``None`` default exists only in the signature.

    Returns:
        The *checkpoint* argument, unchanged.

    Raises:
        ValueError: If *checkpoint* is ``None``.
    """
    # Function-scope import so the block does not depend on a file-level edit.
    import time

    # Fail fast with a clear message instead of an obscure error from the
    # checkpoint loader further down.
    if checkpoint is None:
        raise ValueError(
            "run_sac requires a checkpoint to restore the algorithm from; got None"
        )

    config = create_config(env_config, algo_config)
    print(env_config, file=sys.stderr)
    print(algo_config, file=sys.stderr)

    # Rebuild from the checkpoint's state instead of calling
    # Algorithm.from_checkpoint directly, so the stored config can be swapped
    # for the freshly built one before the algorithm is constructed.
    checkpoint_info = get_checkpoint_info(checkpoint)
    state = Algorithm._checkpoint_info_to_algorithm_state(
        checkpoint_info=checkpoint_info,
        policy_ids=None,
        policy_mapping_fn=None,
        policies_to_train=None,
    )
    state["config"] = config
    algo = Algorithm.from_state(state)

    env = HierParalUnitCellEnvironment(env_config)

    # Accumulated durations in seconds, measured with a monotonic clock so
    # wall-clock adjustments cannot distort them.
    reset_seconds = 0.0
    action_seconds = 0.0
    step_seconds = 0.0

    cif_files = sorted(
        name
        for name in os.listdir(input_dir)
        if name.endswith(".cif") and os.path.isfile(f"{input_dir}/{name}")
    )

    for cif in cif_files:
        started = time.perf_counter()
        observations, _infos = env.reset(cif=f"{input_dir}/{cif}")
        reset_seconds += time.perf_counter() - started

        # Placeholder flags so the loop body runs at least once; env.step
        # replaces both dicts on the first iteration.
        terminations = {"a": False}
        truncations = {"a": False}
        while not (all(terminations.values()) or all(truncations.values())):
            started = time.perf_counter()
            actions = algo.compute_actions(
                observations, policy_id="2_stepsize", explore=True
            )
            action_seconds += time.perf_counter() - started

            started = time.perf_counter()
            observations, _rewards, terminations, truncations, _infos = env.step(
                actions
            )
            step_seconds += time.perf_counter() - started

    print(
        f"timing [s]: env.reset={reset_seconds:.3f} "
        f"compute_actions={action_seconds:.3f} env.step={step_seconds:.3f}",
        file=sys.stderr,
    )

    # Extra reset without a CIF file; per the original author's note this is
    # what saves the last structure optimization.
    env.reset()

    df = pd.read_csv(f"{env.output_dir}/opt_traj.csv")
    save_stats(df, env_config["output_dir"])
    return checkpoint