diff --git a/.gitignore b/.gitignore index e9a5622..10068d4 100644 --- a/.gitignore +++ b/.gitignore @@ -7,15 +7,27 @@ __pycache__/ .Python .vscode +.cursorindexingignore + +# Claude Code worktree config — per-session local state, not part of the project +.claude/ # Jupyter Notebook .ipynb_checkpoints # Sandbox additions (not upstream) -.venv/ +# .venv as a bare name covers both a real directory and a symlink to a +# shared parent venv (a trailing-slash pattern misses the symlink case). +.venv recordings/ # tensorboard event files live in subdirs of runs/ (e.g. runs/<run_name>/events.out.tfevents.*) runs/**/events.out.tfevents.* +# per-training-job stdout logs written by frontend/server.py on /api/training/start +runs/_training_*.log +# Per-run summary CSVs that deep_rl_zoo.<algo>.run_atari appends to on +# every training run. Untracked so any TRAIN keeps the working tree +# clean — historical snapshots remain in git history if needed. +logs/*_atari_results.csv # new training checkpoints; bundled ones are kept via !-rules below checkpoints/*.ckpt !checkpoints/IQN_Pong_2.ckpt diff --git a/deep_rl_zoo/gym_env.py b/deep_rl_zoo/gym_env.py index 1ad9dcd..0bae0b4 100644 --- a/deep_rl_zoo/gym_env.py +++ b/deep_rl_zoo/gym_env.py @@ -30,6 +30,15 @@ import os import datetime import numpy as np + +# NumPy 2.0 removed np.bool8; gym 0.25.2 (pinned by this repo) still references +# it in its passive_env_checker.py during env.step. The upstream README §9 +# notes this as a known deprecation warning, but on NumPy 2.0+ the attribute +# is gone entirely and gym blows up. Restore the alias so every entry point +# (run_atari / run_classic / eval_agent / frontend.stream_eval) is covered. 
+if not hasattr(np, "bool8"): + np.bool8 = np.bool_ # type: ignore[attr-defined] + import cv2 import logging import gym diff --git a/frontend/stream_eval.py b/frontend/stream_eval.py index 8f92651..16d7f62 100644 --- a/frontend/stream_eval.py +++ b/frontend/stream_eval.py @@ -39,6 +39,12 @@ from typing import NamedTuple import numpy as np + +# NumPy 2.0 removed np.bool8; gym 0.25.2 (pinned by this repo) still references +# it in its step/reset paths. Restore the alias before any gym import. +if not hasattr(np, "bool8"): + np.bool8 = np.bool_ # type: ignore[attr-defined] + import torch import gym @@ -99,7 +105,7 @@ def _build_iqn(state_dim, action_dim, device, checkpoint_path, env_name) -> tupl return network, actor -def _build_prioritized_dqn(state_dim, action_dim, device, checkpoint_path, env_name) -> tuple: +def _build_dqn(state_dim, action_dim, device, checkpoint_path, env_name) -> tuple: network = DqnConvNet(state_dim=state_dim, action_dim=action_dim) ckpt = PyTorchCheckpoint(environment_name=env_name, agent_name="DQN", restore_only=True) ckpt.register_pair(("network", network)) @@ -114,6 +120,25 @@ def _build_prioritized_dqn(state_dim, action_dim, device, checkpoint_path, env_n return network, actor +def _build_prioritized_dqn(state_dim, action_dim, device, checkpoint_path, env_name) -> tuple: + network = DqnConvNet(state_dim=state_dim, action_dim=action_dim) + # The bundled PER-DQN_Pong_4.ckpt is stamped with agent_name="PER-DQN" + # (matches deep_rl_zoo/prioritized_dqn/run_atari.py line 96). The previous + # "DQN" value here caused PyTorchCheckpoint.restore to reject the file with + # 'agent_name "PER-DQN" and "DQN" mismatch.' 
+ ckpt = PyTorchCheckpoint(environment_name=env_name, agent_name="PER-DQN", restore_only=True) + ckpt.register_pair(("network", network)) + ckpt.restore(checkpoint_path) + network.eval() + actor = greedy_actors.EpsilonGreedyActor( + network=network, + exploration_epsilon=0.01, + random_state=np.random.RandomState(0), + device=device, + ) + return network, actor + + def _build_rainbow(state_dim, action_dim, device, checkpoint_path, env_name) -> tuple: atoms = torch.linspace(-10.0, 10.0, 51) network = RainbowDqnConvNet(state_dim=state_dim, action_dim=action_dim, atoms=atoms) @@ -145,6 +170,7 @@ def _build_ppo_rnd(state_dim, action_dim, device, checkpoint_path, env_name) -> ALGO_FACTORIES: dict[str, Callable] = { + "dqn": _build_dqn, "iqn": _build_iqn, "prioritized_dqn": _build_prioritized_dqn, "rainbow": _build_rainbow, diff --git a/logs/dqn_atari_results.csv b/logs/dqn_atari_results.csv deleted file mode 100644 index fa50993..0000000 --- a/logs/dqn_atari_results.csv +++ /dev/null @@ -1,11 +0,0 @@ -iteration,train_step,train_episode_return,train_num_episodes,train_step_rate,train_duration,eval_episode_return,eval_num_episodes,eval_step_rate,eval_duration -0,100000,-20.131782945736433,258,476.45469128261504,524.7088643979987,-20.151898734177216,79,1217.7285937384095,82.12010501699842 -1,100000,-19.065,200,390.1878171324131,640.7170829610004,-14.707317073170731,41,1220.744320352241,81.91723552000258 -2,100000,-17.714285714285715,154,406.5791149076162,614.8864780149997,-13.155555555555555,45,1270.0919436545248,78.73445737500151 -3,100000,-13.85344827586207,116,390.42991455198853,640.3197877059974,-11.095238095238095,42,1255.365842830174,79.65805392199763 -4,100000,-9.304878048780488,82,406.4174204111975,615.1311126059991,-4.827586206896552,29,1259.9330047162384,79.3692994990015 -5,100000,-5.972602739726027,73,380.72264559119515,656.6459938619992,-9.277777777777779,36,1341.3067628432095,74.55416074099776 
-6,100000,-5.094594594594595,74,380.8526366902956,656.4218700769998,-8.266666666666667,30,1259.1598138307777,79.41803645699838 -7,100000,-3.7222222222222223,72,387.59119042180583,645.009500158998,-2.933333333333333,30,1323.8880291940434,75.53508891599995 -8,100000,-3.175675675675676,74,391.36636828092804,638.7876431439981,-1.625,32,1328.2857482934512,75.28500560099928 -9,100000,-1.125,72,356.01601829885976,702.2155946650018,1.8518518518518519,27,1273.4912241384397,78.52429455699894 diff --git a/logs/iqn_atari_results.csv b/logs/iqn_atari_results.csv deleted file mode 100644 index c113f32..0000000 --- a/logs/iqn_atari_results.csv +++ /dev/null @@ -1,4 +0,0 @@ -iteration,train_step,train_episode_return,train_num_episodes,train_step_rate,train_duration,eval_episode_return,eval_num_episodes,eval_step_rate,eval_duration -0,20000,-19.7558886509636,467,188.7191985026718,2649.4389758280013,-13.88888888888889,9,1054.6446813851342,18.963732859992888 -1,20000,-14.63671875,256,143.51256916545498,3484.015392572008,-8.428571428571429,7,1060.580719299218,18.857593426000676 -2,20000,-4.253246753246753,154,135.5667938192059,3688.218817557994,1.3333333333333333,6,1120.5408966548387,17.848523029999342 diff --git a/logs/per_dqn_atari_results.csv b/logs/per_dqn_atari_results.csv deleted file mode 100644 index 7b87621..0000000 --- a/logs/per_dqn_atari_results.csv +++ /dev/null @@ -1,11 +0,0 @@ -iteration,train_step,train_episode_return,train_num_episodes,train_step_rate,train_duration,eval_episode_return,eval_num_episodes,eval_step_rate,eval_duration -0,100000,-20.29811320754717,265,440.58968904905515,567.421358724001,-20.721739130434784,115,1190.0236988683332,84.03193994799949 -1,100000,-19.023923444976077,209,268.8951271906998,929.7304960930014,-14.895833333333334,48,1122.0835214096946,89.11992564899992 -2,100000,-17.026143790849673,153,231.6129773466,1079.3868412039992,-13.023809523809524,42,1125.142582129794,88.87762456799828 
-3,100000,-13.451327433628318,113,197.2038077621946,1267.7240000429993,-6.352941176470588,34,1274.459201403998,78.46465378400171 -4,100000,-7.426829268292683,82,183.92364790829856,1359.259686522997,-0.6785714285714286,28,1289.0650044715117,77.57560685700082 -5,100000,-5.135135135135135,74,181.10410635845724,1380.4214880979998,-7.0625,32,1248.378690973606,80.10389853900051 -6,100000,-2.1710526315789473,76,229.9497161461916,1087.1942100640008,8.382352941176471,34,1434.766830123162,69.69773617600003 -7,100000,3.0126582278481013,79,229.34122898997447,1090.0787490370021,8.794117647058824,34,1428.4741134458516,70.00476876600442 -8,100000,7.036144578313253,83,235.12215889236467,1063.2770691530022,9.833333333333334,36,1445.4816297503303,69.1810936519978 -9,100000,9.363636363636363,88,232.42290581021103,1075.625481612995,12.54054054054054,37,1476.188982063073,67.74200404899602 diff --git a/logs/ppo_rnd_atari_results.csv b/logs/ppo_rnd_atari_results.csv deleted file mode 100644 index c646e91..0000000 --- a/logs/ppo_rnd_atari_results.csv +++ /dev/null @@ -1,396 +0,0 @@ -iteration,type,step,episode_return,num_episodes,step_rate,duration -0,PPO-RND-actor0,250000,0.0,363,40.10077376343568,6234.293669115001 -0,PPO-RND-actor1,250000,0.2604166666666667,384,39.97605988967947,6253.742882363 -0,PPO-RND-actor2,250000,0.0,364,39.920651102024294,6262.422908912 -0,PPO-RND-actor3,250000,0.0,392,39.87488723392417,6269.61020688 -0,PPO-RND-actor4,250000,0.0,402,39.87641710888787,6269.3696707340005 -0,PPO-RND-actor5,250000,0.0,407,39.83500873574856,6275.886661866 -0,PPO-RND-actor6,250000,0.0,379,39.78679800082163,6283.4913227960005 -0,PPO-RND-actor8,250000,0.0,418,39.791459096483635,6282.755286601 -0,PPO-RND-actor7,250000,0.0,382,39.76486058806976,6286.957788932999 -0,PPO-RND-actor9,250000,0.0,417,39.76379227374892,6287.126697547001 -0,PPO-RND-actor10,250000,0.0,375,39.742506796156434,6290.493986257 -0,PPO-RND-actor12,250000,0.2518891687657431,397,39.73313255605236,6291.978102841999 
-0,PPO-RND-actor13,250000,0.0,412,39.73924296930664,6291.010631307 -0,PPO-RND-actor11,250000,0.0,389,39.724377885049,6293.364762650999 -0,PPO-RND-actor16,250000,0.0,348,39.74437704651411,6290.197974607 -0,PPO-RND-actor14,250000,0.0,375,39.72844239790694,6292.720904989999 -0,PPO-RND-actor15,250000,0.0,380,39.72764700239217,6292.846892869 -0,PPO-RND-actor17,250000,0.0,386,39.72994007221621,6292.4836922879995 -0,PPO-RND-actor18,250000,0.0,393,39.73017361572876,6292.446703556 -0,PPO-RND-actor19,250000,0.0,383,39.734026227225094,6291.8365878740005 -0,PPO-RND-actor22,250000,0.0,374,39.74721922873651,6289.748184931 -0,PPO-RND-actor20,250000,0.0,386,39.73456133673493,6291.751855050001 -0,PPO-RND-actor24,250000,0.0,391,39.75370395783253,6288.722184609 -0,PPO-RND-actor21,250000,0.28169014084507044,355,39.733151591348296,6291.975088491 -0,PPO-RND-actor23,250000,0.0,346,39.744098190326845,6290.242108471 -0,PPO-RND-actor25,250000,0.0,402,39.755549202822394,6288.430294965 -0,PPO-RND-actor26,250000,0.0,359,39.757490799593235,6288.123193191 -0,PPO-RND-actor27,250000,0.26595744680851063,376,39.76254646740721,6287.323680462 -0,PPO-RND-actor30,250000,0.0,436,39.7802013734303,6284.53329467 -0,PPO-RND-actor29,250000,0.0,371,39.772854846859225,6285.694123859001 -0,PPO-RND-actor28,250000,0.0,365,39.76659110212953,6286.684200764 -0,PPO-RND-actor31,250000,0.0,386,39.783422743508225,6284.024419211 -0,evaluation,100000,0.0,181,934.0691745381986,107.0584521210003 -1,PPO-RND-actor6,250000,0.0,556,39.41228207451263,6343.200313226001 -1,PPO-RND-actor16,250000,0.0,495,39.40692990575388,6344.061833741001 -1,PPO-RND-actor13,250000,0.0,541,39.40262906008047,6344.754295932999 -1,PPO-RND-actor5,250000,0.0,515,39.402426785208526,6344.786867133999 -1,PPO-RND-actor4,250000,0.0,526,39.39912121449483,6345.3191922470005 -1,PPO-RND-actor26,250000,0.0,501,39.39742859370066,6345.591804434999 -1,PPO-RND-actor30,250000,0.0,492,39.39631659156912,6345.770915382999 
-1,PPO-RND-actor7,250000,0.0,505,39.395701056161286,6345.8700644420005 -1,PPO-RND-actor23,250000,0.0,496,39.39536811440133,6345.923695243001 -1,PPO-RND-actor22,250000,0.0,505,39.39490321142087,6345.998584088999 -1,PPO-RND-actor8,250000,0.0,513,39.3948257755348,6346.011058012 -1,PPO-RND-actor10,250000,0.0,508,39.3946609104636,6346.037615813 -1,PPO-RND-actor14,250000,0.0,487,39.39355349115066,6346.216013647 -1,PPO-RND-actor15,250000,0.0,510,39.39315353715671,6346.280446022 -1,PPO-RND-actor17,250000,0.0,460,39.39286034426812,6346.327680071 -1,PPO-RND-actor3,250000,0.0,507,39.39248454422409,6346.388223350999 -1,PPO-RND-actor27,250000,0.0,523,39.39200277420782,6346.465840616999 -1,PPO-RND-actor28,250000,0.0,536,39.39125338264046,6346.5865777750005 -1,PPO-RND-actor21,250000,0.0,483,39.390733259306174,6346.6703794079995 -1,PPO-RND-actor29,250000,0.0,483,39.389801487679655,6346.820510842001 -1,PPO-RND-actor2,250000,0.0,542,39.38925576287296,6346.908443891999 -1,PPO-RND-actor0,250000,0.0,487,39.38919732996486,6346.917859374999 -1,PPO-RND-actor19,250000,0.0,479,39.38893441599996,6346.960223896001 -1,PPO-RND-actor18,250000,0.0,480,39.387620829353416,6347.171896548999 -1,PPO-RND-actor1,250000,0.0,525,39.38738086045583,6347.210566899999 -1,PPO-RND-actor20,250000,0.0,515,39.386947910527134,6347.280336823999 -1,PPO-RND-actor31,250000,0.0,486,39.386231450400224,6347.395797814001 -1,PPO-RND-actor25,250000,0.0,519,39.386147799351896,6347.409278855999 -1,PPO-RND-actor11,250000,0.0,464,39.38498925842746,6347.595992971 -1,PPO-RND-actor12,250000,0.0,459,39.384374496209276,6347.695074453 -1,PPO-RND-actor24,250000,0.0,503,39.38148951015941,6348.160090173999 -1,PPO-RND-actor9,250000,0.0,476,39.37965847887325,6348.455259816999 -1,evaluation,100000,0.0,240,964.7532577057474,103.65344630999971 -iteration,type,step,episode_return,num_episodes,step_rate,duration -0,PPO-RND-actor0,250000,0.19455252918287938,514,68.51926352388662,3648.6089771360002 
-0,PPO-RND-actor1,250000,0.19193857965451055,521,68.26257341854344,3662.3289671070006 -0,PPO-RND-actor2,250000,0.0,475,67.98746488722144,3677.1484333870003 -0,PPO-RND-actor3,250000,0.0,473,67.73195728191534,3691.0198676150003 -0,PPO-RND-actor4,250000,0.20161290322580644,496,67.65161714412301,3695.403163348001 -0,PPO-RND-actor5,250000,0.0,568,67.59957498744575,3698.2481035779992 -0,PPO-RND-actor7,250000,0.0,459,67.57150584348665,3699.784352580001 -0,PPO-RND-actor6,250000,0.0,474,67.51138469157492,3703.0791346099995 -0,PPO-RND-actor9,250000,0.18518518518518517,540,67.4258717672246,3707.775568154 -0,PPO-RND-actor8,250000,0.0,463,67.40619726370765,3708.8577927330007 -0,PPO-RND-actor10,250000,0.0,498,67.3810645897205,3710.241171199 -0,PPO-RND-actor15,250000,0.0,476,67.35470524854738,3711.6931783380005 -0,PPO-RND-actor12,250000,0.0,531,67.29658239006548,3714.898901565999 -0,PPO-RND-actor14,250000,0.0,526,67.3234866991206,3713.4143262260004 -0,PPO-RND-actor13,250000,0.0,497,67.28542175378668,3715.5150920330007 -0,PPO-RND-actor11,250000,0.20161290322580644,496,67.23836671955806,3718.115299301 -0,PPO-RND-actor17,250000,0.0,530,67.34066820535003,3712.4668742170006 -0,PPO-RND-actor16,250000,0.4098360655737705,488,67.27466396418174,3716.109234423 -0,PPO-RND-actor20,250000,0.1968503937007874,508,67.30968629173664,3714.1756821809995 -0,PPO-RND-actor19,250000,0.205761316872428,486,67.28984649062424,3715.270773204 -0,PPO-RND-actor25,250000,0.0,464,67.38712233261406,3709.9076403060008 -0,PPO-RND-actor27,250000,0.2070393374741201,483,67.41623205181659,3708.3057357440002 -0,PPO-RND-actor18,250000,0.0,512,67.25480217353396,3717.206681464 -0,PPO-RND-actor22,250000,0.0,522,67.31886544525152,3713.669241845999 -0,PPO-RND-actor26,250000,0.37523452157598497,533,67.36516634541537,3711.1167916980003 -0,PPO-RND-actor24,250000,0.0,446,67.32766525651112,3713.183860565 -0,PPO-RND-actor23,250000,0.18832391713747645,531,67.30683561069159,3714.3329905749997 
-0,PPO-RND-actor29,250000,0.2127659574468085,470,67.40510043604573,3708.918143920001 -0,PPO-RND-actor21,250000,0.20202020202020202,495,67.26371794281974,3716.7139677370005 -0,PPO-RND-actor30,250000,0.19342359767891681,517,67.4140343084863,3708.4266290309997 -0,PPO-RND-actor28,250000,0.0,522,67.37782413491824,3710.4196107519992 -0,PPO-RND-actor31,250000,0.0,499,67.41638880784944,3708.297113221999 -0,evaluation,100000,0.0,176,943.6470162243431,105.97182874600003 -1,PPO-RND-actor31,250000,0.21321961620469082,469,65.29084173141425,3829.0209372459994 -1,PPO-RND-actor10,250000,0.0,481,65.28902998040711,3829.127191428999 -1,PPO-RND-actor27,250000,0.22026431718061673,454,65.25471539885476,3831.1407608160007 -1,PPO-RND-actor4,250000,0.0,470,65.25016332922705,3831.4080340090004 -1,PPO-RND-actor17,250000,0.19723865877712032,507,65.23702949850536,3832.1793913950005 -1,PPO-RND-actor20,250000,0.0,487,65.23148775693863,3832.5049542260003 -1,PPO-RND-actor2,250000,0.0,453,65.22914637768665,3832.6425207600005 -1,PPO-RND-actor13,250000,0.0,483,65.22685406255043,3832.7772141250007 -1,PPO-RND-actor7,250000,0.2145922746781116,466,65.22523889544199,3832.872124865 -1,PPO-RND-actor26,250000,0.0,501,65.22080137709543,3833.132907315001 -1,PPO-RND-actor19,250000,0.22779043280182232,439,65.21699884904032,3833.356401122999 -1,PPO-RND-actor22,250000,0.2159827213822894,463,65.21467297189758,3833.4931175340007 -1,PPO-RND-actor18,250000,0.0,475,65.21388849601267,3833.539231682 -1,PPO-RND-actor0,250000,0.0,490,65.20818155477939,3833.8747384020007 -1,PPO-RND-actor25,250000,0.4672897196261682,428,65.20726737099865,3833.928488026 -1,PPO-RND-actor1,250000,0.0,491,65.20636343153751,3833.9816368150005 -1,PPO-RND-actor12,250000,0.0,457,65.19557990343873,3834.6157879149996 -1,PPO-RND-actor14,250000,0.0,476,65.19389959468586,3834.7146213720007 -1,PPO-RND-actor30,250000,0.0,480,65.19287253420595,3834.7750341699993 -1,PPO-RND-actor24,250000,0.2183406113537118,458,65.19261784965052,3834.790015282999 
-1,PPO-RND-actor11,250000,0.0,418,65.19168920436987,3834.844641259 -1,PPO-RND-actor23,250000,0.0,447,65.18737800456198,3835.098260625 -1,PPO-RND-actor9,250000,0.0,407,65.18469296609162,3835.256233086 -1,PPO-RND-actor3,250000,0.0,416,65.18318864968951,3835.3447442340002 -1,PPO-RND-actor16,250000,0.20366598778004075,491,65.17878630258328,3835.603793532 -1,PPO-RND-actor6,250000,0.21739130434782608,460,65.17578968805557,3835.780144691 -1,PPO-RND-actor5,250000,0.0,477,65.17325305335092,3835.929438651001 -1,PPO-RND-actor15,250000,0.21141649048625794,473,65.16796653913971,3836.240614471999 -1,PPO-RND-actor29,250000,0.20876826722338204,479,65.16342080666696,3836.5082266280006 -1,PPO-RND-actor21,250000,0.4048582995951417,494,65.16355393019732,3836.500388972001 -1,PPO-RND-actor8,250000,0.0,424,65.16186033198947,3836.6001020580006 -1,PPO-RND-actor28,250000,0.0,472,65.1613877292595,3836.627928164 -1,evaluation,100000,0.0,269,908.7096939988777,110.0461463770007 -iteration,type,step,episode_return,num_episodes,step_rate,duration -0,PPO-RND-actor0,250000,0.0,183,65.96957019791607,3789.6260237859988 -0,PPO-RND-actor1,250000,0.5494505494505495,182,65.77103997098803,3801.065029689 -0,PPO-RND-actor2,250000,0.0,175,65.46562681691744,3818.797927333 -0,PPO-RND-actor3,250000,0.0,180,65.29067898233608,3829.0304817880005 -0,PPO-RND-actor4,250000,0.0,188,65.12647945487119,3838.6843890929995 -0,PPO-RND-actor5,250000,0.0,185,65.01937909141138,3845.007496127002 -0,PPO-RND-actor6,250000,0.0,190,64.9679163358041,3848.0532253459987 -0,PPO-RND-actor7,250000,0.0,166,64.90325958024071,3851.8866635799986 -0,PPO-RND-actor8,250000,0.0,181,64.89513273532147,3852.369036975999 -0,PPO-RND-actor9,250000,0.0,193,64.89385731997422,3852.444750931001 -0,PPO-RND-actor11,250000,0.0,166,64.8782552386102,3853.3711962589987 -0,PPO-RND-actor10,250000,0.0,180,64.85139424481945,3854.9672356500014 -0,PPO-RND-actor13,250000,0.0,208,64.8589867436422,3854.515966895 
-0,PPO-RND-actor12,250000,0.0,180,64.82992512930713,3856.2438488299995 -0,PPO-RND-actor14,250000,0.0,191,64.81340863122205,3857.226541230999 -0,PPO-RND-actor15,250000,0.0,175,64.80927202357931,3857.472737991 -0,PPO-RND-actor16,250000,0.0,189,64.80690641308364,3857.6135451749997 -0,PPO-RND-actor18,250000,0.0,188,64.81650254127516,3857.042422811999 -0,PPO-RND-actor19,250000,0.0,169,64.81811640478163,3856.9463888579994 -0,PPO-RND-actor17,250000,0.0,196,64.77538794627817,3859.490586259999 -0,PPO-RND-actor21,250000,0.5555555555555556,180,64.82588040614891,3856.4844539509995 -0,PPO-RND-actor20,250000,0.0,185,64.80786332299209,3857.556586212999 -0,PPO-RND-actor23,250000,0.0,176,64.84416972784078,3855.3967311060005 -0,PPO-RND-actor22,250000,0.0,172,64.8210581254022,3856.7713522410013 -0,PPO-RND-actor24,250000,0.0,163,64.85622026225404,3854.680383609999 -0,PPO-RND-actor25,250000,0.0,171,64.86961616308669,3853.884372793 -0,PPO-RND-actor26,250000,0.0,183,64.87951570030366,3853.296334005001 -0,PPO-RND-actor28,250000,0.0,176,64.90886975109584,3851.5537392450024 -0,PPO-RND-actor27,250000,0.0,176,64.89006706081977,3852.6697740299987 -0,PPO-RND-actor29,250000,0.0,161,64.92292655443674,3850.719819144002 -0,PPO-RND-actor30,250000,0.0,166,64.93824901038144,3849.811225431 -0,PPO-RND-actor31,250000,0.5847953216374269,171,64.9527033055448,3848.954505003001 -0,evaluation,100000,0.0,83,965.4209725481414,103.5817563979981 -iteration,type,step,episode_return,num_episodes,step_rate,duration -0,PPO-RND-actor0,250000,290.7859078590786,369,38.41402016057045,6508.040526740002 -0,PPO-RND-actor1,250000,274.937343358396,399,38.280254866358966,6530.782014716999 -0,PPO-RND-actor2,250000,284.98659517426273,373,38.11769462103782,6558.6337916150005 -0,PPO-RND-actor3,250000,283.7628865979381,388,37.98888228562734,6580.872743776003 -0,PPO-RND-actor4,250000,284.8,375,37.885100242546,6598.900316997002 -0,PPO-RND-actor5,250000,278.60696517412936,402,37.85055792053815,6604.922456488999 
-0,PPO-RND-actor6,250000,294.2708333333333,384,37.82177470299701,6609.948950391001 -0,PPO-RND-actor7,250000,294.25587467362925,383,37.796766128700575,6614.322483271 -0,PPO-RND-actor9,250000,282.5,400,37.79794977942078,6614.115354376001 -0,PPO-RND-actor8,250000,285.5614973262032,374,37.77998803360916,6617.259904306997 -0,PPO-RND-actor10,250000,274.9360613810742,391,37.76336759097249,6620.172297868998 -0,PPO-RND-actor11,250000,277.4611398963731,386,37.759148817489375,6620.911959863999 -0,PPO-RND-actor13,250000,284.5771144278607,402,37.75246012379771,6622.085002677999 -0,PPO-RND-actor12,250000,281.1224489795918,392,37.733968224289626,6625.330220082 -0,PPO-RND-actor15,250000,300.2659574468085,376,37.76257815342621,6620.310694472999 -0,PPO-RND-actor14,250000,292.6509186351706,381,37.75114106125244,6622.316384936999 -0,PPO-RND-actor17,250000,286.88946015424165,389,37.750942552770546,6622.351207537002 -0,PPO-RND-actor16,250000,298.14323607427053,377,37.74178378604597,6623.958247899001 -0,PPO-RND-actor18,250000,280.5128205128205,390,37.75358908118576,6621.886980397998 -0,PPO-RND-actor19,250000,291.0941475826972,393,37.755532294489875,6621.546163089999 -0,PPO-RND-actor20,250000,287.27272727272725,385,37.76012715392586,6620.740417024997 -0,PPO-RND-actor21,250000,278.1954887218045,399,37.76244370642415,6620.334265006 -0,PPO-RND-actor23,250000,296.8421052631579,380,37.77910681660147,6617.414255282001 -0,PPO-RND-actor22,250000,295.97855227882036,373,37.76836523085266,6619.296293920001 -0,PPO-RND-actor25,250000,283.3333333333333,390,37.789847215383034,6615.533494356998 -0,PPO-RND-actor24,250000,285.0828729281768,362,37.7804422036362,6617.180356241002 -0,PPO-RND-actor28,250000,289.6825396825397,378,37.81204234139295,6611.650271170998 -0,PPO-RND-actor26,250000,282.25,400,37.79599606115843,6614.4572455630005 -0,PPO-RND-actor27,250000,284.9104859335038,391,37.799942792662826,6613.766623174 -0,PPO-RND-actor30,250000,287.012987012987,385,37.81929641989205,6610.382097655998 
-0,PPO-RND-actor31,250000,298.67021276595744,376,37.82680193682498,6609.070479114998 -0,PPO-RND-actor29,250000,296.7741935483871,372,37.81095516757551,6611.840375150998 -0,evaluation,100000,415.1515151515151,132,741.4156057393667,134.87711780800237 -1,PPO-RND-actor8,250000,1191.3725490196077,255,50.23752569939795,4976.359733477002 -1,PPO-RND-actor9,250000,1190.6474820143885,278,50.23541252383664,4976.569066321001 -1,PPO-RND-actor24,250000,1180.2197802197802,273,50.232073295091425,4976.899888869 -1,PPO-RND-actor1,250000,1241.4937759336099,241,50.22482984283342,4977.6176600760045 -1,PPO-RND-actor10,250000,1200.7751937984497,258,50.198235817206,4980.254702782 -1,PPO-RND-actor19,250000,1189.090909090909,275,50.19613889947928,4980.462750344996 -1,PPO-RND-actor4,250000,1205.226480836237,287,50.19495448104159,4980.580271159004 -1,PPO-RND-actor6,250000,1237.630662020906,287,50.1885214160885,4981.218672042003 -1,PPO-RND-actor27,250000,1237.4045801526718,262,50.180639668455036,4982.001059606999 -1,PPO-RND-actor20,250000,1199.2424242424242,264,50.179500507838235,4982.114159565001 -1,PPO-RND-actor5,250000,1203.0075187969924,266,50.17908862435254,4982.155054101 -1,PPO-RND-actor16,250000,1230.514705882353,272,50.17714190938817,4982.348346014998 -1,PPO-RND-actor11,250000,1205.3639846743295,261,50.176613401285735,4982.400824875 -1,PPO-RND-actor23,250000,1241.0646387832699,263,50.17610364288271,4982.451443008002 -1,PPO-RND-actor13,250000,1237.6923076923076,260,50.17501013539602,4982.560029890999 -1,PPO-RND-actor7,250000,1240.2826855123674,283,50.17337418716395,4982.722490765998 -1,PPO-RND-actor21,250000,1208.921933085502,269,50.17223092096988,4982.836031226001 -1,PPO-RND-actor2,250000,1228.957528957529,259,50.171176888832875,4982.9407142260025 -1,PPO-RND-actor31,250000,1230.5343511450383,262,50.17022677939793,4983.0350797349965 -1,PPO-RND-actor12,250000,1264.6840148698884,269,50.16975234167676,4983.082202546997 
-1,PPO-RND-actor17,250000,1195.3846153846155,260,50.1666285685315,4983.392488863003 -1,PPO-RND-actor0,250000,1197.7272727272727,264,50.16528042829271,4983.526412403 -1,PPO-RND-actor30,250000,1190.5511811023623,254,50.16114378820768,4983.937388979 -1,PPO-RND-actor15,250000,1224.6031746031747,252,50.160926383532875,4983.958990081002 -1,PPO-RND-actor25,250000,1245.4212454212454,273,50.15633544239143,4984.415184939997 -1,PPO-RND-actor3,250000,1212.8301886792453,265,50.152197646602175,4984.826422994 -1,PPO-RND-actor18,250000,1191.2727272727273,275,50.150339428152336,4985.011125561003 -1,PPO-RND-actor14,250000,1220.4724409448818,254,50.14890044073688,4985.154166948003 -1,PPO-RND-actor29,250000,1230.188679245283,265,50.147741459715675,4985.269380493002 -1,PPO-RND-actor28,250000,1217.6,250,50.14826761138272,4985.217075440003 -1,PPO-RND-actor22,250000,1208.1712062256809,257,50.14747926230219,4985.295446104999 -1,PPO-RND-actor26,250000,1223.5714285714287,280,50.14484981090127,4985.556860630004 -1,evaluation,100000,1346.1538461538462,52,997.7336813966709,100.2271466470047 -iteration,role,step,episode_return,num_episodes,step_rate,duration -0,PPO-RND-actor20,0,65.0197628458498,506,75.06282349711725,6661.087029575996 -0,PPO-RND-actor19,0,66.39511201629328,491,73.93385252240324,6762.801922820014 -0,PPO-RND-actor17,0,69.25996204933587,527,72.70260233550557,6877.332914338011 -0,PPO-RND-actor12,0,74.45972495088408,509,72.36699076245084,6909.227463129995 -0,PPO-RND-actor21,0,68.33667334669339,499,72.26974423913788,6918.524553587995 -0,PPO-RND-actor15,0,71.68316831683168,505,72.00338542569388,6944.117933398986 -0,PPO-RND-actor10,0,73.17554240631164,507,71.91253449855648,6952.890806679003 -0,PPO-RND-actor16,0,70.8,500,71.90837338810368,6953.293148509998 -0,PPO-RND-actor14,0,71.71314741035856,502,71.62797789813082,6980.512568860999 -0,PPO-RND-actor18,0,81.73076923076923,520,70.87743260765924,7054.4315955649945 
-0,PPO-RND-actor13,0,78.91566265060241,498,70.73619683220294,7068.516861120996 -0,PPO-RND-actor11,0,90.7949790794979,478,70.37057649256266,7105.242345895007 -0,PPO-RND-actor0,0,84.30962343096235,478,69.89899485960741,7153.178683102 -0,PPO-RND-actor8,0,85.57504873294347,513,69.92832076854128,7150.178847494011 -0,PPO-RND-actor3,0,81.4453125,512,69.71526752763762,7172.030141056006 -0,PPO-RND-actor4,0,83.16831683168317,505,69.6980362126558,7173.803268638003 -0,PPO-RND-actor1,0,96.56565656565657,495,69.66320276826649,7177.390360061996 -0,PPO-RND-actor7,0,94.6938775510204,490,69.71724149464922,7171.827072911008 -0,PPO-RND-actor6,0,82.93172690763052,498,69.70413984509756,7173.1750956419855 -0,PPO-RND-actor2,0,89.09465020576131,486,69.65638420196706,7178.092944794 -0,PPO-RND-actor9,0,83.26530612244898,490,69.70490354387238,7173.0965051160165 -0,PPO-RND-actor5,0,87.60162601626017,492,69.65051169254286,7178.698158129002 -0,evaluation,0,491.30434782608694,23,867.2369707301143,23.061747451982228 -1,PPO-RND-actor20,500000,478.08641975308643,324,76.13885218707664,6566.949535455002 -1,PPO-RND-actor17,500000,480.82191780821915,292,75.13620421044342,6654.581572947005 -1,PPO-RND-actor19,500000,476.5676567656766,303,74.88687002773453,6676.737855579006 -iteration,role,step,episode_return,num_episodes,step_rate,duration -0,PPO-RND-actor27,0,8.305084745762711,590,60.78072857302168,8226.291650967993 -0,PPO-RND-actor31,0,8.528428093645484,598,60.7875456234115,8225.369109283987 -0,PPO-RND-actor29,0,7.641196013289036,602,58.7534136579586,8510.143817528995 -0,PPO-RND-actor25,0,7.308970099667774,602,58.714365865349066,8515.803460207011 -0,PPO-RND-actor30,0,8.038585209003216,622,58.52596641949247,8543.216465938982 -0,PPO-RND-actor26,0,5.741626794258373,627,58.1754075944177,8594.69698065301 -0,PPO-RND-actor28,0,8.047945205479452,584,58.12914714906444,8601.536828294018 -0,PPO-RND-actor23,0,6.0,600,58.01405104913374,8618.601717307 
-0,PPO-RND-actor24,0,5.583756345177665,591,57.87442286674586,8639.395007899002 -0,PPO-RND-actor13,0,5.339805825242719,618,57.61759015762619,8677.905456165987 -0,PPO-RND-actor15,0,7.6923076923076925,624,57.12912146528722,8752.103781322978 -0,PPO-RND-actor21,0,7.413509060955519,607,56.997585112780435,8772.301475767017 -0,PPO-RND-actor10,0,5.815831987075929,619,56.812778597386995,8800.836930426012 -0,PPO-RND-actor22,0,4.368932038834951,618,56.868320973412395,8792.241294300999 -0,PPO-RND-actor14,0,8.085808580858085,606,56.774281565529705,8806.804528612003 -0,PPO-RND-actor12,0,6.688963210702341,598,56.64303111568042,8827.211223546008 -0,PPO-RND-actor16,0,6.57051282051282,624,56.65066245958657,8826.022120336012 -0,PPO-RND-actor18,0,7.33652312599681,627,56.628741451556785,8829.438676961 -0,PPO-RND-actor7,0,5.990016638935108,601,56.490555735565586,8851.037018303003 -0,PPO-RND-actor3,0,6.610169491525424,590,56.373134280855915,8869.473134294007 -0,PPO-RND-actor1,0,7.863247863247863,585,56.32332572919873,8877.316698306997 -0,PPO-RND-actor11,0,6.644518272425249,602,56.35590448842082,8872.184814330016 -0,PPO-RND-actor8,0,7.619047619047619,630,56.300579929496834,8880.903191869991 -0,PPO-RND-actor4,0,9.586776859504132,605,56.25453987581705,8888.171534310997 -0,PPO-RND-actor6,0,6.310679611650485,618,56.26354430813193,8886.749069018988 -0,PPO-RND-actor5,0,7.448494453248811,631,56.220449663292925,8893.561026183976 -0,PPO-RND-actor9,0,7.783417935702199,591,56.252312706179794,8888.523439234006 -0,PPO-RND-actor0,0,6.840390879478828,614,56.170872981834336,8901.410525731015 -0,PPO-RND-actor2,0,5.970149253731344,603,56.18253492612748,8899.562838476995 -0,PPO-RND-actor17,0,6.957928802588997,618,56.306431157155714,8879.98030996602 -0,PPO-RND-actor20,0,5.443234836702955,643,56.33471989750631,8875.521186750993 -0,PPO-RND-actor19,0,6.6115702479338845,605,56.3144641151477,8878.713628130005 -0,evaluation,0,0.0,20,958.0275841149519,20.876225624000654 
-1,PPO-RND-actor27,500000,1.4610389610389611,616,62.55334050150349,7993.178237827 -1,PPO-RND-actor31,500000,2.0761245674740483,578,62.35334282116275,8018.816271552001 -1,PPO-RND-actor29,500000,1.5,600,60.442123027535814,8272.376530722016 -1,PPO-RND-actor25,500000,1.335559265442404,599,60.43961339955241,8272.720023780013 -1,PPO-RND-actor30,500000,2.1702838063439067,599,60.18296622412119,8307.99861439201 -1,PPO-RND-actor26,500000,1.495016611295681,602,59.88622516506365,8349.165415283001 -1,PPO-RND-actor24,500000,0.9630818619582665,623,59.69085459554522,8376.492569722992 -1,PPO-RND-actor23,500000,1.680672268907563,595,59.67378818715491,8378.88820518399 -1,PPO-RND-actor28,500000,2.1630615640599,601,59.667286317542505,8379.80124215901 -1,PPO-RND-actor13,500000,1.6920473773265652,591,59.21470126414199,8443.849066630006 -1,PPO-RND-actor21,500000,1.644736842105263,608,58.684265640910056,8520.171370287018 -1,PPO-RND-actor15,500000,2.4958402662229617,601,58.57270308837018,8536.399613410991 -1,PPO-RND-actor16,500000,2.195945945945946,592,58.34186524920674,8570.175085494004 -1,PPO-RND-actor14,500000,2.5906735751295336,579,58.32539406654414,8572.595316365012 -1,PPO-RND-actor22,500000,2.2071307300509337,589,58.246738400958655,8584.17164164802 -1,PPO-RND-actor10,500000,2.1630615640599,601,58.18479974943009,8593.30962989002 -1,PPO-RND-actor18,500000,1.3675213675213675,585,58.1702609260808,8595.457404521003 -1,PPO-RND-actor12,500000,1.6891891891891893,592,58.12003015901073,8602.886107113998 -1,PPO-RND-actor7,500000,2.7777777777777777,576,57.99906330224611,8620.828881224996 -1,PPO-RND-actor11,500000,1.174496644295302,596,57.845272423240196,8643.748729224026 -1,PPO-RND-actor3,500000,3.1986531986531985,594,57.71409957409739,8663.394277824002 -1,PPO-RND-actor1,500000,1.9130434782608696,575,57.702992455283734,8665.061875040003 -1,PPO-RND-actor8,500000,1.6778523489932886,596,57.69729010622238,8665.918262009975 
-1,PPO-RND-actor20,500000,2.413793103448276,580,57.681180668903146,8668.338515295996 -1,PPO-RND-actor6,500000,1.362862010221465,587,57.67146477471464,8669.798867658013 -1,PPO-RND-actor9,500000,3.247863247863248,585,57.669576817285815,8670.082695146994 -1,PPO-RND-actor17,500000,1.7064846416382253,586,57.666904393646284,8670.484487720998 -1,PPO-RND-actor19,500000,2.413793103448276,580,57.65974144012425,8671.561604541988 -1,PPO-RND-actor4,500000,1.530612244897959,588,57.655328751862136,8672.225288176007 -1,PPO-RND-actor5,500000,2.4955436720142603,561,57.654677752816355,8672.323209291993 -1,PPO-RND-actor2,500000,1.9031141868512111,578,57.627794724424476,8676.368797227013 -1,PPO-RND-actor0,500000,2.1352313167259784,562,57.613104574927235,8678.581091733009 -1,evaluation,20000,0.0,24,961.2247098905945,20.806789291003952 -2,PPO-RND-actor27,1000000,3.3755274261603376,711,62.40720541993789,8011.895367457997 -2,PPO-RND-actor31,1000000,3.566333808844508,701,62.036822960293094,8059.729304965003 -2,PPO-RND-actor25,1000000,3.4732272069464543,691,60.39041807395265,8279.459158367012 -2,PPO-RND-actor29,1000000,2.7434842249657065,729,60.205425807607234,8304.899322493002 -2,PPO-RND-actor30,1000000,3.6465638148667603,713,59.83692387545674,8356.044522621 -2,PPO-RND-actor26,1000000,5.255681818181818,704,59.78913752796494,8362.72307434 -2,PPO-RND-actor24,1000000,3.8081805359661494,709,59.59072320607845,8390.567744427011 -2,PPO-RND-actor28,1000000,3.8904899135446684,694,59.47454672476161,8406.957724518987 -2,PPO-RND-actor23,1000000,5.8171745152354575,722,59.41776968535815,8414.991048094002 -2,PPO-RND-actor13,1000000,3.576341127922971,727,58.956580217688156,8480.81754663901 -2,PPO-RND-actor15,1000000,2.292263610315186,698,58.473747986509,8550.845759286021 -2,PPO-RND-actor21,1000000,5.04774897680764,733,58.37872413569031,8564.764088331984 -2,PPO-RND-actor14,1000000,7.174231332357247,683,58.15551156560227,8597.63737846198 -2,PPO-RND-actor16,1000000,4.0,725,58.13925776296911,8600.040991896996 
-2,PPO-RND-actor10,1000000,4.680851063829787,705,58.134771215816684,8600.704699496011 -2,PPO-RND-actor18,1000000,4.378531073446328,708,58.10251299889899,8605.479766589007 -2,PPO-RND-actor22,1000000,4.728789986091794,719,58.070540421785374,8610.217786305002 -2,PPO-RND-actor12,1000000,5.439330543933054,717,57.931990671275834,8630.809923952998 -2,PPO-RND-actor7,1000000,4.243281471004243,707,57.821718993516235,8647.269723268982 -2,PPO-RND-actor11,1000000,4.2995839112343965,721,57.728850271543365,8661.18063408701 -2,PPO-RND-actor6,1000000,4.571428571428571,700,57.59444988230475,8681.392061592021 -2,PPO-RND-actor9,1000000,3.0346820809248554,692,57.579695109918795,8683.616664616013 -2,PPO-RND-actor1,1000000,4.934687953555878,689,57.56787336470706,8685.399872814014 -2,PPO-RND-actor8,1000000,4.305555555555555,720,57.567221648119286,8685.498199935013 -2,PPO-RND-actor3,1000000,4.583333333333333,720,57.56636723305355,8685.627112368995 -2,PPO-RND-actor20,1000000,3.3566433566433567,715,57.53731305933136,8690.013026579982 -2,PPO-RND-actor5,1000000,4.833836858006042,662,57.53014190063428,8691.096240707993 -2,PPO-RND-actor17,1000000,4.60431654676259,695,57.51705642612386,8693.073517108976 -2,PPO-RND-actor4,1000000,3.278688524590164,732,57.51375141532026,8693.573062020994 -2,PPO-RND-actor19,1000000,4.202898550724638,690,57.50906306853278,8694.281793534989 -2,PPO-RND-actor2,1000000,3.542234332425068,734,57.48991148908549,8697.178114370996 -2,PPO-RND-actor0,1000000,5.298913043478261,736,57.46990911627718,8700.205162815982 -2,evaluation,40000,0.0,35,933.6058788812,21.422315831994638 -iteration,role,step,episode_return,num_episodes,step_rate,duration -0,PPO-RND-actor25,0,161.21794871794873,312,48.65246831360169,10276.97087796501 -0,PPO-RND-actor24,0,156.08308605341247,337,48.44812226787633,10320.317415718018 -0,PPO-RND-actor17,0,142.85714285714286,343,48.38786124720945,10333.170078452997 -0,PPO-RND-actor26,0,153.6741214057508,313,48.34012330979667,10343.37452545698 
-0,PPO-RND-actor15,0,158.7859424920128,313,48.25320083842909,10362.006899276981 -0,PPO-RND-actor18,0,160.3658536585366,328,48.20853472090674,10371.607494288008 -0,PPO-RND-actor7,0,153.73134328358208,335,48.109091510198844,10393.045977473987 -0,PPO-RND-actor27,0,153.5031847133758,314,48.22688744598041,10367.660582699988 -0,PPO-RND-actor14,0,176.78018575851394,323,48.085856904810505,10398.067793401016 -0,PPO-RND-actor16,0,155.87301587301587,315,47.90374222970223,10437.59791463599 -0,PPO-RND-actor22,0,163.63636363636363,330,47.91751780811393,10434.597259445989 -0,PPO-RND-actor12,0,150.0,334,47.838344290297414,10451.866748686996 -0,PPO-RND-actor21,0,166.15384615384616,325,47.696076195905626,10483.042629047995 -0,PPO-RND-actor23,0,162.8930817610063,318,47.683086000681094,10485.898500630981 -0,PPO-RND-actor19,0,155.35168195718654,327,47.6414092943325,10495.071564969025 -0,PPO-RND-actor11,0,166.06060606060606,330,47.53895170179774,10517.690906109987 -0,PPO-RND-actor13,0,164.32926829268294,328,47.48606199702407,10529.405450199993 -0,PPO-RND-actor20,0,160.92307692307693,325,47.452166213394,10536.92676012899 -0,PPO-RND-actor10,0,157.14285714285714,329,47.36410434772294,10556.517575614998 -0,PPO-RND-actor9,0,174.21383647798743,318,47.27363777533389,10576.719362623 -0,PPO-RND-actor8,0,159.54692556634305,309,47.244032761410935,10583.347161006997 -0,PPO-RND-actor4,0,157.4468085106383,329,47.15027988744715,10604.390921826009 -0,PPO-RND-actor3,0,162.11180124223603,322,47.075574389332274,10621.219315664988 -0,PPO-RND-actor5,0,146.4968152866242,314,47.08116219473309,10619.958741288981 -0,PPO-RND-actor1,0,166.66666666666666,339,47.05131293641193,10626.696021760989 -0,PPO-RND-actor0,0,154.92537313432837,335,47.039853532602365,10629.284796847001 -0,PPO-RND-actor2,0,166.56716417910448,335,47.04615756769081,10627.860506579978 -0,PPO-RND-actor6,0,173.71794871794873,312,47.04547206097912,10628.015366748004 -0,PPO-RND-actor28,0,163.14199395770393,331,47.142310061686835,10606.183688193007 
-0,PPO-RND-actor29,0,166.34920634920636,315,47.12962707195697,10609.037903835007 -0,PPO-RND-actor30,0,181.22977346278316,309,47.13438622681959,10607.966710203997 -0,PPO-RND-actor31,0,181.15015974440894,313,47.137811413403334,10607.195900865016 -0,evaluation,0,833.3333333333334,6,988.0560444235097,20.241766763007035 diff --git a/logs/r2d2_atari_results.csv b/logs/r2d2_atari_results.csv deleted file mode 100644 index e2494a3..0000000 --- a/logs/r2d2_atari_results.csv +++ /dev/null @@ -1,22 +0,0 @@ -iteration,type,step,episode_return,num_episodes,step_rate,duration -0,R2D2-actor0,250000,-20.726666666666667,300,100.87947550641276,2478.204795821999 -0,R2D2-actor1,250000,-20.82838283828383,303,100.47824870515302,2488.100690663996 -0,R2D2-actor2,250000,-20.845394736842106,304,100.23461857875937,2494.1482647890007 -0,R2D2-actor3,250000,-20.889250814332247,307,100.10183781850482,2497.456644635 -0,R2D2-actor4,250000,-20.916129032258066,310,99.97058680834368,2500.7355461390034 -0,R2D2-actor5,250000,-20.903846153846153,312,99.905568316516,2502.3630235300006 -0,R2D2-actor6,250000,-20.914285714285715,315,99.8384697212569,2504.044790529995 -0,R2D2-actor7,250000,-20.92721518987342,316,99.81112718221299,2504.7307555559964 -0,R2D2-actor8,250000,-20.937304075235108,319,99.78765009898059,2505.320044635002 -0,R2D2-actor9,250000,-20.949685534591193,318,99.79169320186747,2505.218540527996 -0,R2D2-actor10,250000,-20.956656346749227,323,99.78235129569634,2505.4530861790045 -0,R2D2-actor11,250000,-20.94392523364486,321,99.78461122491962,2505.3963424930043 -0,R2D2-actor12,250000,-20.96273291925466,322,99.80103310497016,2504.9840890630003 -0,R2D2-actor13,250000,-20.984520123839008,323,99.83080458007936,2504.2370544000005 -0,R2D2-actor14,250000,-20.969135802469136,324,99.85258009278569,2503.6909388589993 -0,R2D2-actor15,250000,-20.97222222222222,324,99.89119505087885,2502.723086580998 -0,R2D2-actor16,250000,-20.96904024767802,323,99.93486084407358,2501.629540367001 
-0,R2D2-actor17,250000,-20.981538461538463,325,99.98007814047565,2500.498145728001 -0,R2D2-actor18,250000,-20.966153846153848,325,100.02385374329384,2499.403798633997 -0,R2D2-actor19,250000,-20.981424148606813,323,100.06412254229588,2498.3979637090015 -0,evaluation,100000,-20.99230769230769,130,1011.5056919993099,98.86251831400295 diff --git a/logs/rainbow_atari_results.csv b/logs/rainbow_atari_results.csv deleted file mode 100644 index 8b3a30c..0000000 --- a/logs/rainbow_atari_results.csv +++ /dev/null @@ -1,4 +0,0 @@ -iteration,train_step,train_episode_return,train_num_episodes,train_step_rate,train_duration,eval_episode_return,eval_num_episodes,eval_step_rate,eval_duration -0,20000,-18.922600619195048,323,172.20710652455807,2903.4806407870055,-10.714285714285714,7,1168.8725007930923,17.110506052995333 -1,20000,2.8473282442748094,131,132.7355243915388,3766.889099900014,9.2,5,1180.5650297735472,16.94104051501199 -2,20000,12.301775147928995,169,131.64805053720417,3798.0053480450006,15.25,8,1148.3487681337294,17.416311624998343 diff --git a/screenshots/live_play/DQN_Breakout_1.gif b/screenshots/live_play/DQN_Breakout_1.gif new file mode 100644 index 0000000..67c22e7 Binary files /dev/null and b/screenshots/live_play/DQN_Breakout_1.gif differ diff --git a/screenshots/live_play/DQN_Breakout_1_montage.png b/screenshots/live_play/DQN_Breakout_1_montage.png new file mode 100644 index 0000000..5945fbd Binary files /dev/null and b/screenshots/live_play/DQN_Breakout_1_montage.png differ diff --git a/screenshots/live_play/IQN_Pong_2_montage.png b/screenshots/live_play/IQN_Pong_2_montage.png new file mode 100644 index 0000000..e371131 Binary files /dev/null and b/screenshots/live_play/IQN_Pong_2_montage.png differ diff --git a/screenshots/live_play/PER-DQN_Pong_4.gif b/screenshots/live_play/PER-DQN_Pong_4.gif new file mode 100644 index 0000000..711a73d Binary files /dev/null and b/screenshots/live_play/PER-DQN_Pong_4.gif differ diff --git 
a/screenshots/live_play/PER-DQN_Pong_4_montage.png b/screenshots/live_play/PER-DQN_Pong_4_montage.png new file mode 100644 index 0000000..c0acf95 Binary files /dev/null and b/screenshots/live_play/PER-DQN_Pong_4_montage.png differ diff --git a/screenshots/live_play/PPO-RND_MontezumaRevenge_2.gif b/screenshots/live_play/PPO-RND_MontezumaRevenge_2.gif new file mode 100644 index 0000000..94a96cc Binary files /dev/null and b/screenshots/live_play/PPO-RND_MontezumaRevenge_2.gif differ diff --git a/screenshots/live_play/PPO-RND_MontezumaRevenge_2_montage.png b/screenshots/live_play/PPO-RND_MontezumaRevenge_2_montage.png new file mode 100644 index 0000000..0f976b2 Binary files /dev/null and b/screenshots/live_play/PPO-RND_MontezumaRevenge_2_montage.png differ diff --git a/screenshots/live_play/Rainbow_Pong_2.gif b/screenshots/live_play/Rainbow_Pong_2.gif new file mode 100644 index 0000000..aaf738c Binary files /dev/null and b/screenshots/live_play/Rainbow_Pong_2.gif differ diff --git a/screenshots/live_play/Rainbow_Pong_2_montage.png b/screenshots/live_play/Rainbow_Pong_2_montage.png new file mode 100644 index 0000000..79f4394 Binary files /dev/null and b/screenshots/live_play/Rainbow_Pong_2_montage.png differ diff --git a/screenshots/live_play/gallery.html b/screenshots/live_play/gallery.html new file mode 100644 index 0000000..9527efe --- /dev/null +++ b/screenshots/live_play/gallery.html @@ -0,0 +1,206 @@ + + + + +Atari57 Live Play — Gallery + + + + +

ATARI57 · LIVE PLAY GALLERY

+
+ Real ALE frames captured from the live FastAPI WebSocket and from + direct deep_rl_zoo env factories. Each card pairs a 5-frame montage + (static summary) with an animated 100-120 frame GIF (motion). +
+ +
+ Trained agents + · 5 checkpoints across 3 games · streamed via /api/eval/stream +
+ +
+

Rainbow / Pong bundled · iter 2

+
end-of-capture running ep_return = +1.0 (close game)
+
+
montage
Rainbow Pong montage
+
animated
Rainbow Pong gif
+
+
+ +
+

PER-DQN / Pong bundled · iter 4 · strongest

+
end-of-capture running ep_return = +8.0 (dominating)
+
+
montage
PER-DQN Pong montage
+
animated
PER-DQN Pong gif
+
+
+ +
+

IQN / Pong bundled · iter 2 · undertrained

+
end-of-capture running ep_return = −4.0 (losing; partial-training snapshot)
+
+
montage
IQN Pong montage
+
animated
(GIF not captured this round — montage only)
+
+
+ +
+

PPO-RND / MontezumaRevenge bundled · iter 2 · sparse reward

+
avatar wanders the platforms, doesn't reach the key (expected — Montezuma needs millions of steps)
+
+
montage
PPO-RND Montezuma montage
+
animated
PPO-RND Montezuma gif
+
+
+ +
+

DQN / Breakout freshly trained · 5000 steps · iter 1

+
spawned via POST /api/training/start, completed in ~5s, wrote checkpoints/DQN_Breakout_1.ckpt — then replayed live
+
+
montage
DQN Breakout montage
+
animated
DQN Breakout gif
+
+
+ +
+ Random policy + · 6 games · direct ALE renders via deep_rl_zoo.gym_env +
+ +
+

SpaceInvaders random · action_dim 6

+
1 episode finished · score 240 (accidentally shot a few)
+
+
montage
SpaceInvaders montage
+
animated
SpaceInvaders gif
+
+
+ +
+

MsPacman random · action_dim 9

+
1 episode finished · score 250 (ate pellets while wandering)
+
+
montage
MsPacman montage
+
animated
MsPacman gif
+
+
+ +
+

Asteroids random · action_dim 14

+
no episode finished in 700 steps · running score +610
+
+
montage
Asteroids montage
+
animated
Asteroids gif
+
+
+ +
+

Q*bert random · action_dim 6

+
2 episodes finished · scores 50, 75
+
+
montage
Qbert montage
+
animated
Qbert gif
+
+
+ +
+

Seaquest random · action_dim 18

+
1 episode finished · score 80 (oxygen ran out)
+
+
montage
Seaquest montage
+
animated
Seaquest gif
+
+
+ +
+

Boxing random · action_dim 18

+
no episode finished in 700 steps · running score −3 (the CPU wins the round)
+
+
montage
Boxing montage
+
animated
Boxing gif
+
+
+ + + diff --git a/screenshots/live_play/random_Asteroids.gif b/screenshots/live_play/random_Asteroids.gif new file mode 100644 index 0000000..c4f9970 Binary files /dev/null and b/screenshots/live_play/random_Asteroids.gif differ diff --git a/screenshots/live_play/random_Asteroids_montage.png b/screenshots/live_play/random_Asteroids_montage.png new file mode 100644 index 0000000..34d14e6 Binary files /dev/null and b/screenshots/live_play/random_Asteroids_montage.png differ diff --git a/screenshots/live_play/random_Boxing.gif b/screenshots/live_play/random_Boxing.gif new file mode 100644 index 0000000..70af18e Binary files /dev/null and b/screenshots/live_play/random_Boxing.gif differ diff --git a/screenshots/live_play/random_Boxing_montage.png b/screenshots/live_play/random_Boxing_montage.png new file mode 100644 index 0000000..13f35c2 Binary files /dev/null and b/screenshots/live_play/random_Boxing_montage.png differ diff --git a/screenshots/live_play/random_MsPacman.gif b/screenshots/live_play/random_MsPacman.gif new file mode 100644 index 0000000..7f0034f Binary files /dev/null and b/screenshots/live_play/random_MsPacman.gif differ diff --git a/screenshots/live_play/random_MsPacman_montage.png b/screenshots/live_play/random_MsPacman_montage.png new file mode 100644 index 0000000..2ac75ad Binary files /dev/null and b/screenshots/live_play/random_MsPacman_montage.png differ diff --git a/screenshots/live_play/random_Qbert.gif b/screenshots/live_play/random_Qbert.gif new file mode 100644 index 0000000..2c11061 Binary files /dev/null and b/screenshots/live_play/random_Qbert.gif differ diff --git a/screenshots/live_play/random_Qbert_montage.png b/screenshots/live_play/random_Qbert_montage.png new file mode 100644 index 0000000..7f97ebc Binary files /dev/null and b/screenshots/live_play/random_Qbert_montage.png differ diff --git a/screenshots/live_play/random_Seaquest.gif b/screenshots/live_play/random_Seaquest.gif new file mode 100644 index 0000000..8984234 Binary 
files /dev/null and b/screenshots/live_play/random_Seaquest.gif differ diff --git a/screenshots/live_play/random_Seaquest_montage.png b/screenshots/live_play/random_Seaquest_montage.png new file mode 100644 index 0000000..5bc9131 Binary files /dev/null and b/screenshots/live_play/random_Seaquest_montage.png differ diff --git a/screenshots/live_play/random_SpaceInvaders.gif b/screenshots/live_play/random_SpaceInvaders.gif new file mode 100644 index 0000000..2d919f0 Binary files /dev/null and b/screenshots/live_play/random_SpaceInvaders.gif differ diff --git a/screenshots/live_play/random_SpaceInvaders_montage.png b/screenshots/live_play/random_SpaceInvaders_montage.png new file mode 100644 index 0000000..a25fd55 Binary files /dev/null and b/screenshots/live_play/random_SpaceInvaders_montage.png differ diff --git a/screenshots/live_play/summary.txt b/screenshots/live_play/summary.txt new file mode 100644 index 0000000..47997e5 --- /dev/null +++ b/screenshots/live_play/summary.txt @@ -0,0 +1,39 @@ +Atari57 live-play capture +============================================================ + +Rainbow / Pong + checkpoint: Rainbow_Pong_2.ckpt + frames captured: 600 + episodes done: 0 + final ep_return: None + last ep_return: 1.0 + capture elapsed: 2.3s + montage: screenshots/live_play/Rainbow_Pong_2_montage.png + +PER-DQN / Pong + checkpoint: PER-DQN_Pong_4.ckpt + frames captured: 600 + episodes done: 0 + final ep_return: None + last ep_return: 8.0 + capture elapsed: 1.7s + montage: screenshots/live_play/PER-DQN_Pong_4_montage.png + +IQN / Pong + checkpoint: IQN_Pong_2.ckpt + frames captured: 600 + episodes done: 0 + final ep_return: None + last ep_return: -4.0 + capture elapsed: 2.4s + montage: screenshots/live_play/IQN_Pong_2_montage.png + +PPO-RND / MontezumaRevenge + checkpoint: PPO-RND_MontezumaRevenge_2.ckpt + frames captured: 601 + episodes done: 1 + final ep_return: 0.0 + last ep_return: 0.0 + capture elapsed: 2.0s + montage: 
screenshots/live_play/PPO-RND_MontezumaRevenge_2_montage.png +