From b2053d850e73b40ff5f3eecfc3ff14cf5d92b024 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Tue, 24 Jun 2025 20:24:42 +0400
Subject: [PATCH 01/11] Upgrade requirements

---
 requirements.txt                  | 5 +++--
 src/oprl/configs/ddpg.py          | 4 ++--
 src/oprl/trainers/base_trainer.py | 2 +-
 src/oprl/utils/logger.py          | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index e099f43..c7708c3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 torch==2.2.2
 tensorboard==2.15.1
 packaging==23.2
-dm-control==1.0.16
-mujoco==3.1.3
+dm-control==1.0.31
+mujoco==3.3.3
+numpy==1.26.4
diff --git a/src/oprl/configs/ddpg.py b/src/oprl/configs/ddpg.py
index fa24a3f..4c35946 100644
--- a/src/oprl/configs/ddpg.py
+++ b/src/oprl/configs/ddpg.py
@@ -26,11 +26,11 @@ def make_env(seed: int):
 config = {
     "state_dim": STATE_DIM,
     "action_dim": ACTION_DIM,
-    "num_steps": int(1_000_000),
+    "num_steps": int(100_000),
     "eval_every": 2500,
     "device": args.device,
     "save_buffer": False,
-    "visualise_every": 0,
+    "visualise_every": 50000,
     "estimate_q_every": 5000,
     "log_every": 2500,
 }
diff --git a/src/oprl/trainers/base_trainer.py b/src/oprl/trainers/base_trainer.py
index 160649b..94779a8 100644
--- a/src/oprl/trainers/base_trainer.py
+++ b/src/oprl/trainers/base_trainer.py
@@ -187,7 +187,7 @@ def visualise_policy(self):
                 action = self._algo.exploit(state)
                 state, _, terminated, truncated, _ = env.step(action)
                 done = terminated or truncated
-            return np.concatenate(imgs)
+            return np.concatenate(imgs, dtype="uint8")
         except Exception as e:
             print(f"Failed to visualise a policy: {e}")
             return None
diff --git a/src/oprl/utils/logger.py b/src/oprl/utils/logger.py
index 563c0c1..f9a394a 100644
--- a/src/oprl/utils/logger.py
+++ b/src/oprl/utils/logger.py
@@ -66,7 +66,7 @@ def log_scalar(self, tag: str, value: float, step: int) -> None:
         self._log_scalar_to_file(tag, value, step)
 
     def log_video(self, tag: str, imgs, step: int) -> None:
-        os.makedirs(os.path.join(self._log_dir, "images"))
+        os.makedirs(os.path.join(self._log_dir, "images"), exist_ok=True)
         fn = os.path.join(self._log_dir, "images", f"{tag}_step_{step}.npz")
         with open(fn, "wb") as f:
             np.save(f, imgs)

From 14ee49d3475e0bcc925baf686eb11d4adf712775 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Mon, 30 Jun 2025 21:52:27 +0400
Subject: [PATCH 02/11] fixes for running both dm_control and safety gymnasium

---
 requirements.txt                  |  4 ++--
 src/oprl/configs/utils.py         |  2 +-
 src/oprl/env.py                   |  6 +++---
 src/oprl/trainers/base_trainer.py |  9 +++++++++
 src/oprl/trainers/safe_trainer.py |  5 ++++-
 src/oprl/utils/logger.py          | 11 +++++++++++
 6 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index c7708c3..2759849 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 torch==2.2.2
 tensorboard==2.15.1
 packaging==23.2
-dm-control==1.0.31
-mujoco==3.3.3
+dm-control==1.0.11
+mujoco==2.3.3
 numpy==1.26.4
diff --git a/src/oprl/configs/utils.py b/src/oprl/configs/utils.py
index 0e216ab..50c16e7 100644
--- a/src/oprl/configs/utils.py
+++ b/src/oprl/configs/utils.py
@@ -29,7 +29,7 @@ def parse_args() -> argparse.Namespace:
 
 
 def create_logdir(logdir: str, algo: str, env: str, seed: int) -> str:
-    dt = datetime.now().strftime("%Y_%m_%d_%Hh%Mm")
+    dt = datetime.now().strftime("%Y_%m_%d_%Hh%Mm%Ss")
     log_dir = os.path.join(logdir, algo, f"{algo}-env_{env}-seed_{seed}-{dt}")
     logging.info(f"LOGDIR: {log_dir}")
     return log_dir
diff --git a/src/oprl/env.py b/src/oprl/env.py
index f4d8fe5..6017ab2 100644
--- a/src/oprl/env.py
+++ b/src/oprl/env.py
@@ -56,12 +56,12 @@ def step(
     ) -> tuple[npt.ArrayLike, npt.ArrayLike, bool, bool, dict[str, Any]]:
         obs, reward, cost, terminated, truncated, info = self._env.step(action)
         info["cost"] = cost
-        return obs, reward, terminated, truncated, info
+        return obs.astype("float32"), reward, terminated, truncated, info
 
     def reset(self) -> tuple[npt.ArrayLike, dict[str, Any]]:
         obs, info = self._env.reset(seed=self._seed)
         self._env.step(self._env.action_space.sample())
-        return obs, info
+        return obs.astype("float32"), info
 
     def sample_action(self):
         return self._env.action_space.sample()
@@ -129,7 +129,7 @@ def render(self) -> npt.ArrayLike:
             width=self._render_width,
         )
         img = img.astype(np.uint8)
-        return np.expand_dims(img, 0)
+        return img
 
     def _flat_obs(self, obs: OrderedDict) -> npt.ArrayLike:
         obs_flatten = []
diff --git a/src/oprl/trainers/base_trainer.py b/src/oprl/trainers/base_trainer.py
index 94779a8..c9fccb2 100644
--- a/src/oprl/trainers/base_trainer.py
+++ b/src/oprl/trainers/base_trainer.py
@@ -1,3 +1,4 @@
+import os
 from typing import Any, Callable
 
 import numpy as np
@@ -24,6 +25,7 @@ def __init__(
         eval_interval: int = int(2e3),
         num_eval_episodes: int = 10,
         save_buffer_every: int = 0,
+        save_policy_every: int = int(50_000),
         visualise_every: int = 0,
         estimate_q_every: int = 0,
         stdout_log_every: int = int(1e5),
@@ -60,6 +62,7 @@ def __init__(
         self._visualize_every = visualise_every
         self._estimate_q_every = estimate_q_every
         self._stdout_log_every = stdout_log_every
+        self._save_policy_every=  save_policy_every
         self._logger = logger
         self.seed = seed
 
@@ -106,6 +109,7 @@ def train(self):
             self._eval_routine(env_step, batch)
             self._visualize(env_step)
             self._save_buffer(env_step)
+            self._save_policy(env_step)
             self._log_stdout(env_step, batch)
 
     def _eval_routine(self, env_step: int, batch):
@@ -151,9 +155,14 @@ def _visualize(self, env_step: int):
                 self._logger.log_video("eval_policy", imgs, env_step)
 
     def _save_buffer(self, env_step: int):
+        # TODO: doesn't work
         if self._save_buffer_every > 0 and env_step % self._save_buffer_every == 0:
             self.buffer.save(f"{self.log_dir}/buffers/buffer_step_{env_step}.pickle")
 
+    def _save_policy(self, env_step: int):
+        if self._save_policy_every > 0 and env_step % self._save_policy_every == 0:
+            self._logger.save_weights(self._algo.actor, env_step)
+
     def _estimate_q(self, env_step: int):
         if self._estimate_q_every > 0 and env_step % self._estimate_q_every == 0:
             q_true = self.estimate_true_q()
diff --git a/src/oprl/trainers/safe_trainer.py b/src/oprl/trainers/safe_trainer.py
index ae3a85e..a5edf9f 100644
--- a/src/oprl/trainers/safe_trainer.py
+++ b/src/oprl/trainers/safe_trainer.py
@@ -23,6 +23,7 @@ def __init__(
         eval_interval: int = int(2e3),
         num_eval_episodes: int = 10,
         save_buffer_every: int = 0,
+        save_policy_every: int = int(50_000),
         visualise_every: int = 0,
         estimate_q_every: int = 0,
         stdout_log_every: int = int(1e5),
@@ -65,6 +66,7 @@ def __init__(
             eval_interval=eval_interval,
             num_eval_episodes=num_eval_episodes,
             save_buffer_every=save_buffer_every,
+            save_policy_every=save_policy_every,
             visualise_every=visualise_every,
             estimate_q_every=estimate_q_every,
             stdout_log_every=stdout_log_every,
@@ -97,10 +99,11 @@ def train(self):
             if len(self.buffer) < self.batch_size:
                 continue
             batch = self.buffer.sample(self.batch_size)
-            self._algo.update(batch)
+            self._algo.update(*batch)
 
             self._eval_routine(env_step, batch)
             self._visualize(env_step)
+            self._save_policy(env_step)
             self._save_buffer(env_step)
             self._log_stdout(env_step, batch)
 
diff --git a/src/oprl/utils/logger.py b/src/oprl/utils/logger.py
index f9a394a..c78175a 100644
--- a/src/oprl/utils/logger.py
+++ b/src/oprl/utils/logger.py
@@ -3,9 +3,12 @@
 import os
 import shutil
 from abc import ABC, abstractmethod
+from sys import path
 from typing import Any
 
 import numpy as np
+import torch
+import torch.nn as nn
 from torch.utils.tensorboard.writer import SummaryWriter
 
 
@@ -71,6 +74,14 @@ def log_video(self, tag: str, imgs, step: int) -> None:
         with open(fn, "wb") as f:
             np.save(f, imgs)
 
+    def save_weights(self, weights: nn.Module, step: int) -> None:
+        """Write `weights` via torch.save to <log_dir>/weights/step_<step>.w."""
+        weights_dir = os.path.join(self._log_dir, "weights")
+        # The directory may already exist from an earlier save.
+        os.makedirs(weights_dir, exist_ok=True)
+        target = os.path.join(weights_dir, f"step_{step}.w")
+        torch.save(weights, target)
+
     def _log_scalar_to_file(self, tag: str, val: float, step: int) -> None:
         fn = os.path.join(self._log_dir, f"{tag}.log")
         os.makedirs(os.path.dirname(fn), exist_ok=True)

From 3cd27bc50b8467cb6736f863b570e9b895ee1116 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Fri, 4 Jul 2025 20:15:44 +0400
Subject: [PATCH 03/11] Add basic ci file

---
 src/oprl/configs/ddpg.py          |  1 -
 src/oprl/configs/sac.py           |  1 -
 src/oprl/configs/td3.py           |  2 -
 src/oprl/configs/tqc.py           |  1 -
 tests/functional/requirements.txt |  6 ---
 tests/functional/test_env.py      | 64 +++++++++++++++++++++++++++++++
 tests/functional/test_rl_algos.py | 35 +++++++++++++++++
 7 files changed, 99 insertions(+), 11 deletions(-)
 delete mode 100644 tests/functional/requirements.txt
 create mode 100644 tests/functional/test_env.py
 create mode 100644 tests/functional/test_rl_algos.py

diff --git a/src/oprl/configs/ddpg.py b/src/oprl/configs/ddpg.py
index 4c35946..22437f0 100644
--- a/src/oprl/configs/ddpg.py
+++ b/src/oprl/configs/ddpg.py
@@ -48,7 +48,6 @@ def make_algo(logger):
 
 
 def make_logger(seed: int) -> Logger:
-    global config
     log_dir = create_logdir(logdir="logs", algo="DDPG", env=args.env, seed=seed)
     return FileLogger(log_dir, config)
 
diff --git a/src/oprl/configs/sac.py b/src/oprl/configs/sac.py
index 2981aae..475fd0e 100644
--- a/src/oprl/configs/sac.py
+++ b/src/oprl/configs/sac.py
@@ -50,7 +50,6 @@ def make_algo(logger):
 
 
 def make_logger(seed: int) -> Logger:
-    global config
     log_dir = create_logdir(logdir="logs", algo="SAC", env=args.env, seed=seed)
     return FileLogger(log_dir, config)
 
diff --git a/src/oprl/configs/td3.py b/src/oprl/configs/td3.py
index 9cb7174..c8dac65 100644
--- a/src/oprl/configs/td3.py
+++ b/src/oprl/configs/td3.py
@@ -48,8 +48,6 @@ def make_algo(logger):
 
 
 def make_logger(seed: int) -> Logger:
-    global config
-
     log_dir = create_logdir(logdir="logs", algo="TD3", env=args.env, seed=seed)
     return FileLogger(log_dir, config)
 
diff --git a/src/oprl/configs/tqc.py b/src/oprl/configs/tqc.py
index b2242f1..640071e 100644
--- a/src/oprl/configs/tqc.py
+++ b/src/oprl/configs/tqc.py
@@ -48,7 +48,6 @@ def make_algo(logger: Logger):
 
 
 def make_logger(seed: int) -> Logger:
-    global config
     log_dir = create_logdir(logdir="logs", algo="TQC", env=args.env, seed=seed)
     return FileLogger(log_dir, config)
 
diff --git a/tests/functional/requirements.txt b/tests/functional/requirements.txt
deleted file mode 100644
index 2e41aa3..0000000
--- a/tests/functional/requirements.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-pytest==8.0.1
-torch==2.2.0
-tensorboard==2.15.1
-packaging==23.2
-dm-control==1.0.16
-mujoco==3.1.3
diff --git a/tests/functional/test_env.py b/tests/functional/test_env.py
new file mode 100644
index 0000000..1facf87
--- /dev/null
+++ b/tests/functional/test_env.py
@@ -0,0 +1,64 @@
+import pytest
+
+from oprl.env import make_env
+
+
+dm_control_envs: list[str] = [
+    "acrobot-swingup",
+    "ball_in_cup-catch",
+    "cartpole-balance",
+    "cartpole-swingup",
+    "cheetah-run",
+    "finger-spin",
+    "finger-turn_easy",
+    "finger-turn_hard",
+    "fish-upright",
+    "fish-swim",
+    "hopper-stand",
+    "hopper-hop",
+    "humanoid-stand",
+    "humanoid-walk",
+    "humanoid-run",
+    "pendulum-swingup",
+    "point_mass-easy",
+    "reacher-easy",
+    "reacher-hard",
+    "swimmer-swimmer6",
+    "swimmer-swimmer15",
+    "walker-stand",
+    "walker-walk",
+    "walker-run",
+]
+
+
+safety_envs: list[str] = [
+    "SafetyPointGoal1-v0",
+    "SafetyPointButton1-v0",
+    "SafetyPointPush1-v0",
+    "SafetyPointCircle1-v0",
+]
+
+
+env_names: list[str] = dm_control_envs + safety_envs
+
+
+@pytest.mark.parametrize("env_name", env_names)
+def test_envs(env_name: str) -> None:
+    env = make_env(env_name, seed=0)
+    obs, info = env.reset()
+    assert obs.shape[0] == env.observation_space.shape[0]
+    assert isinstance(info, dict), "Info is expected to be a dict"
+
+    rand_action = env.sample_action()
+    assert rand_action.ndim == 1
+
+    next_obs, reward, terminated, truncated, info = env.step(rand_action)
+    assert next_obs.ndim == 1, "Expected 1-dimensional array as observation"
+    assert isinstance(reward, float), "Reward is expected to be a single float value"
+    assert isinstance(
+        terminated, bool
+    ), "Terminated is expected to be a single bool value"
+    assert isinstance(
+        truncated, bool
+    ), "Truncated is expected to be a single bool value"
+    assert isinstance(info, dict), "Info is expected to be a dict"
diff --git a/tests/functional/test_rl_algos.py b/tests/functional/test_rl_algos.py
new file mode 100644
index 0000000..40fb296
--- /dev/null
+++ b/tests/functional/test_rl_algos.py
@@ -0,0 +1,35 @@
+import pytest
+import torch
+
+from oprl.algos.ddpg import DDPG
+from oprl.algos.sac import SAC
+from oprl.algos.td3 import TD3
+from oprl.algos.tqc import TQC
+from oprl.env import DMControlEnv
+
+rl_algo_classes = [DDPG, SAC, TD3, TQC]
+
+
+@pytest.mark.parametrize("algo_class", rl_algo_classes)
+def test_rl_algo_run(algo_class):
+    env = DMControlEnv("walker-walk", seed=0)
+    obs, _ = env.reset(env.sample_action())  # NOTE(review): confirm reset() accepts an argument
+
+    algo = algo_class(
+        state_dim=env.observation_space.shape[0],
+        action_dim=env.action_space.shape[0],
+    )
+    action = algo.exploit(obs)
+    assert action.ndim == 1
+
+    action = algo.explore(obs)
+    assert action.ndim == 1
+
+    _batch_size = 8
+    batch_obs = torch.randn(_batch_size, env.observation_space.shape[0])
+    batch_actions = torch.clamp(
+        torch.randn(_batch_size, env.action_space.shape[0]), -1, 1
+    )
+    batch_rewards = torch.randn(_batch_size, 1)
+    batch_dones = torch.randint(2, (_batch_size, 1))
+    algo.update(batch_obs, batch_actions, batch_rewards, batch_dones, batch_obs)

From 2d210cb50bf466b139718942744036ab3e906d1f Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Fri, 4 Jul 2025 20:17:00 +0400
Subject: [PATCH 04/11] Add ci file

---
 .github/workflows/python-package.yml | 41 ++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 .github/workflows/python-package.yml

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
new file mode 100644
index 0000000..c352ffe
--- /dev/null
+++ b/.github/workflows/python-package.yml
@@ -0,0 +1,41 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python package
+
+on:
+  push:
+    branches: [ "master", "develop"]
+  pull_request:
+    branches: [ "master", "develop"]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10.8"]
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest
+        python -m pip install .
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pytest tests/functional
+

From fa7453ba04509c90f783cabdc6630e84be53b816 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Fri, 4 Jul 2025 21:33:31 +0400
Subject: [PATCH 05/11] Add dockerfile

---
 Dockerfile                            | 12 +++++++
 pyproject.toml                        | 44 +++++++++++++++++++++++
 src/setup.py                          | 20 -----------
 tests/functional/src/test_env.py      | 52 ---------------------------
 tests/functional/src/test_rl_algos.py | 35 ------------------
 tests/functional/test_env.py          |  2 +-
 6 files changed, 57 insertions(+), 108 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 pyproject.toml
 delete mode 100644 src/setup.py
 delete mode 100644 tests/functional/src/test_env.py
 delete mode 100644 tests/functional/src/test_rl_algos.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..405f5c7
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.10.8
+
+WORKDIR /app
+
+RUN pip install --no-cache-dir --upgrade pip
+
+COPY . .
+
+RUN pip install --no-cache-dir . && pip install pytest
+
+# Run tests by default
+CMD ["pytest", "tests/functional"]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f531d30
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,44 @@
+[build-system]
+requires = ["setuptools>=64"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "oprl"
+version = "0.1.0"
+description = "An RL Lib"
+readme = "README.md"
+requires-python = "==3.10.8"
+license = {text = "MIT"}
+authors = [
+    {name = "Igor Kuznetsov"},
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3.10",
+]
+dependencies = [
+    "torch==2.2.2",
+    "tensorboard==2.15.1",
+    "packaging==23.2",
+    "dm-control==1.0.11",
+    "mujoco==2.3.3",
+    "numpy==1.26.4",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=6.0",
+    "black",
+    "flake8",
+]
+
+[project.urls]
+"Homepage" = "https://schatty.github.io/oprl"
+
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["oprl*"]
+
+[tool.setuptools.package-dir]
+"" = "src"
diff --git a/src/setup.py b/src/setup.py
deleted file mode 100644
index 06324f0..0000000
--- a/src/setup.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from setuptools import setup
-
-setup(
-    name="oprl",
-    version="1.0",
-    author="Igor Kuznetsov",
-    description="Reinforcement Learning Off-policy Algorithms with PyTorch",
-    long_description="todo",
-    url="todo",
-    keywords="reinforcement, learning, off-policy",
-    python_requires=">=3.7",
-    # packages=find_packages(include=['exampleproject', 'exampleproject.*']),
-    # install_requires=[
-    #     'PyYAML',
-    #     'pandas==0.23.3',
-    #     'numpy>=1.14.5',
-    #     'matplotlib>=2.2.0,,
-    #     'jupyter'
-    # ],
-)
diff --git a/tests/functional/src/test_env.py b/tests/functional/src/test_env.py
deleted file mode 100644
index 3f056c4..0000000
--- a/tests/functional/src/test_env.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import pytest
-
-from oprl.env import DMControlEnv
-
-dm_control_envs = [
-    "acrobot-swingup",
-    "ball_in_cup-catch",
-    "cartpole-balance",
-    "cartpole-swingup",
-    "cheetah-run",
-    "finger-spin",
-    "finger-turn_easy",
-    "finger-turn_hard",
-    "fish-upright",
-    "fish-swim",
-    "hopper-stand",
-    "hopper-hop",
-    "humanoid-stand",
-    "humanoid-walk",
-    "humanoid-run",
-    "pendulum-swingup",
-    "point_mass-easy",
-    "reacher-easy",
-    "reacher-hard",
-    "swimmer-swimmer6",
-    "swimmer-swimmer15",
-    "walker-stand",
-    "walker-walk",
-    "walker-run",
-]
-
-
-@pytest.mark.parametrize("env_name", dm_control_envs)
-def test_dm_control_envs(env_name: str):
-    env = DMControlEnv(env_name, seed=0)
-    obs, info = env.reset()
-    assert obs.shape[0] == env.observation_space.shape[0]
-    assert isinstance(info, dict), "Info is expected to be a dict"
-
-    rand_action = env.sample_action()
-    assert rand_action.ndim == 1
-
-    next_obs, reward, terminated, truncated, info = env.step(rand_action)
-    assert next_obs.ndim == 1, "Expected 1-dimensional array as observation"
-    assert isinstance(reward, float), "Reward is epxected to be a single float value"
-    assert isinstance(
-        terminated, bool
-    ), "Terminated is expected to be a single bool value"
-    assert isinstance(
-        truncated, bool
-    ), "Truncated is expected to be a single bool value"
-    assert isinstance(info, dict), "Info is expected to be a dict"
diff --git a/tests/functional/src/test_rl_algos.py b/tests/functional/src/test_rl_algos.py
deleted file mode 100644
index 40fb296..0000000
--- a/tests/functional/src/test_rl_algos.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import pytest
-import torch
-
-from oprl.algos.ddpg import DDPG
-from oprl.algos.sac import SAC
-from oprl.algos.td3 import TD3
-from oprl.algos.tqc import TQC
-from oprl.env import DMControlEnv
-
-rl_algo_classes = [DDPG, SAC, TD3, TQC]
-
-
-@pytest.mark.parametrize("algo_class", rl_algo_classes)
-def test_rl_algo_run(algo_class):
-    env = DMControlEnv("walker-walk", seed=0)
-    obs, _ = env.reset(env.sample_action())
-
-    algo = algo_class(
-        state_dim=env.observation_space.shape[0],
-        action_dim=env.action_space.shape[0],
-    )
-    action = algo.exploit(obs)
-    assert action.ndim == 1
-
-    action = algo.explore(obs)
-    assert action.ndim == 1
-
-    _batch_size = 8
-    batch_obs = torch.randn(_batch_size, env.observation_space.shape[0])
-    batch_actions = torch.clamp(
-        torch.randn(_batch_size, env.action_space.shape[0]), -1, 1
-    )
-    batch_rewards = torch.randn(_batch_size, 1)
-    batch_dones = torch.randint(2, (_batch_size, 1))
-    algo.update(batch_obs, batch_actions, batch_rewards, batch_dones, batch_obs)
diff --git a/tests/functional/test_env.py b/tests/functional/test_env.py
index 1facf87..2cf2ec2 100644
--- a/tests/functional/test_env.py
+++ b/tests/functional/test_env.py
@@ -39,7 +39,7 @@
 ]
 
 
-env_names: list[str] = dm_control_envs + safety_envs
+env_names: list[str] = dm_control_envs # + safety_envs
 
 
 @pytest.mark.parametrize("env_name", env_names)

From 8cf9022bfec21481862918f9309d1bec6ea9c953 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Fri, 4 Jul 2025 21:44:23 +0400
Subject: [PATCH 06/11] Change ci file to use docker

---
 .github/workflows/{python-package.yml => unit-tests-docker.yml} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .github/workflows/{python-package.yml => unit-tests-docker.yml} (100%)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/unit-tests-docker.yml
similarity index 100%
rename from .github/workflows/python-package.yml
rename to .github/workflows/unit-tests-docker.yml

From 17cb0680de3f1c8aa1bdb930cb83e22934cc4475 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Fri, 4 Jul 2025 21:47:37 +0400
Subject: [PATCH 07/11] change ci

---
 .github/workflows/unit-tests-docker.yml | 43 +++++++------------------
 1 file changed, 11 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/unit-tests-docker.yml b/.github/workflows/unit-tests-docker.yml
index c352ffe..1140e40 100644
--- a/.github/workflows/unit-tests-docker.yml
+++ b/.github/workflows/unit-tests-docker.yml
@@ -1,41 +1,20 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
-
-name: Python package
-
-on:
+name: Unit Tests
+on: 
   push:
     branches: [ "master", "develop"]
   pull_request:
     branches: [ "master", "develop"]
 
 jobs:
-  build:
-
+  test:
     runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.10.8"]
-
     steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install flake8 pytest
-        python -m pip install .
-    - name: Lint with flake8
-      run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
-      run: |
-        pytest tests/functional
+    - name: Checkout code
+      uses: actions/checkout@v3
+    
+    - name: Build Docker image
+      run: docker build -t oprl .
+    
+    - name: Unit Tests
+      run: docker run --rm oprl
 

From ad14d3e8e471bdf753fc1cec8b91da310a535b88 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Fri, 4 Jul 2025 21:57:33 +0400
Subject: [PATCH 08/11] rename jobs

---
 .github/workflows/unit-tests-docker.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit-tests-docker.yml b/.github/workflows/unit-tests-docker.yml
index 1140e40..1f37fa9 100644
--- a/.github/workflows/unit-tests-docker.yml
+++ b/.github/workflows/unit-tests-docker.yml
@@ -1,4 +1,4 @@
-name: Unit Tests
+name: Tests
 on: 
   push:
     branches: [ "master", "develop"]
@@ -6,7 +6,7 @@ on:
     branches: [ "master", "develop"]
 
 jobs:
-  test:
+  Unit Tests:
     runs-on: ubuntu-latest
     steps:
     - name: Checkout code

From eae6ac91e6f85a7a29bb6f5e827c5c9b6f000b56 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Fri, 4 Jul 2025 22:01:16 +0400
Subject: [PATCH 09/11] change jobs name

---
 .github/workflows/unit-tests-docker.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit-tests-docker.yml b/.github/workflows/unit-tests-docker.yml
index 1f37fa9..ef99dd2 100644
--- a/.github/workflows/unit-tests-docker.yml
+++ b/.github/workflows/unit-tests-docker.yml
@@ -1,4 +1,4 @@
-name: Tests
+name: Unit Tests
 on: 
   push:
     branches: [ "master", "develop"]
@@ -6,7 +6,8 @@ on:
     branches: [ "master", "develop"]
 
 jobs:
-  Unit Tests:
+  unit_tests:
+    name: Unit Tests
     runs-on: ubuntu-latest
     steps:
     - name: Checkout code

From ab21366ed3121e85cdf1f5bedff8b906a101c038 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Fri, 4 Jul 2025 22:23:11 +0400
Subject: [PATCH 10/11] Add test coverage

---
 .github/workflows/unit-tests-docker.yml | 13 +++++++++++++
 Dockerfile                              |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/unit-tests-docker.yml b/.github/workflows/unit-tests-docker.yml
index ef99dd2..3158532 100644
--- a/.github/workflows/unit-tests-docker.yml
+++ b/.github/workflows/unit-tests-docker.yml
@@ -19,3 +19,16 @@ jobs:
     - name: Unit Tests
       run: docker run --rm oprl
 
+    - name: Extract coverage
+      run: |
+        docker run --rm -v $(pwd):/host oprl sh -c "
+          pytest --cov=oprl --cov-report=xml &&
+          cp coverage.xml /host/
+        "
+    
+    - name: Upload coverage
+      uses: codecov/codecov-action@v5
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }}
+        slug: schatty/oprl
+        files: ./coverage.xml  # v5 deprecates the singular `file` input
diff --git a/Dockerfile b/Dockerfile
index 405f5c7..029e0f1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,7 +6,7 @@ RUN pip install --no-cache-dir --upgrade pip
 
 COPY . .
 
-RUN pip install --no-cache-dir . && pip install pytest
+RUN pip install --no-cache-dir . && pip install pytest pytest-cov
 
 # Run tests by default
 CMD ["pytest", "tests/functional"]

From cbe8336fa3a452fac1780a742ffd844da8e4b855 Mon Sep 17 00:00:00 2001
From: Igor Kuznetsov <igorkuznetsov14@gmail.com>
Date: Fri, 4 Jul 2025 22:35:03 +0400
Subject: [PATCH 11/11] feature: add codecov icon to readme

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index c9f6eaf..4a64378 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@
 A Modular Library for Off-Policy Reinforcement Learning with a focus on SafeRL and distributed computing. Benchmarking resutls are available at associated homepage: [Homepage](https://schatty.github.io/oprl/)
 
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![codecov](https://codecov.io/gh/schatty/oprl/branch/master/graph/badge.svg)](https://codecov.io/gh/schatty/oprl)
+
 
 
 # Disclaimer