diff --git a/resources/dart_additional_files/python/dartpy/gui/osg/Viewer.cpp b/Dart_Additional_Files/python/dartpy/gui/osg/Viewer.cpp similarity index 100% rename from resources/dart_additional_files/python/dartpy/gui/osg/Viewer.cpp rename to Dart_Additional_Files/python/dartpy/gui/osg/Viewer.cpp diff --git a/resources/dart_additional_files/python/dartpy/math/Geometry.cpp b/Dart_Additional_Files/python/dartpy/math/Geometry.cpp similarity index 100% rename from resources/dart_additional_files/python/dartpy/math/Geometry.cpp rename to Dart_Additional_Files/python/dartpy/math/Geometry.cpp diff --git a/docker/Commands b/Docker/Commands similarity index 63% rename from docker/Commands rename to Docker/Commands index 9111cb6..20d3124 100644 --- a/docker/Commands +++ b/Docker/Commands @@ -1,29 +1,31 @@ -# Build a docker image with all dependencies for Linux running the command below from the repo root folder. -# Dependency: Docker, Nvidia-Docker, Nvidia cuda-enabled GPU. If you do not have Nvidia GPU you might use also the Dockerfile_Windows for building image on Linux. -docker build . -t vtprl_image -f docker/Dockerfile +# Build a docker image with all dependencies for Linux running the command below from the repo root folder +# Dependency: Docker, Nvidia-Docker, Nvidia cuda-enabled GPU. if you do not have Nvidia GPU you might use also the Dockerfile_Windows for building image on Linux) +docker build . -t cmlr_image -f Docker/Dockerfile # Build a docker image with all dependencies for Windows running the command below from the repo root folder on Windows. -# Dependencies: Docker for Windows with Linux containers enabled. Only difference from Dockerfile is the root image it starts from as Docker Windows does not support GPU acceleration from inside Docker. -docker build . -t vtprl_image -f docker/Dockerfile_Windows +# Dependencies: Docker for Windows with Linux containers enabled. 
Only difference from Dockerfile is the root image it starts from as Docker Windows does not support GPU acceleration from inside Docker +docker build . -t cmlr_image -f Docker/Dockerfile_Windows -# These commands are useful to run before starting the container in case you want to start the simulator from a shell inside the container (not possible for Windows, works on linux). +# Starting the simulator on a fake display of X server on the AI4DI server from the shell: +DISPLAY=:3 ./ManipulatorEnvironment_Linux.x86_64 -export DISPLAY=":0" +# These commands are useful to run before starting the container in case you want to start the simulator from a shell inside the container (not possible for Windows, works on AI4DI servers, you can set it on your local machines or on VM from LRZ CC after installing X server) - it allows local connections to your X server +export DISPLAY=":3" xhost + local: -# After building create a container with the built image running the command below from the repo root folder. Same for Windows, but remove the --gpus parameter as it is not supported on Windows. +# After building create a container with the built image running the command below from the repo root folder. Same for Windows, but remove the --gpus parameter as it is not supported on Windows docker run --rm -it \ - --name vtprl_container \ + --name cmlr_container \ --gpus all \ -e DISPLAY \ -v $(pwd):/home/[repo_name]:rw \ -v $(pwd)/external/stable-baselines3:/home/repos/stable-baselines3:ro \ --privileged \ --net="host" \ - vtprl_image:latest \ + cmlr_image:latest \ bash # On local machines you can start the simulator outside of docker as well, cd to the repository root folder inside the docker container and run python3 agant/main.py to connect to the simulator and start training. -# If there are errors see "Troubleshooting" section. 
+# If there are errors see "Troubleshooting" section diff --git a/docker/Dockerfile b/Docker/Dockerfile similarity index 71% rename from docker/Dockerfile rename to Docker/Dockerfile index 810ab90..fed8607 100644 --- a/docker/Dockerfile +++ b/Docker/Dockerfile @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1 -FROM nvidia/cudagl:10.0-devel-ubuntu18.04 +FROM nvidia/cudagl:11.0-devel-ubuntu18.04 ENV DEBIAN_FRONTEND=noninteractive @@ -9,7 +9,7 @@ ADD https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 RUN dpkg -i cuda-keyring_1.0-1_all.deb RUN rm /etc/apt/sources.list.d/cuda.list RUN rm /etc/apt/sources.list.d/nvidia-ml.list -RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 python3.7 python3.7-dev python3-pip && \ +RUN apt-get update && apt-get install -y htop nano graphviz ffmpeg libsm6 libxext6 python3.7 python3.7-dev python3-pip && \ rm /usr/bin/python3 && ln -s /usr/bin/python3.7 /usr/bin/python3 && \ python3 -m pip install --upgrade pip @@ -23,20 +23,29 @@ RUN apt-get install -y build-essential cmake pkg-config git && \ cd pybind11 && mkdir build && cd build && cmake .. && make -j4 && make install && \ cd /home/repos && git clone https://github.com/dartsim/dart.git && cd dart && git checkout tags/v6.11.1 +# Install viewer dependencies +RUN apt-get install -y libxcb-icccm4 && apt-get install -y libxcb-image0 && \ + apt-get install -y libxcb-keysyms1 && apt-get install -y libxcb-render-util0 && \ + apt-get install -y libxcb-xkb-dev && apt-get install -y libxkbcommon-x11-0 && \ + apt-get install -y libxcb-xinerama0 + +RUN python3 -m pip install --upgrade pip + # Install DartPy from source with new bindings -COPY resources/dart_additional_files /home/repos/dart +COPY Dart_Additional_Files /home/repos/dart RUN cd /home/repos/dart && mkdir build && cd build && \ cmake .. 
-DCMAKE_INSTALL_PREFIX=/usr/ -DCMAKE_BUILD_TYPE=Release -DDART_BUILD_DARTPY=ON && \ make -j4 dartpy && make install-dartpy # Install Requirements -ADD docker/requirements.txt /home/requirements.txt -RUN python3 -m pip install -r /home/requirements.txt +ADD Docker/requirements.txt /home/requirements.txt +RUN python3 -m pip install -r /home/requirements.txt --log /home/requirements_logs.txt # Install Stable-Baselines-3 COPY external/stable-baselines3 /home/repos/stable-baselines3 #RUN cd /home/repos/stable-baselines3 && python3 -m pip install -e . -RUN cd /usr/local/lib/python3.7/dist-packages/ && python3 /home/repos/stable-baselines3/setup.py develop + +RUN cd /usr/local/lib/python3.7/dist-packages/ && python3 /home/repos/stable-baselines3/setup.py develop ENV PYTHONPATH='/home/repos/stable-baselines3' ENV WANDB_API_KEY=INSERT_KEY_HERE diff --git a/docker/Dockerfile_Windows b/Docker/Dockerfile_Windows similarity index 76% rename from docker/Dockerfile_Windows rename to Docker/Dockerfile_Windows index e0ad8e8..b046f0e 100644 --- a/docker/Dockerfile_Windows +++ b/Docker/Dockerfile_Windows @@ -16,14 +16,20 @@ RUN apt-get install -y build-essential cmake pkg-config git && \ cd /home/repos && git clone https://github.com/dartsim/dart.git && cd dart && git checkout tags/v6.11.1 # Install DartPy from source with new bindings -COPY resources/dart_additional_files /home/repos/dart +COPY Dart_Additional_Files /home/repos/dart RUN cd /home/repos/dart && mkdir build && cd build && \ cmake .. 
-DCMAKE_INSTALL_PREFIX=/usr/ -DCMAKE_BUILD_TYPE=Release -DDART_BUILD_DARTPY=ON && \ make -j4 dartpy && make install-dartpy +# Install viewer dependencies +RUN apt-get install -y libxcb-icccm4 && apt-get install -y libxcb-image0 && \ + apt-get install -y libxcb-keysyms1 && apt-get install -y libxcb-render-util0 && \ + apt-get install -y libxcb-xkb-dev && apt-get install -y libxkbcommon-x11-0 && \ + apt-get install -y libxcb-xinerama0 + # Install Requirements -ADD docker/requirements.txt /home/requirements.txt -RUN python3 -m pip install -r /home/requirements.txt +ADD Docker/requirements.txt /home/requirements.txt +RUN python3 -m pip install -r /home/requirements.txt --log /home/requirements_logs.txt # Install Stable-Baselines-3 COPY external/stable-baselines3 /home/repos/stable-baselines3 @@ -33,3 +39,4 @@ ENV PYTHONPATH='/home/repos/stable-baselines3' ENV WANDB_API_KEY=INSERT_KEY_HERE WORKDIR /home + diff --git a/docker/ROS-Dockerfile b/Docker/ROS-Dockerfile similarity index 100% rename from docker/ROS-Dockerfile rename to Docker/ROS-Dockerfile diff --git a/Docker/requirements.txt b/Docker/requirements.txt new file mode 100644 index 0000000..bccf604 --- /dev/null +++ b/Docker/requirements.txt @@ -0,0 +1,31 @@ +--extra-index-url https://download.pytorch.org/whl/cu116 +tqdm==4.65.0 +zmq==0.0.0 +protobuf==3.20.1 +roslibpy==1.5.0 + +Pillow==8.0.1 +numpy==1.21.6 +matplotlib==3.5.3 +opencv-python-headless==4.7.0.72 +scipy==1.7.3 +seaborn==0.12.2 +pandas==1.1.5 + +tensorboard==1.15.0 +tensorflow-estimator==1.15.1 +tensorflow-gpu==1.15.0 + +torch==1.12.0+cu116 +torchvision==0.13.0+cu116 +pytorch-lightning==0.8.1 + +optuna==3.1.1 +wandb==0.12.11 + +Box2D==2.3.10 +gym==0.17.3 + +PySide2==5.15.2.1 + +ruckig==0.9.2 diff --git a/agent/config.py b/agent/config.py index 360498a..1cf246a 100644 --- a/agent/config.py +++ b/agent/config.py @@ -1,16 +1,38 @@ -class Config: +""" +Configuration dict for dart and gym agents + +Run this script to update the UNITY configuration.xml (or 
update the fields manually). See the main method below for instructions +Symbol (*): If you have changed an option in the Config class where there is a "(*)" symbol in the comments, please run this script or + update the corresponding UNITY field in the .xml file to apply the changes - you may need to restart the simulator +""" +import os +import numpy as np +from utils.simulator_configuration import update_simulator_configuration + +class Config: def __init__(self): - pass + self.dart_dict = self.get_dart_env_dict() # Dart-related + self.config_dict = self.get_config_dict() # General and environment settings (e.g. number of envs, velocity limits, joints) + self.reward_dict = self.get_reward_dict() # Reward dict values + self.goal_dict = self.get_goal_dict() # UNITY options: Box dimensions + self.randomization_dict = self.get_randomization_dict() # UNITY options: latency, appearance, shadows, etc @staticmethod def get_dart_env_dict(): dart_env_dict = { # episode length in no. of simulation steps - 'max_time_step': 200, + 'max_time_step': 400, + + # to train RL-based models: PPO + 'total_timesteps': 10000, + + # agents and UNITY control cycles + 'control_cycle': 0.02, # (*) agents-related + 'unity_cycle': 0.02, # (*) UNITY-related # should control end-effector orientation or not - 'orientation_control': False, + 'orientation_control': True, # when True: actions in task space, when False: actions in joint space 'use_inverse_kinematics': True, @@ -19,9 +41,26 @@ def get_dart_env_dict(): # note: might conflict with training agents in task space velocities, in such cases set it to False 'linear_motion_conservation': False, - # when True: the reaching task can be also rendered in the DART viewer - # note: only for visualization, not appropriate for training a model on the pixels + # when True: the task can be also rendered in the DART viewer + # Important: Use it when evaluating an agent (e.g. checkpoint) . 
Only for debugging when training an RL agent ('simulation_mode': 'train') - set to False in this case + # Advanced: with 'weights & biases', you can log videos during training 'enable_dart_viewer': False, + + # enable task monitor to visualize states, velocities, agent actions, reward of the robot. + # 'enable_dart_viewer' should be set to True + 'task_monitor': False, + + # whether to load additional objects in the DART simulation and viewer - ground, background, etc. + 'with_objects': False, + + # how to spawn the red targets in the dart simulation + # Options: 'random', 'random_joint_level', 'import', 'fixed', 'None' + # import -> import targets from a .csv file (see 'target_path' below) + # None -> default behaviour. Can be adapted: See iiwa_sample_dart_unity_env, create_target() method + 'target_mode': "random_joint_level", + + # when target_mode is 'import': load targets from a .csv file + 'target_path': "/misc/generated_random_targets/cart_pose_7dof.csv" } return dart_env_dict @@ -29,45 +68,142 @@ def get_dart_env_dict(): @staticmethod def get_config_dict(): config_dict = { - - ###################################### GENERAL PARAMETERS ################################################## - # whether the RL algorithm uses customized or stable-baseline specific parameters 'custom_hps': True, - # use 'iiwa_joint_vel' for iiwa_environment without dart (only joint velocity control), or - # 'iiwa_sample_dart_unity_env' for control in task space with dart - 'env_key': 'iiwa_sample_dart_unity_env', + # agent model + 'model': 'PPO', - # environment-specific parameter, e.g, number of links, only relevant for iiwa_joint_vel env + # options: 'train', 'evaluate', 'evaluate_model_based" -> only for 'iiwa_sample_dart_unity_env' environment (see below). 
Refer to the main.py for more details + # 'evaluate': load an RL saved checkpoint + # 'evaluate_model_based': use a p-controller to solve the task for debugging + 'simulation_mode': 'evaluate_model_based', + + # available environments + 'env_key': 'iiwa_sample_dart_unity_env', # for control in task space with dart + #'env_key': 'iiwa_joint_vel', # without dart (only joint velocity control) -> enable gripper (see below). Sample env for testing images state representation + + # environment-specific parameter, e.g, number of links, only relevant for 'iiwa_joint_vel' env # currently, num_joints should be always 7 when the DART based environment is used 'num_joints': 7, - # whether to use images as state representation, an example code is in iiwa_joint_vel env - 'use_images': False, + # camera and images settings: gym and UNITY # + 'use_images': False, # (*) Use images as state representation. Example code at 'iiwa_joint_vel', _retrieve_image() and 'iiwa_sample_dart_unity_env', update() functions + 'image_size': 128, # (*) Gym observation space dimension. See 'iiwa_joint_vel', __init__() + 'camera_position': [0.0, 3.0, 0.0], # (*) UNITY + 'camera_rotation': [90.0, 0.0, -90.0], # (*) + + # end effector + 'enable_end_effector': True, # (*) Set to False if no tool is attached. Important: in that case, set 'robotic_tool' to None + 'robotic_tool': "3_gripper", # (*) Options: '3_gripper', '2_gripper', 'calibration_pin', 'None' (string). Also, 'end_effector' should be set to True. 
+ # -> For 'iiwa_sample_joint_vel_env' select a gripper # GRPC, ROS or ZMQ - 'communication_type': 'GRPC', + 'communication_type': 'GRPC', # (*) # the ip address of the server, for windows and docker use 'host.docker.internal', # for linux use 'localhost' for connections on local machine 'ip_address': 'localhost', # 'host.docker.internal', # 'localhost' # port number for communication with the simulator - 'port_number': '9092', + 'port_number': '9092', # (*) # the seed used for generating pseudo-random sequences - 'seed': None, + 'seed': 1235, # the state of the RL agent in case of numeric values for manipulators 'a' for angles only # or 'av' for angles and velocities 'state': 'a', - # whether to enable gripper for manipulation tasks such as grasping or not - 'gripper': False, - - # number of environments to run in parallel - 'num_envs': 16, + # number of environments to run in parallel, 8, 16, ... + # you may need to restart unity + 'num_envs': 1, + + ######################################################################### + # Logging settings during training # + ######################################################################### + 'log_dir': "./agent/logs/", # Folder to save the model checkpoints and tensorboard logs + 'tb_log_name': "ppo_log_tb", # Name of the tb folder + ######################################################################## + + ########################################################################################################## + # Manipulator related # + # Important: these functionallities are not supported for the standalone env 'iiwa_sample_joint_vel_env' # + # - see reset() and __init__ to adapt if needed # + ########################################################################################################## + + ################################################################################################################## + # Important: When the manipulator is spawned to a different position than the vertical 
position, # + # the parallel envs should terminate at the same timestep due to Unity synchronization behaviour # + # Imporant: when the observation space is an image, can not sample random_initial_joint_positions, set to False # + ################################################################################################################## + 'random_initial_joint_positions': False, # If set to True, it overrides the values set in 'initial_positions'. + 'initial_positions': None, # Example options: [0, 0, 0, 0, 0, 0, 0] same as None, [0, 0, 0, -np.pi/2, 0, np.pi/2, 0] + + ################################################################################################################################## + # Default velocity and joint limits for 'iiwa_sample_dart_unity_env' # + # Note: 'joints_safety_limit' -> set to higher value depending on your task and velocity ranges # + # the UNITY behaviour may be unstable when having a 0.0 safety limit with high velocities # + # e.g. nan tensor joint error -> the robot is in an invalid configuration - reset the manipulator # + ################################################################################################################################## + 'joints_safety_limit': 0.0, # [deg] + 'max_joint_vel': 20.0, # [deg/s] - Joint space + 'max_ee_cart_vel': 10.0, # [m/s] - Task space -- Not optimized values for sim2real transfer + 'max_ee_cart_acc': 3.0, # [m/s^2] + 'max_ee_rot_vel': 4.0, # [rad/s] + 'max_ee_rot_acc': 1.2 # [rad/s^2] } return config_dict + + @staticmethod + def get_reward_dict(): + reward_dict = { + 'reward_terminal': 0.0 # Given at the end of the episode if it was successful - see simulator_vec_env.py, step() method during reset + } + + return reward_dict + + def get_goal_dict(self): + """ + (*) Box dimensions - UNITY + """ + goal_dict = { + 'box_dim': [0.05, 0.1, 0.05] # (*) x, y, z + } + + return goal_dict + + def get_randomization_dict(self): + """ + (*) Randomization settings - UNITY + """ + + 
randomization_dict = { + 'randomize_latency': False, # (*) + 'randomize_torque': False, # (*) + 'randomize_appearance': False, # (*) + 'enable_shadows': True, # (*) Note: For sim2real transfer (images) try also False + 'shadow_type': "Soft", # (*) Options: Soft/Hard + } + + return randomization_dict + +if __name__ == "__main__": + """ + If you have updated the Config class, you can run this script to update the configuration.xml of the + UNITY simulator instead of updating the fields manually + + Important: In this case the simulator folder should be located inside the vtprl/ folder + with name: simulator/Linux/ManipulatorEnvironment/ManipulatorEnvironment.x86_64, or update the path + of the 'xml_file' variable below + + Note: You many need to restart the UNITY simulator after updating the .xml file + """ + config = Config() + + #os.path.dirname(os.path.realpath(__file__)) + + xml_file = "./simulator/Linux/ManipulatorEnvironment/configuration.xml" # Change the path if needed + + # Update the .xml based on the new changes in the Config class # + update_simulator_configuration(config, xml_file) diff --git a/agent/config_advanced.py b/agent/config_advanced.py new file mode 100644 index 0000000..02d828c --- /dev/null +++ b/agent/config_advanced.py @@ -0,0 +1,504 @@ +""" +Advanced Configuration dict for dart and gym agents + +Run this script to update the UNITY configuration.xml (or update the fields manually). 
See the main method below for instructions + +Symbol (*): If you have changed an option in the Config class where there is a "(*)" symbol in the comments, please run this script or + update the corresponding UNITY field in the .xml file to apply the changes - you may need to restart the simulator + +Important: First fully understand the logic behind config.py and 'iiwa_sample_dart_unity_env', and 'iiwa_joint_vel' envs +""" +import os +import numpy as np +from utils.simulator_configuration import update_simulator_configuration + +class ConfigAdvanced: + + def __init__(self): + self.dart_dict = self.get_dart_env_dict() # Dart-related + self.config_dict = self.get_config_dict() # General settings (e.g. number of envs, velocity limits, joints) + self.env_dict = self.get_env_dict() # Environment specific settings + self.reward_dict = self.get_reward_dict() # Reward dict values + self.hyper_dict = self.get_hyper_dict() # Model settings (e.g. PPO, RUCKIG Model-based) + self.goal_dict = self.get_goal_dict() # Green box and red rectangle target settings + self.manual_actions_dict = self.get_manual_actions_dict() # Hard-coded actions settings (applied at the end of the agent episode) + self.randomization_dict = self.get_randomization_dict() # UNITY and agent randomization settings + + @staticmethod + def get_dart_env_dict(): + dart_env_dict = { + # episode length in no. 
of simulation steps + 'max_time_step': 400, + + # to train RL-based models: PPO + 'total_timesteps': 1500000, + + # agents and UNITY control cycles + 'control_cycle': 0.05, # (*) agents-related + 'unity_cycle': 0.025, # (*) UNITY-related + + # should control end-effector orientation or not + 'orientation_control': True, + + # when True: actions in task space, when False: actions in joint space + 'use_inverse_kinematics': True, + + # when True: SNS algorithm is used for inverse kinematics to conserve optimal linear motion in task space + # note: might conflict with training agents in task space velocities, in such cases set it to False + 'linear_motion_conservation': False, + + # when True: the task can be also rendered in the DART viewer + # Important: Use it when evaluating an agent (e.g. checkpoint). Only for debugging when training an RL agent ('simulation_mode': 'train') - set to False in this case + # Advanced: with 'weights & biases', you can log videos during training + 'enable_dart_viewer': False, + + # enable task monitor to visualize states, velocities, agent actions, reward of the robot. + # 'enable_dart_viewer' should be set to True + 'task_monitor': True, + + # whether to load additional objects in the DART simulation and viewer - ground, background, etc. + 'with_objects': False, + + # how to spawn the red targets in the dart simulation + # Options: 'random', 'random_joint_level', 'import', 'fixed', 'None' + # import -> import targets from a .csv file (see 'target_path' below) + # None -> default behaviour. 
Can be adapted: See iiwa_sample_dart_unity_env.py, create_target() method + 'target_mode': "None", + + # when target_mode is 'import': load targets from a .csv file + 'target_path': "/misc/generated_random_targets/cart_pose_7dof.csv" + } + + return dart_env_dict + + @staticmethod + def get_config_dict(): + config_dict = { + # whether the RL algorithm uses customized or stable-baseline specific parameters + 'custom_hps': True, + + # agent model + # options: 'PPO', 'RUCKIG' + # RUCKIG is a model-based trajectory-generator. Set env_key to 'iiwa_ruckig_planar_grasping_dart_unity_env' + 'model': 'PPO', + + # options: 'train', 'evaluate', 'evaluate_model_based" -> only for 'iiwa_sample_dart_unity_env' environment (see below). Refer to the main.py and evaluate.py for more details + # 'evaluate', for grasping: if the number of test boxes can not be divided by the 'num_envs' (see below), some boxes will not be evaluated + 'simulation_mode': 'train', + + # available environments + #'env_key': 'iiwa_sample_dart_unity_env', # for control in task space with dart + #'env_key': 'iiwa_joint_vel', # without dart (only joint velocity control) -> enable gripper (see below). Sample env for testing images state representation + 'env_key': 'iiwa_numerical_planar_grasping_dart_unity_env', # RL-based planar grasping. Enable manual actions (see below) + #'env_key': 'iiwa_end_to_end_planar_grasping_dart_unity_env', # image-based planar grasping. Enable manual actions, enable images in the Unity simulator, and set 'use_images' to True + #'env_key': 'iiwa_ruckig_planar_grasping_dart_unity_env', # time-optimal planar grasping. 
Enable manual actions, set 'model' to RUCKIG, and 'simulation_mode' to evaluate + + # environment-specific parameter, e.g, number of links, only relevant for iiwa_joint_vel env + # currently, num_joints should be always 7 when the DART based environment is used + 'num_joints': 7, + + # camera and images settings: gym and UNITY # + 'use_images': False, # (*) Use images as state representation. Example code at 'iiwa_joint_vel', _retrieve_image() and 'iiwa_sample_dart_unity_env', update() function + 'image_size': 128, # (*) Gym observation space dimension. See 'iiwa_joint_vel', __init__() + 'camera_position': [0.0, 1.7, 1.6], # (*) UNITY + 'camera_rotation': [125.0, 0.0, 180.0], # (*) + + # end effector + 'enable_end_effector': True, # (*) Set to False if no tool is attached. Important: in that case, set 'robotic_tool' to None + 'robotic_tool': "3_gripper", # (*) Options: '3_gripper', '2_gripper', 'calibration_pin', 'None' (string). Also, 'end_effector' should be set to True. + # -> For 'iiwa_sample_joint_vel_env' select a gripper + + # GRPC, ROS or ZMQ + 'communication_type': 'GRPC', # (*) + + # the ip address of the server, for windows and docker use 'host.docker.internal', + # for linux use 'localhost' for connections on local machine + 'ip_address': 'localhost', # 'host.docker.internal', # 'localhost' + + # port number for communication with the simulator + 'port_number': '9092', # (*) + + # the seed used for generating pseudo-random sequences + 'seed': 1235, + + # the state of the RL agent in case of numeric values for manipulators 'a' for angles only + # or 'av' for angles and velocities + # Usage in: 'iiwa_sample_env', and 'iiwa_sample_joint_vel_env' + 'state': 'a', + + # number of environments to run in parallel, 8, 16, ... 
+ # you may need to restart unity + # Note: for RUCKIG model set to 1 - no Neural Network - for parallel GPU support + 'num_envs': 4, + + ######################################################################### + # Logging settings during training: see monitoring_agent.py # + ######################################################################### + 'log_dir': "./agent/logs/", # Folder to save the model checkpoints and tensorboard logs + 'save_model_freq': 3125, # Save model checkpoints at this frequency + 'check_freq': 3125, # Frequency to update the 'best_mean_reward' and 'best_model.zip' + 'tb_log_name': "ppo_log_tb", # Name of the tb folder to save custom metrics/stats. e.g. success rate + 'best_mean_reward': "inf", # Continue training from a checkpoint -> set 'best_mean_reward' to the 'best_mean_reward' value of the loaded model + ######################################################################### + + ################################################################################################################################### + # Evaluation # + # Important: For base envs -> support to evaluate only one model per time - do not use 'model_evaluation_type': all' # + # For planar grasping envs -> use 8 envs for 80 boxes, 10 envs for 50 boxes, etc. # + ################################################################################################################################### + + # Options for 'model_evaluation_type': 'all', or './agent/logs/run_0/best_model', or './agent/logs/run_1/model_25000_0', etc + # - 'all' -> Scan the agent/logs/ folder and evaulate all the checkpoints that exists in each agen/logs/run_i/ folder + # - './agent/logs/run_0/best_model' -> Single model evalaluation. Do not append .zip + # - Note: -> Use 'all' for model-based evaluation - RUCKIG + 'model_evaluation_type': 'all', + 'evaluation_name': 'planar_grasping_eval', # Pandas column to name your evaluation experiment + 'reward_baseline': -95, # (Adapt if needed). 
Reward at time step 0. "untrained" agent. For ploting. + ################################################################################################## + + ########################################################################################################## + # Manipulator related # + # Important: these functionallities are not supported for the standalone env 'iiwa_sample_joint_vel_env' # + # - see reset() and __init__ to adapt if needed # + ########################################################################################################## + + ############################################################################################################################ + # Important: When the manipulator is spawned to a different position than the vertical position, # + # the parallel envs should terminate at the same timestep due to Unity synchronization behaviour # + # Imporant: when the observation space is an image, can not sample random_initial_joint_positions, set to False # + # if you change the initial position - grasping envs should be adapted - e.g. 
height goal during manual actions # + ############################################################################################################################ + 'random_initial_joint_positions': False, # If set to True, it overrides the values set in 'initial_positions' + 'initial_positions': [0, 0, 0, -np.pi/2, 0, np.pi/2, 0], # Other options: [0, 0, 0, 0, 0, 0, 0] or None (same as the zero list) + + # Planar Limits - optimized values for sim2real transfer # + # Note: these values can be increased further # + 'joints_safety_limit': 10, + 'max_joint_vel': 20, + 'max_ee_cart_vel': 0.035, + 'max_ee_cart_acc': 0.35, + 'max_ee_rot_vel': 0.15, + 'max_ee_rot_acc': 15.0 # Remove limits to match ruckig + + ################################################################################################################################## + # Default velocity and joint limits for 'iiwa_sample_dart_unity_env' # + # Note: 'joints_safety_limit' -> set to higher value depending on your task and velocity ranges # + # the UNITY behaviour may be unstable when having a 0.0 safety limit with high velocities # + # e.g. 
nan tensor joint error -> the robot is in an invalid configuration - reset the manipulator # + ################################################################################################################################## + #'joints_safety_limit': 0.0, + #'max_joint_vel': 20, # Joint space + #'max_ee_cart_vel': 10.0, # Task space -- Not optimized values for sim2real transfer + #'max_ee_cart_acc': 3.0, + #'max_ee_rot_vel': 4.0, + #'max_ee_rot_acc': 1.2 + } + + return config_dict + + def get_env_dict(self): + """ + Environment specific settings + """ + + env_dict = None + + if(self.config_dict["env_key"].find("planar") != -1): # Planar environment + env_dict = { + ######################################################################## + # Agents p-controller settings # + # Planar RL-agents are using a p-controller to keep fixed some DoF # + # so that the robot moves in a planar manner during the episode # + ######################################################################## + 'agent_kp': 0.5, # P-controller gain position + 'agent_kpr': 1.5, # P-controller gain rotation + 'threshold_p_model_based': 0.01 # Threshold for model-based agents - RUCKIG + ######################################################################## + } + + return env_dict + + def get_goal_dict(self): + """ + Goal Settings: used mainly by the "advanced" envs (planar grasping) # + (*) + """ + goal_dict = { + 'goal_type': 'box', # 'target' -> red rectangle or 'box' -> green box + 'box_dim': [0.05, 0.1, 0.05] # (*) UNITY option x, y, z - if you change the height of the box, then adapt in boxes_generator.py the _get_samples() method + } + + if(goal_dict["goal_type"] == "box"): + goal_dict_box = { + 'box_type': 'small', # 'big' (10x10x10cm), or 'small' (5x10x5). 
Set 'box_dim' to the correct values + 'box_mode': "train", # 'train', 'val', 'debug' + 'box_max_distance_base': 0.67, # Max distance of the box to the base of the robot + 'box_min_distance_base': 0.475, # Min distance of the box to the base of the robot + 'box_folder': "./agent/logs/dataset/", # Folder to save the generated dataset + 'box_samples': 200000, # Train and val + 'box_split': 0.00025, # Val split ratio + 'box_save_val': True, # Save the dataset + 'box_load_val': False, # Load the validation boxes from a saved dataset. Train boxes are always random (seed) + 'box_radius_val': 0.0, # Exclude spawned train boxes that are close to the val boxes within this radius (threshold) in meters. e.g. 0.01 + 'box_x_active': True, # If set to False: x coord of the box will be always 0.0 meters + 'box_x_min': -0.67, # Minimum range for x box coordinate + 'box_x_max': 0.67, # Maximum range for x box coordinate + 'box_z_active': True, + 'box_z_min': 0.42, + 'box_z_max': 0.67, + 'box_ry_active': True, # If set to False -> Boxes will have a fixed rotation (0.0 deg) + 'box_ry_min': -90, + 'box_ry_max': 0, + 'box_debug': [-0.1, 0.05, 0.55, 0.0, -90.0, 0.0] # 'box_mode': 'debug' -> spawn a fixed box for all envs + } + + if(goal_dict_box['box_type'] == 'small'): + goal_dict['box_dim'] = [0.05, 0.1, 0.05] + elif(goal_dict_box['box_type'] == 'big'): + goal_dict['box_dim'] = [0.1, 0.1, 0.1] + + goal_dict.update(goal_dict_box) + + return goal_dict + + def get_hyper_dict(self): + """ + Model settings: PPO or RUCKIG + """ + + if(self.config_dict['model'] == 'PPO'): + hyper_dict = { + + #################################### MOST COMMON ################################################################################################# + 'learning_rate': 0.0005, # Default 0.0003 + 'clip_range': 0.25, # Default 0.2 + 'policy': "MlpPolicy", # Try 'CnnPolicy' with 'iiwa_end_to_end_planar_grasping_dart_unity_env' + 'policy_network': None, # None (means default), or 'PolicyNetworkVanillaReLU' -> 
default SB3 network but with ReLU act. func. instead of Tanh + ################################################################################################################################################# + + 'ent_coef': 0.02, # Default 0.0 + 'gamma': 0.96, # Default 0.99 + 'gae_lambda': 0.94, # Default 0.95 + 'n_steps': 512, # Default 2048 + 'n_epochs': 10, # Default 10 + 'vf_coef': 0.5, # Default 0.5 + 'max_grad_norm': 0.5, # Default 0.5 + 'batch_size': 32 # Default 64 + } + + # For 'iiwa_ruckig_planar_grasping_dart_unity_env'. Set 'simulation_mode': 'evaluate', and 'model': 'RUCKIG' + # Set 'num_envs': 1 -> the code can not be parallelized in the GPU (no neural network) + elif(self.config_dict['model'] == 'RUCKIG'): + hyper_dict = { + 'dof': [1, 1, 1], # Active dof. [1,1,1] or [0,1,1]. Important: for 011: set 'reward_pose_weight': 0.0 (see below). Activate only 2DoF control + 'target_vel': [0, 0, 0], # Velocity of the ee in the target pose (rz, x, y - dart coords) + 'target_acc': [0, 0, 0], # Acceleration of the ee in the target pose (rz, x, y - dart coords) + 'max_vel': [0.15, 0.035, 0.035], # Limits task-space + 'max_acc': [1000, 1000, 1000], # Disabled -> can also be set to some smaller values + 'max_jerk': [1000, 1000, 1000], + } + + else: + return None + + return hyper_dict + + @staticmethod + def get_reward_dict(): + """ + Novel reward that uses the displacements of the ee to the box (previous distance of the ee to the box minus currrent distance of ee to the box) + + see 'iiwa_numerical_planar_grasping_dart_unity_env' + + Note: adapt if needed to your task + + """ + + reward_dict = { + "reward_collision": -100, # One time penalty. When: collision with the box, floor, itself or joints position limits have been violated + "reward_terminal": 0.0, # If the box is in the air after the episode -> give this reward. 
Also, used in the default envs for a successful episode + "reward_height_goal": 0.25, # Goal height in meters -> then give 'reward_terminal' for planar envs only + 'reward_pose_weight': 1.0, # Important: Set 'reward_pose_weight': 0.0 to activate 2DoF control + 'reward_pose_norm_const': 0.04, # Normalization constant + 'reward_x_weight': 1.0, # Importance of this term + 'reward_x_norm_const': 0.01, + 'reward_z_weight': 1.0, + 'reward_z_norm_const': 0.01 + } + + return reward_dict + + def get_manual_actions_dict(self): + """ + Manual actions settings. + Whether to perform manual actions at the end of the RL-episode + (*) + """ + + manual_actions_dict = { + 'manual': True, # Activate manual actions + 'manual_rewards': True, # Give collision penalties during the manual actions + 'manual_behaviour': "planar_grasping", # Option 1: "planar_grasping" -> only for planar envs. Go 'down', 'close', and then 'up' at the end of the episode + # Option 2: "close_gripper" -> close the gripper at the end of the episode. Works for non-planar agents too + # The envs should terminate at the same time step always, see simulator_vec_env.py for explanations - self.step() method + } + + # Extend the 'manual_actions_dict' dict depending on the user-defined 'manual_behaviour' option # + behaviour_dict = None + if(manual_actions_dict["manual_behaviour"] == "planar_grasping"): + behaviour_dict = self.get_manual_actions_planar_dict() + + elif(manual_actions_dict["manual_behaviour"] == "close_gripper"): + behaviour_dict = self.get_manual_actions_close_gripper_dict() + + # Extend manual_actions_dict # + if(behaviour_dict is not None): + manual_actions_dict.update(behaviour_dict) + + return manual_actions_dict + + def get_manual_actions_planar_dict(self): + """ + Manual actions dictionary settings for planar grasping + + Actions: + 1. down + 2. close + 3. 
up + (*) + """ + manual_actions_planar_dict = None + + ###################################################################### + # Box 10x10x10 -> (0.055, 4, 15) / (down, close, close_vel) # + # Box 5x10x5 -> (0.055, 6, 15) / (down, close, close_vel) # + # (*) Change the configuration.xml of the Unity simulator: # + # (gripper type and box dimensions ) # + ###################################################################### + manual_actions_planar_dict = { + 'manual_kp': 1.0, # P-controller gain when going down/up for position error + 'manual_kpr': 3.0, # P-controller gain when going down/up for orientation error + 'manual_tol': 0.015, # Theshold of the P-controller + 'manual_up_height': 0.35, # Target height + 'manual_steps_same_state': 30, # How many steps the ee is allowed to not moved during the manual actions down/up + 'manual_tol_same_state': 0.01, # If the gripper has not moved more than this distance threshold value within 'steps_same_state' -> it means collision + } + + if(self.config_dict["robotic_tool"] == "None"): + manual_actions_planar_dict = None + + # 3-finger gripper # + elif(self.config_dict["robotic_tool"] == "3_gripper"): + manual_actions_planar_dict["manual_down_height"] = 0.055 # How much down to move to reach the box + manual_actions_planar_dict["manual_close_vel"] = 15 # Velocity value to close the gripper per each step + + if(self.goal_dict["box_type"] == "big"): # Big box + manual_actions_planar_dict["manual_close_steps"] = 4 # How many steps to close the gripper (with 'manual_close_vel' per each step) + + elif(self.goal_dict["box_type"] == "small"): # Small box + manual_actions_planar_dict["manual_close_steps"] = 6 + + else: + manual_actions_planar_dict = None + + # 2-finger gripper # + elif(self.config_dict['robotic_tool'] == "2_gripper"): + if(self.goal_dict["box_type"] == "small"): # Small box + manual_actions_planar_dict["manual_down_height"] = 0.055 + manual_actions_planar_dict["manual_close_steps"] = 10 + 
manual_actions_planar_dict["manual_close_vel"] = 15 + + else: # Important: Gripper 2 can not manipulate the big box + manual_actions_planar_dict = None + + else: + manual_actions_planar_dict = None + + return manual_actions_planar_dict + + def get_manual_actions_close_gripper_dict(self): + """ + Manual actions dictionary settings for closing the gripper at the end of the episode + (*) + """ + manual_actions_close_gripper_dict = None + + ###################################################################### + # Box 10x10x10 -> (4, 15) / (close, close_vel) # + # Box 5x10x5 -> (6, 15) / (close, close_vel) # + # (*) Change the configuration.xml of the Unity simulator: # + # (gripper type and box dimensions ) # + ###################################################################### + manual_actions_close_gripper_dict = { + } + + if(self.config_dict["robotic_tool"] is "None"): + manual_actions_close_gripper_dict = None + + # 3-finger gripper # + elif(self.config_dict["robotic_tool"] == "3_gripper"): + manual_actions_close_gripper_dict["manual_close_vel"] = 15 # Velocity value to close the gripper per each step + + if(self.goal_dict["box_type"] == "big"): # Big box + manual_actions_close_gripper_dict["manual_close_steps"] = 4 # How many steps to close the gripper (with 'manual_close_vel' per each step) + + elif(self.goal_dict["box_type"] == "small"): + manual_actions_close_gripper_dict["manual_close_steps"] = 6 + + else: + manual_actions_close_gripper_dict = None + + # 2-finger gripper # + elif(self.config_dict['robotic_tool'] == "2_gripper"): + if(self.goal_dict["box_type"] == "small"): # Small box + manual_actions_close_gripper_dict["manual_close_steps"] = 10 + manual_actions_close_gripper_dict["manual_close_vel"] = 15 + + else: # Important: Gripper 2 can not manipulate the big box + manual_actions_close_gripper_dict = None + + else: + manual_actions_close_gripper_dict = None + + return manual_actions_close_gripper_dict + + def get_randomization_dict(self): + """ + 1. 
Apply noise to the agent state observation see 'iiwa_numerical_planar_grasping_dart_unity_env' - self.get_state() method + + 2. (*) UNITY Randomization settings + """ + + randomization_dict = { + 'noise_enable_rl_obs': False, # Whether to apply noise in the agent state + 'noise_rl_obs_ratio': 0.05, # How much noise to apply in % percentage + + 'randomize_latency': False, # (*) + 'randomize_torque': False, # (*) + 'randomize_appearance': False, # (*) + 'enable_shadows': True, # (*) Note: For sim2real transfer (images) try also False + 'shadow_type': "Soft", # (*) Options: Soft/Hard + } + + return randomization_dict + +if __name__ == "__main__": + """ + If you have updated the Config class, you can run this script to update the configuration.xml of the + UNITY simulator instead of updating the fields manually + + Important: In this case the simulator folder should be located inside the vtprl/ folder + with name: simulator/Linux/ManipulatorEnvironment/ManipulatorEnvironment.x86_64, or update the path + of the 'xml_file' variable below + + Note: You many need to restart the UNITY simulator after updating the .xml file + """ + + config = ConfigAdvanced() + + #os.path.dirname(os.path.realpath(__file__)) + + xml_file = "./simulator/Linux/ManipulatorEnvironment/configuration.xml" # Change the path if needed + + # Update the .xml based on the new changes in the Config class # + update_simulator_configuration(config, xml_file) + diff --git a/agent/dart_envs/iiwa_dart.cpython-37m-x86_64-linux-gnu.so b/agent/dart_envs/iiwa_dart.cpython-37m-x86_64-linux-gnu.so deleted file mode 100755 index d68213b..0000000 Binary files a/agent/dart_envs/iiwa_dart.cpython-37m-x86_64-linux-gnu.so and /dev/null differ diff --git a/agent/dart_envs/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so b/agent/dart_envs/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so deleted file mode 100755 index 12a76cd..0000000 Binary files a/agent/dart_envs/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so and /dev/null 
differ diff --git a/agent/env_gym_run.py b/agent/env_gym_run.py new file mode 100755 index 0000000..fc9bbcb --- /dev/null +++ b/agent/env_gym_run.py @@ -0,0 +1,59 @@ +import numpy as np + +from envs_other.cartpole import CartPoleEnv +from envs_other.nlinks_box2d import NLinksBox2DEnv +from envs_other.pendulum import PendulumEnv + +from stable_baselines3.common.env_checker import check_env + +if __name__ == '__main__': + """ + Example method to run gym base environments + """ + + env_mode = "cartpole" # "nlinks_box2d", pendulum + + if(env_mode == "cartpole"): + env = CartPoleEnv(200) + check_env(env) + + while True: + env.render() + + rnd_act = [np.random.uniform(-1, 1)] + + s, _, d, _ = env.step(action=rnd_act) + if d: + env.reset() + + elif(env_mode == "nlinks_box2d"): + num_links = 10 + nlinks = NLinksBox2DEnv(max_ts=200, n_links=num_links) + + check_env(nlinks) + + # some silent initialization steps + # for _ in range(100): + # nlinks.render() + # zero_act = np.zeros(num_links) + # nlinks.step(action=zero_act) + + while True: + nlinks.render() + # print("anchor", nlinks.anchor.position) + # print("end_eff", nlinks.end_effector.position) + # rnd_act = np.array([0.3, -0.3]) + # rnd_act = np.array([np.random.rand() * 2 - 1, np.random.rand() * 2 - 1]) + rnd_act = [] + for i in range(num_links): + rnd_act.append(np.random.rand() * 2 - 1) + + s, _, d, _ = nlinks.step(action=rnd_act) + # print("tx: {0:2.1f}; ty: {1:2.1f}".format(s[0], s[1])) + # print("eex: {0:2.1f}; eey: {1:2.1f}".format(s[2], s[3])) + if d: + nlinks.reset() + + elif(env_mode == "pendulum"): + env = PendulumEnv(200) + check_env(env) diff --git a/agent/env_sim_run.py b/agent/env_sim_run.py old mode 100644 new mode 100755 index 2bf2254..daf7273 --- a/agent/env_sim_run.py +++ b/agent/env_sim_run.py @@ -1,30 +1,31 @@ -from config import Config -from dart_envs.iiwa_dart import IiwaDartEnv - import numpy as np +import time + +from config import Config +from envs_dart.iiwa_dart import IiwaDartEnv if __name__ 
== '__main__': """ - Example method to run the base standalone DART environment. - This is useful to quickly develop and check different model-based control policies. + Example method to run the base standalone DART environment. + This is useful to quickly develop and check different model-based control policies. """ - env_dict = Config.get_dart_env_dict() iiwa = IiwaDartEnv(max_ts=env_dict['max_time_step'], orientation_control=env_dict['orientation_control'], use_ik=env_dict['use_inverse_kinematics'], ik_by_sns=env_dict['linear_motion_conservation'], - enable_render=env_dict['enable_dart_viewer']) + enable_render=env_dict['enable_dart_viewer'], task_monitor=env_dict['task_monitor'], with_objects=env_dict['with_objects']) # iiwa.set_target(iiwa.create_target()) - pd_control = True and iiwa.USE_IK + pd_control = True and iiwa.dart_sim.use_ik control_kp = 1.0 / iiwa.observation_space.high[0] - import time last_episode_time = time.time() - iiwa.render() + iiwa.reset() + if iiwa.dart_sim.enable_viewer: + iiwa.render() time.sleep(1) while True: @@ -37,7 +38,8 @@ state, reward, done, info = iiwa.step(action=action) - iiwa.render() + if iiwa.dart_sim.enable_viewer: + iiwa.render() if done: iiwa.reset() diff --git a/agent/envs/iiwa_sample_env.py b/agent/envs/iiwa_sample_env.py new file mode 100644 index 0000000..9cd7643 --- /dev/null +++ b/agent/envs/iiwa_sample_env.py @@ -0,0 +1,482 @@ +""" +A sample Env class inheriting from the DART-Unity Env for the Kuka LBR iiwa manipulator with 7 links and a Gripper +The parent class takes care of integrating DART Engine with Unity simulation environment +Unity is used as the main simulator for physics/rendering computations. +The Unity interface receives joint velocities as commands and returns joint positions and velocities + +DART is used to calculate inverse kinematics of the iiwa chain. +DART changes the agent action space from the joint space to the cartesian space +(position-only or pose/SE(3)) of the end-effector. 
+ +action_by_pd_control method can be called to implement a Proportional-Derivative control law instead of an RL policy. + +Note: Coordinates in the Unity simulator are different from the ones in DART which used here: + The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART +""" + +import math +import numpy as np +from scipy.spatial.transform import Rotation as R + +from gym import spaces +from envs_dart.iiwa_dart_unity import IiwaDartUnityEnv + +# Import when images are used as state representation +# import cv2 +# import base64 + +class IiwaSampleEnv(IiwaDartUnityEnv): + def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, + state_type, enable_render=False, task_monitor=False, + with_objects=False, target_mode="random", target_path="/misc/generated_random_targets/cart_pose_7dof.csv", goal_type="target", + joints_safety_limit=0, max_joint_vel=20.0, max_ee_cart_vel=10.0, max_ee_cart_acc =3.0, max_ee_rot_vel=4.0, max_ee_rot_acc=1.2, + random_initial_joint_positions=False, initial_positions=[0, 0, 0, 0, 0, 0, 0], + robotic_tool="3_gripper", env_id=0): + + # range of vertical, horizontal pixels for the DART viewer + viewport = (0, 0, 500, 500) + + self.state_type = state_type + self.reset_flag = False + self.goal_type = goal_type # Target or box + + # Collision happened for this env when set to 1. In case the manipulator is spanwed in a different position than the # + # default vertical, for the remaining of the episode zero velocities are sent to UNITY # + self.collided_env = 0 + + ################################ + # Set initial joints positions # + ################################ + self.random_initial_joint_positions = random_initial_joint_positions # True, False + self.initial_positions = np.asarray(initial_positions, dtype=float) if initial_positions is not None else None + + # Initial positions flag for the manipulator after reseting. 
1 means different than the default vertical position # + # In that case, the environments should terminate at the same time step due to UNITY synchronization # + if((self.initial_positions is None or np.count_nonzero(initial_positions) == 0) and self.random_initial_joint_positions == False): + self.flag_zero_initial_positions = 0 + else: + self.flag_zero_initial_positions = 1 + + ############################################################################## + # Set Limits -> Important: Must be set before calling the super().__init__() # + ############################################################################## + + # Variables below exist in the parent class, hence the names should not be changed # + # Min distance to declare that the target is reached by the end-effector, adapt the values based on your task # + self.MIN_POS_DISTANCE = 0.05 # [m] + self.MIN_ROT_DISTANCE = 0.1 # [rad] + + self.JOINT_POS_SAFE_LIMIT = np.deg2rad(joints_safety_limit) # Manipulator joints safety limit + + # admissible range for joint positions, velocities, accelerations, # + # and torques of the iiwa kinematic chain # + self.MAX_JOINT_POS = np.deg2rad([170, 120, 170, 120, 170, 120, 175]) - self.JOINT_POS_SAFE_LIMIT # [rad]: based on the specs + self.MIN_JOINT_POS = -self.MAX_JOINT_POS + + # Joint space # + self.MAX_JOINT_VEL = np.deg2rad(np.full(7, max_joint_vel)) # np.deg2rad([85, 85, 100, 75, 130, 135, 135]) # [rad/s]: based on the specs + self.MAX_JOINT_ACC = 3.0 * self.MAX_JOINT_VEL # [rad/s^2]: just approximation due to no existing data + self.MAX_JOINT_TORQUE = np.array([320, 320, 176, 176, 110, 40, 40]) # [Nm]: based on the specs + + # admissible range for Cartesian pose translational and rotational velocities, # + # and accelerations of the end-effector # + self.MAX_EE_CART_VEL = np.full(3, max_ee_cart_vel) # np.full(3, 10.0) # [m/s] --- not optimized values for sim2real transfer + self.MAX_EE_CART_ACC = np.full(3, max_ee_cart_acc) # np.full(3, 3.0) # [m/s^2] --- not 
optimized values + self.MAX_EE_ROT_VEL = np.full(3, max_ee_rot_vel) # np.full(3, 4.0) # [rad/s] --- not optimized values + self.MAX_EE_ROT_ACC = np.full(3, max_ee_rot_acc) # np.full(3, 1.2) # [rad/s^2] --- not optimized values + + ################################################################################## + # End set limits -> Important: Must be set before calling the super().__init__() # + ################################################################################## + + super().__init__(max_ts=max_ts, orientation_control=orientation_control, use_ik=use_ik, ik_by_sns=ik_by_sns, + robotic_tool=robotic_tool, enable_render=enable_render, task_monitor=task_monitor, + with_objects=with_objects, target_mode=target_mode, target_path=target_path, viewport=viewport, + random_initial_joint_positions=random_initial_joint_positions, initial_positions=initial_positions, env_id=env_id) + + # Clip gripper action to this limit # + if(self.robotic_tool.find("3_gripper") != -1): + self.gripper_clip = 90 + elif(self.robotic_tool.find("2_gripper") != -1): + self.gripper_clip = 250 + else: + self.gripper_clip = 90 + + # Some attributes that are initialized in the parent class: + # self.reset_counter --> keeps track of the number of resets performed + # self.reset_state --> reset vector for initializing the Unity simulator at each episode + # self.tool_target --> initial position for the gripper state + + # the lines below should stay as it is + self.MAX_EE_VEL = self.MAX_EE_CART_VEL + self.MAX_EE_ACC = self.MAX_EE_CART_ACC + if orientation_control: + self.MAX_EE_VEL = np.concatenate((self.MAX_EE_ROT_VEL, self.MAX_EE_VEL)) + self.MAX_EE_ACC = np.concatenate((self.MAX_EE_ROT_ACC, self.MAX_EE_ACC)) + + # the lines below wrt action_space_dimension should stay as it is + self.action_space_dimension = self.n_links # there would be 7 actions in case of joint-level control + if use_ik: + # There are three cartesian coordinates x,y,z for inverse kinematic control + 
self.action_space_dimension = 3 + if orientation_control: + # and the three rotations around each of the axis + self.action_space_dimension += 3 + + if self.robotic_tool.find("gripper") != -1: + self.action_space_dimension += 1 # gripper velocity + + # Variables below exist in the parent class, hence the names should not be changed + tool_length = 0.2 # [m] allows for some tolerances in maximum observation + + # x,y,z of TCP: maximum reach of arm plus tool length in meters + ee_pos_high = np.array([0.95 + tool_length, 0.95 + tool_length, 1.31 + tool_length]) + ee_pos_low = -np.array([0.95 + tool_length, 0.95 + tool_length, 0.39 + tool_length]) + + high = np.empty(0) + low = np.empty(0) + if orientation_control: + # rx,ry,rz of TCP: maximum orientation in radians without considering dexterous workspace + ee_rot_high = np.full(3, np.pi) + # observation space is distance to target orientation (rx,ry,rz), [rad] + high = np.append(high, ee_rot_high) + low = np.append(low, -ee_rot_high) + + # and distance to target position (dx,dy,dz), [m] + high = np.append(high, ee_pos_high - ee_pos_low) + low = np.append(low, -(ee_pos_high - ee_pos_low)) + + # and joint positions [rad] and possibly velocities [rad/s] + if 'a' in self.state_type: + high = np.append(high, self.MAX_JOINT_POS) + low = np.append(low, self.MIN_JOINT_POS) + if 'v' in self.state_type: + high = np.append(high, self.MAX_JOINT_VEL) + low = np.append(low, -self.MAX_JOINT_VEL) + + # the lines below should stay as it is. 
# + # Important: Adapt only if you use images as state representation, or your task is more complicated # + # Good practice: If you need to adapt several methods, inherit from IiwaSampleEnv and define your own class # + self.action_space = spaces.Box(-np.ones(self.action_space_dimension), np.ones(self.action_space_dimension), + dtype=np.float32) + + self.observation_space = spaces.Box(low, high, dtype=np.float32) + + def create_target(self): + """ + defines the target to reach per episode, this should be adapted by the task + + should always return rx,ry,rz,x,y,z in order, -> dart coordinates system + i.e., first target orientation rx,ry,rz in radians, and then target position x,y,z in meters + in case of orientation_control=False --> rx,ry,rz are irrelevant and can be set to zero + + _random_target_gen_joint_level(): generate a random sample target in the reachable workspace of the iiwa + + :return: Cartesian pose of the target to reach in the task space (dart coordinates) + """ + + # Default behaviour # + if(self.target_mode == "None"): + rx, ry, rz = 0.0, np.pi, 0.0 + target = None + while True: + x, y, z = np.random.uniform(-1.0, 1.0), np.random.uniform(-1.0, 1.0), 0.2 + + if 0.4 < math.sqrt(math.pow(x, 2) + math.pow(y, 2)) < 0.8: + target = rx, ry, rz, x, y, z + break + + elif self.target_mode == "import": + target = self._recorded_next_target() + + elif self.target_mode == "random": + target = self._random_target() + + elif self.target_mode == "random_joint_level": + target = self._random_target_gen_joint_level() # Sample always a rechable target + + elif self.target_mode == "fixed": + target = self._fixed_target() + + else: + target = [0, 0, 0, 0, -5, -200] # Dummy will be defined from the user later - advise define it in the reset() function + + return target + + def get_state(self): + """ + defines the environment state, this should be adapted by the task + + get_pos_error(): returns Euclidean error from the end-effector to the target position + 
get_rot_error(): returns Quaternion error from the end-effector to the target orientation + + :return: state for the policy training + """ + state = np.empty(0) + if self.dart_sim.orientation_control: + state = np.append(state, self.dart_sim.get_rot_error()) + + state = np.append(state, self.dart_sim.get_pos_error()) + + if 'a' in self.state_type: # Append the joints position of the manipulator + state = np.append(state, self.dart_sim.chain.getPositions()) + if 'v' in self.state_type: + state = np.append(state, self.dart_sim.chain.getVelocities()) + + # the lines below should stay as it is + self.observation_state = np.array(state) + + return self.observation_state + + def get_reward(self, action): + """ + defines the environment reward, this should be adapted by the task + + :param action: is the current action decided by the RL agent + + :return: reward for the policy training + """ + # stands for reducing position error + reward = -self.dart_sim.get_pos_distance() + + # stands for reducing orientation error + if self.dart_sim.orientation_control: + reward -= 0.5 * self.dart_sim.get_rot_distance() + + # stands for avoiding abrupt changes in actions + reward -= 0.1 * np.linalg.norm(action - self.prev_action) + + # stands for shaping the reward to increase when target is reached to balance at the target + if self.get_terminal_reward(): + reward += 1.0 * (np.linalg.norm(np.ones(self.action_space_dimension)) - np.linalg.norm(action)) + + # the lines below should stay as it is + self.reward_state = reward + + return self.reward_state + + def get_terminal_reward(self): + """ + checks if the target is reached + + get_pos_distance(): returns norm of the Euclidean error from the end-effector to the target position + get_rot_distance(): returns norm of the Quaternion error from the end-effector to the target orientation + + Important: by default a 0.0 value of a terminal reward will be given to the agent. To adapt it please refer to the config.py, + in the reward_dict. 
This terminal reward is given to the agent during reset in the step() function in the simulator_vec_env.py + + :return: a boolean value representing if the target is reached within the defined threshold + """ + if self.dart_sim.get_pos_distance() < self.MIN_POS_DISTANCE: + if not self.dart_sim.orientation_control: + return True + if self.dart_sim.get_rot_distance() < self.MIN_ROT_DISTANCE: + return True + + return False + + def get_terminal(self): + """ + checks the terminal conditions for the episode - for reset + + :return: a boolean value indicating if the episode should be terminated + """ + + if self.time_step > self.max_ts: + self.reset_flag = True + + return self.reset_flag + + def update_action(self, action): + """ + converts env action to the required unity action by possibly using inverse kinematics from DART + + self.dart_sim.command_from_action() --> changes the action from velocities in the task space (position-only 'x,y,z' or complete pose 'rx,ry,rz,x,y,z') + to velocities in the joint space of the kinematic chain (j1,...,j7) + + :param action: The action vector decided by the RL agent, acceptable range: [-1,+1] + + It should be a numpy array with the following shape: [arm_action] or [arm_action, tool_action] + in case 'robotic_tool' is a gripper, tool_action is always a dim-1 scalar value representative of the normalized gripper velocity + + arm_action has different dim based on each case of control level: + in case of use_ik=False -> is dim-7 representative of normalized joint velocities + in case of use_ik=True -> there would be two cases: + orientation_control=False -> of dim-3: Normalized EE Cartesian velocity in x,y,z DART coord + orientation_control=True -> of dim-6: Normalized EE Rotational velocity in x,y,z DART coord followed by Normalized EE Cartesian velocity in x,y,z DART coord + + :return: the command to send to the Unity simulator including joint velocities and possibly the gripper position + """ + + # the lines below should stay as it 
is + self.action_state = action + action = np.clip(action, -1., 1.) + + # This updates the gripper target by accumulating the tool velocity from the action vector # + # Note: adapt if needed: e.g. accumulating the tool velocity may not work well for your task # + if self.robotic_tool.find("gripper") != -1: + tool_action = action[-1] + self.tool_target = np.clip((self.tool_target + tool_action), 0.0, self.gripper_clip) + + # This removes the gripper velocity from the action vector for inverse kinematics calculation + action = action[:-1] + + # use INVERSE KINEMATICS # + if self.dart_sim.use_ik: + task_vel = self.MAX_EE_VEL * action + joint_vel = self.dart_sim.command_from_action(task_vel, normalize_action=False) + else: + joint_vel = self.MAX_JOINT_VEL * action + + # append tool action # + unity_action = joint_vel + if self.robotic_tool.find("gripper") != -1: + unity_action = np.append(unity_action, [float(self.tool_target)]) + + return unity_action + + def update(self, observation, time_step_update=True): + """ + converts the unity observation to the required env state defined in get_state() + with also computing the reward value from the get_reward(...) and done flag, + it increments the time_step, and outputs done=True when the environment should be reset + + important: it also updates the dart kinematic chain of the robot using the new unity simulator observation. 
+ always call this function, once you have send a new command to unity to synchronize the agent environment + + :param observation: is the observation received from the Unity simulator within its [X,Y,Z] coordinate system + 'joint_values': indices [0:7], + 'joint_velocities': indices [7:14], + 'ee_position': indices [14:17], + 'ee_orientation': indices [17:20], + 'target_position': indices [20:23], + 'target_orientation': indices [23:26], + 'object_position': indices [26:29], + 'object_orientation': indices [29:32], + 'gripper_position': indices [32:33], ---(it is optional, in case a gripper is enabled) + 'collision_flag': indices [33:34], ---([32:33] in case of without gripper) + + :param time_step_update: whether to increase the time_step of the agent + + :return: The state, reward, episode termination flag (done), and an info dictionary + """ + # use commented lines below in case you'd like to save the image observations received from the Unity + # base64_bytes = observation['ImageData'][0].encode('ascii') + # image_bytes = base64.b64decode(base64_bytes) + # image = np.frombuffer(image_bytes, np.uint8) + # image = cv2.imdecode(image, cv2.IMREAD_GRAYSCALE) + # print(image.shape) + # cv2.imwrite("test.jpg", image) + + self.current_obs = observation['Observation'] + + ########################################################################################### + # collision happened or joints limits overpassed # + # Important: in case the manipulator is spanwed in a different position than the default # + # vertical, for the remaining of the episode zero velocities are sent to UNITY # + # see _send_actions_and_update() method in simulator_vec_env.py # + ########################################################################################### + if(observation['Observation'][-1] == 1.0 or self.joints_limits_violation()): + self.collided_env = 1 + # Reset when we have a collision only when we spawn the robot to the default # + # vertical position, else wait the 
episode to finish # + # Important: you may want to reset anyway depending on your task - adapt # + if(self.flag_zero_initial_positions == 0): + self.reset_flag = True + + # the method below handles synchronizing states of the DART kinematic chain with the # + # observation from Unity hence it should be always called # + self._unity_retrieve_joint_values(observation['Observation']) + + # Class attributes below exist in the parent class, hence the names should not be changed + if(time_step_update == True): + self.time_step += 1 + + state = self.get_state() + reward = self.get_reward(self.action_state) + done = bool(self.get_terminal()) + info = {"success": False} # Episode was successful. For now it is set at simulator_vec_env.py before reseting, step() method. Adapt if needed + + self.prev_action = self.action_state + + return state, reward, done, info + + def reset(self, temp=False): + """ + resets the DART-gym environment and creates the reset_state vector to be sent to Unity + + :param temp: not relevant + + :return: The initialized state + """ + # takes care of resetting the DART chain and should stay as it is + state = super().reset(random_initial_joint_positions=self.random_initial_joint_positions, initial_positions=self.initial_positions) + + self.collided_env = 0 + random_target = self.create_target() # draw a red target + + # sets the initial reaching target for the current episode, + # should be always called in the beginning of each episode, + # you might need to call it even during the episode run to change the reaching target for the IK-P controller + self.set_target(random_target) + + # initial position for the gripper state, accumulates the tool_action velocity received in update_action + self.tool_target = 0.0 # should be in range [0.0,90.0] + + # movement control of each joint can be disabled by setting zero for that joint index + active_joints = [1] * 7 + + # the lines below should stay as it is, Unity simulator expects these joint values in 
radians + joint_positions = self.dart_sim.chain.getPositions().tolist() + joint_velocities = self.dart_sim.chain.getVelocities().tolist() + + # the mapping below for the target should stay as it is, unity expects orientations in degrees + target_positions = self.dart_sim.target.getPositions().tolist() + + target_X, target_Y, target_Z = -target_positions[4], target_positions[5], target_positions[3] + target_RX, target_RY, target_RZ = np.rad2deg([-target_positions[1], target_positions[2], target_positions[0]]) + target_positions_mapped = [target_X, target_Y, target_Z, target_RX, target_RY, target_RZ] + + # spawn the object in UNITY: by default a green box is spawned + # depending on your task, positioning the object might be necessary, start from the following sample code + # important: unity expects orientations in degrees + object_X, object_Y, object_Z = np.random.uniform(-1.0, 1.0), 0.05, np.random.uniform(-1.0, 1.0) + object_RX, object_RY, object_RZ = 0.0, 0.0, 0.0 + object_positions_mapped = [object_X, object_Y, object_Z, object_RX, object_RY, object_RZ] + + self.reset_state = active_joints + joint_positions + joint_velocities\ + + target_positions_mapped + object_positions_mapped + + if self.robotic_tool.find("gripper") != -1: + self.reset_state = self.reset_state + [self.tool_target] + + self.reset_counter += 1 + self.reset_flag = False + + return state + + #################### + # Commands related # + #################### + def action_by_p_control(self, coeff_kp_lin, coeff_kp_rot): + """ + computes the task-space velocity commands proportional to the reaching target error + + :param coeff_kp_lin: proportional coefficient for the translational error + :param coeff_kp_rot: proportional coefficient for the rotational error + + :return: The action in task space + """ + + action_lin = coeff_kp_lin * self.dart_sim.get_pos_error() + action = action_lin + + if self.dart_sim.orientation_control: + action_rot = coeff_kp_rot * self.dart_sim.get_rot_error() + action = 
np.concatenate(([action_rot, action])) + + if self.robotic_tool.find("gripper") != -1: + tool_vel = 0.0 # zero velocity means no gripper movement - should be adapted for the task + action = np.append(action, [tool_vel]) + + return action diff --git a/agent/iiwa_sample_joint_vel_env.py b/agent/envs/iiwa_sample_joint_vel_env.py similarity index 53% rename from agent/iiwa_sample_joint_vel_env.py rename to agent/envs/iiwa_sample_joint_vel_env.py index 3853b33..62ed000 100644 --- a/agent/iiwa_sample_joint_vel_env.py +++ b/agent/envs/iiwa_sample_joint_vel_env.py @@ -1,23 +1,25 @@ """ -A sample Env class inheriting from basic gym.Env for the Kuka LBR iiwa manipulator with 7 links and a Gripper. -There is no inverse kinematics calculation supported here (see iiwa_sample_env for IK), only joint velocity control -is supported with it. Unity is used as the main simulator for physics/rendering computations. The Unity interface -receives joint velocities as commands and returns joint positions and velocities. The class presents a way to alternate -between numeric observations and image observations, how to parse images returned from the Unity simulator and how to -reset the environments in the simulator +A sample Env class inheriting from basic gym.Env for the Kuka LBR iiwa manipulator with 7 links and a Gripper. + +Important: There is no inverse kinematics calculation supported here (see iiwa_sample_env for IK), only joint velocity control is supported with it. + +Unity is used as the main simulator for physics/rendering computations. The Unity interface receives joint velocities as commands +and returns joint positions and velocities. 
The class presents a way to alternate between numeric observations and image observations, +how to parse images returned from the Unity simulator and how to reset the environments in the simulator """ import math import time -import numpy as np import cv2 import base64 + +import numpy as np from numpy import newaxis + from gym import spaces, core from gym.utils import seeding - class IiwaJointVelEnv(core.Env): # the max velocity allowed for a joint in rad, used for normalization of the state values related to angle velocity MAX_VEL = 1.1 # value used to normalize the speed returned from the @@ -27,17 +29,24 @@ class IiwaJointVelEnv(core.Env): # assumed max distance between target and end-effector, used for normalizing the agent state in range (-1,1) MAX_DISTANCE = 2 + # min distance to declare that the target is reached by the end-effector, adapt the values based on your task + MIN_POS_DISTANCE = 0.05 # [m] + def __init__(self, id, max_ts, config): self.max_ts = max_ts self.ts = 0 + self.use_images = config["use_images"] + self.image_size = config["image_size"] + # relevant for numeric observations: whether only angles (a) or angles and velocities (av) of the joints self.state_type = config["state"] self.simulator_observation = None self.id = id - self.reset_counter = 0; + self.reset_counter = 0 self.reset_state = None - self.collided = 0 + + self.collided_env = 0 self.seed() # number of joints to be controlled from the 7 available @@ -54,12 +63,12 @@ def __init__(self, id, max_ts, config): high = np.array(state_array_limits) low = -high + # Define a gym observation space suitable for images # if self.use_images: - # if using images make 100x100 pixels grayscale observation space - self.observation_space = spaces.Box( - low=0, high=255, - shape=(100, 100, 1), - dtype=np.uint8) + # if using images make self.image_sizexself.image_size pixels grayscale observation space + self.observation_space = spaces.Box(low=0, high=255, + shape=(self.image_size, self.image_size, 
1), + dtype=np.uint8) else: self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32) @@ -71,55 +80,81 @@ def __init__(self, id, max_ts, config): low = -high self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) - def update(self, observation): + def update(self, observation, time_step_update=True): """ - a replacement of the standart step() method used in OpenAI gym. Unlike typical gym environment where the step() - function is called at each timestep, in this case the simulator_vec_env step() function is called during - training, which in turn calls the update() function here for a single env to pass its simulator observation. - Reasons are related to the logic of how to communicate with the simulator and how to reset individual - environments running in the simulator independently. + a replacement of the standart step() method used in OpenAI gym. Unlike typical gym environment where the step() + function is called at each timestep, in this case the simulator_vec_env step() function is called during + training, which in turn calls the update() function here for a single env to pass its simulator observation. + Reasons are related to the logic of how to communicate with the simulator and how to reset individual + environments running in the simulator independently. """ - self.ts += 1 + if(time_step_update == True): + self.ts += 1 + # create agent state for the specific task from the generic simulator observation that was passed self._convert_observation(observation['Observation']) - info = {} + info = {"success": False} # Episode was successful. For now it is set at simulator_vec_env.py before the reset. 
Adapt if needed terminal = self._get_terminal() reward = self._get_reward() + if self.use_images: return self._retrieve_image(observation), reward, terminal, info else: return self.state, reward, terminal, info def _get_reward(self): - """ example definition of a reward function for a specific task""" + """ + example definition of a reward function for a specific task + """ absolute_distance = self._get_distance() + # in this case the reward is the negative distance between the target and the end-effector and a punishment # of 1 for collision. This reward is suitable for training reaching tasks - return - 1 * (absolute_distance + self.collided) + return - 1 * (absolute_distance + self.collided_env) + + def get_terminal_reward(self): + """ + checks if the target is reached + + _get_distance(): returns norm of the Euclidean error from the end-effector to the target position + + Important: by default a 0.0 value of a terminal reward will be given to the agent. To adapt it please refer to the config.py, + in the reward_dict. This terminal reward is given to the agent during the step() function in the simulator_vec_env.py + + :return: a boolean value representing if the target is reached within the defined threshold + """ + if self._get_distance() < self.MIN_POS_DISTANCE: + return True + + return False def _convert_observation(self, new_observation): """ - method used for creating task-specific agent state from the generic simulator observation returned. 
- The simulator observation has the following array of 34 values: - [a1, a2, a3, a4, a5, a6, a7, - v1, v2, v3, v4, v5, v6, v7, - ee_x, ee_y, ee_z ee_rx, ee_ry, ee_rz, - t_x, t_y, t_z, t_rx, t_ry, t_rz, - o_x, o_y, o_z, o_rx, o_ry, o_rz, - g_p, - c] - where a1..a7 are the angles of each joint of the robot in radians, v1..v7 are the velocities of each joint - in rad/sec, x, y, and z for ee, t and o are the coordinates and and rx, ry, and rz for ee, t and o are the - quaternion x, y and z components of the rotation for the end-effector, the target and the object(box) - respectively. g_p is the position (opening) of the gripper and c is a collision flag (0 if no collision and 1 - if a collision of any part of the robot with the floor happened) + method used for creating task-specific agent state from the generic simulator observation returned. + + The simulator observation has the following array of 34 values: + [a1, a2, a3, a4, a5, a6, a7, + v1, v2, v3, v4, v5, v6, v7, + ee_x, ee_y, ee_z ee_rx, ee_ry, ee_rz, + t_x, t_y, t_z, t_rx, t_ry, t_rz, + o_x, o_y, o_z, o_rx, o_ry, o_rz, + g_p, c + ] + + where + - a1..a7 are the angles of each joint of the robot in radians, + - v1..v7 are the velocities of each joint in rad/sec, + - x, y, and z for ee, t and o are the coordinates and and rx, ry, and rz for ee, t and o are the + quaternion x, y and z components of the rotation for the end-effector, the target and the object(box) respectively + - g_p is the position (opening) of the gripper and, + - c is a collision flag (0 if no collision and 1 if a collision of any part of the robot with the floor happened) """ # below is an example agent state suitable for solving the position reaching task self.simulator_observation = new_observation - self.collided = self.simulator_observation[-1] + self.collided_env = self.simulator_observation[-1] self.gripper_position = self.simulator_observation[-2] self.target_x, self.target_y, self.target_z, _, _, _ = self._get_target_pose() @@ -167,35 
+202,39 @@ def _get_distance(self): def step(self): """ - not used directly on level on single environment, see update() method instead + not used directly on level on single environment, + see update() method instead and step() of simulator_vec_env.py """ pass def reset(self): """ - reset method that can set the environment in the simulator in a specific initial state and enable/disable joints - The method defines the self.reset_state that is used from simulator_vec_env to send the reset values to the - Unity simulator. The simulator reset state has the following array of 32 values: - [j1, j2, 23, j4, j5, j6, j7, - a1, a2, a3, a4, a5, a6, a7, - v1, v2, v3, v4, v5, v6, v7, - t_x, t_y, t_z, t_rx, t_ry, t_rz, - o_x, o_y, o_z, o_rx, o_ry, o_rz, - g_p] - where j1..j7 are flags indicating whether a joint should be enabled (1) or disabled (0), - a1..a7 are the initial angles of each joint of the robot in radians (currently only 0 initial values supported - due to unity limitations) - v1..v7 are the initial velocities of each joint in rad/sec (currently only 0 initial values supported due to - unity limitations) - x, y, and z for t and o are the initial coordinates of the target and the object in meters (note that y is - vertical axis in unity) - rx, ry, and rz for t and o are the euler angles in degrees for the rotation of the object and the target - g_p is the position (opening) of the gripper (0 is open, value up to 90 supported) + reset method that can set the environment in the simulator in a specific initial state and enable/disable joints + The method defines the self.reset_state that is used from simulator_vec_env to send the reset values to the Unity simulator. 
+ + The simulator reset state has the following array of 32 values: + [j1, j2, 23, j4, j5, j6, j7, + a1, a2, a3, a4, a5, a6, a7, + v1, v2, v3, v4, v5, v6, v7, + t_x, t_y, t_z, t_rx, t_ry, t_rz, + o_x, o_y, o_z, o_rx, o_ry, o_rz, + g_p] + + where + - j1..j7 are flags indicating whether a joint should be enabled (1) or disabled (0), + - a1..a7 are the initial angles of each joint of the robot in radians (currently only 0 initial values supported + due to unity limitations) + - v1..v7 are the initial velocities of each joint in rad/sec (currently only 0 initial values supported due to + unity limitations) + - x, y, and z for t and o are the initial coordinates of the target and the object in meters (note that y is + vertical axis in unity) + - rx, ry, and rz for t and o are the euler angles in degrees for the rotation of the object and the target + - g_p is the position (opening) of the gripper (0 is open, value up to 90 supported) """ self.reset_counter += 1 self.ts = 0 - self.collided = 0; + self.collided_env = 0 self.target_x = 0 self.target_y = 0.38 # np.random.uniform(0, 1.15) @@ -207,8 +246,11 @@ def reset(self): # the joints to be enabled have value 1 and the others have value 0 self.reset_state = [1] * self.num_joints + [0] * (7 - self.num_joints) - # zeros for initial joint angles and velocities + + # zeros for initial joint angles and velocities + # Note: adapt this to reset to different initial positions if needed self.reset_state.extend([0] * 14) + # initial target and object position and orientation and gripper opening self.reset_state.extend([ self.target_x, self.target_y, self.target_z, 90, 0, 0, @@ -217,21 +259,17 @@ def reset(self): def _get_terminal(self): """ - a function to define terminal condition, it can be customized to define terminal conditions based on episode - duration, whether the task was solved, whether collision or some illegal state happened + a function to define terminal condition, it can be customized to define terminal conditions 
based on + episode duration, whether the task was solved, whether collision or some illegal state happened """ - dist = np.sqrt(np.power(self.target_x - self.ee_x, 2) + np.power(self.target_y - self.ee_y, 2) - + np.power(self.target_z - self.ee_z, 2)) - # if dist < 0.3: - # return True if self.ts > self.max_ts: return True return False def _retrieve_image(self, observation): """ - if the image observations are enabled at the simulator, it passes image data in the observation in addition - to the numeric data. This method shows how the image data can be accessed, saved locally, processed, etc. + if the image observations are enabled at the simulator, it passes image data in the observation in addition + to the numeric data. This method shows how the image data can be accessed, saved locally, processed, etc. """ # how to access the image data when it is provided by the simulator base64_bytes = observation['ImageData'][0].encode('ascii') @@ -246,10 +284,10 @@ def _retrieve_image(self, observation): # cv2.imwrite('images/received-image-bw-{}.{}'.format('cv', 'jpg'), image_bw) image = cv2.GaussianBlur(image, (3, 3), cv2.BORDER_DEFAULT) # cv2.imwrite('images/received-image-{}.{}'.format('blurred', 'jpg'), dst) - #dim = (100, 100) + #dim = (self.image_size, self.image_size) # resize image #image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA) #return image return image[:, :, newaxis] - # return np.zeros(shape=(100, 100, 1), + # return np.zeros(shape=(self.image_size, self.image_size, 1), # dtype=np.uint8) diff --git a/agent/envs_advanced/iiwa_end_to_end_planar_grasping_env.py b/agent/envs_advanced/iiwa_end_to_end_planar_grasping_env.py new file mode 100755 index 0000000..6ab2dce --- /dev/null +++ b/agent/envs_advanced/iiwa_end_to_end_planar_grasping_env.py @@ -0,0 +1,291 @@ +""" +A vision-based end-to-end RL planar grasping Env class inheriting from the IiwaNumericalPlanarGraspingEnv for the Kuka LBR iiwa manipulator with 7 links and a Gripper +The parent class 
takes care of integrating DART Engine with Unity simulation environment + +Unity is used as the main simulator for physics/rendering computations. +The Unity interface receives joint velocities as commands and returns joint positions and velocities + +DART is used to calculate inverse kinematics of the iiwa chain. +DART changes the agent action space from the joint space to the cartesian space (position-only or pose/SE(3)) of the end-effector. + +action_by_pd_control method can be called to implement a Proportional-Derivative control law instead of an RL policy. + +Note: Coordinates in the Unity simulator are different from the ones in DART which used here: +The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART +""" + +import numpy as np +import math +from gym import spaces +from envs_advanced.iiwa_numerical_planar_grasping_env import IiwaNumericalPlanarGraspingEnv +import cv2 +import base64 + +class IiwaEndToEndPlanarGraspingEnv(IiwaNumericalPlanarGraspingEnv): + def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, state_type, use_images=True, enable_render=False, task_monitor=False, target_mode="None", goal_type="box", + randomBoxesGenerator=None, joints_safety_limit=10, max_joint_vel=20, max_ee_cart_vel=0.035, max_ee_cart_acc =10, max_ee_rot_vel=0.15, max_ee_rot_acc=10, + random_initial_joint_positions=False, initial_positions=[0, 0, 0, -np.pi/2, 0, np.pi/2, np.pi/2], + noise_enable_rl_obs=False, noise_rl_obs_ratio=0.05, reward_dict=None, agent_kp=0.5, agent_kpr=1.5, + robotic_tool="3_gripper", image_size=128, env_id=0): + + if(use_images != True): + raise Exception("End-to-end vision-based env requires use_images to be set to True - abort") + + # the init of the parent class should be always called, this will in the end call reset() once + super().__init__(max_ts=max_ts, orientation_control=orientation_control, use_ik=use_ik, ik_by_sns=ik_by_sns, state_type=state_type, enable_render=enable_render, task_monitor=task_monitor, 
target_mode=target_mode, goal_type=goal_type, + randomBoxesGenerator=randomBoxesGenerator, joints_safety_limit=joints_safety_limit, max_joint_vel=max_joint_vel, max_ee_cart_vel=max_ee_cart_vel, + max_ee_cart_acc=max_ee_cart_acc, max_ee_rot_vel=max_ee_rot_vel, max_ee_rot_acc=max_ee_rot_acc, + random_initial_joint_positions=random_initial_joint_positions, initial_positions=initial_positions,noise_enable_rl_obs=False,noise_rl_obs_ratio=0.05, + reward_dict=reward_dict, agent_kp=agent_kp, agent_kpr=agent_kpr, robotic_tool=robotic_tool, env_id=env_id) + + self.use_images = use_images + self.image_size = image_size + self.current_obs_img = None + + ################################### + # RGB image # + # Unormalized observation for now # + # Adapt to your project # + ################################### + self.observation_space = spaces.Box(low=0, high=255, shape=(self.image_size, self.image_size, 3), dtype=np.uint8) + + ####################################################### + # Set-up the task_monitor # + ####################################################### + if self.task_monitor: + self._create_task_monitor() + + def get_state(self): + """ + End-to-end planar grasping agent receives a whole image as state input + + :return: state for the policy training + """ + return self.current_obs_img + + def update(self, observation, time_step_update=True): + """ + converts the unity observation to the required env state defined in get_state() + with also computing the reward value from the get_reward(...) and done flag, + it increments the time_step, and outputs done=True when the environment should be reset + + important: it also updates the dart kinematic chain of the robot using the new unity simulator observation. 
+ always call this function, once you have send a new command to unity to synchronize the agent environment + + :param observation: is the observation received from the Unity simulator within its [X,Y,Z] coordinate system + 'joint_values': indices [0:7], + 'joint_velocities': indices [7:14], + 'ee_position': indices [14:17], + 'ee_orientation': indices [17:20], + 'target_position': indices [20:23], + 'target_orientation': indices [23:26], + 'object_position': indices [26:29], + 'object_orientation': indices [29:32], + 'gripper_position': indices [32:33], + 'collision_flag': indices [33:34], + + :param time_step_update: whether to increase the time_step of the agent - during manual actions call with False + + :return: The state, reward, episode termination flag (done), and an empty info dictionary + """ + + self.current_obs = observation['Observation'] + + ############################################################################################### + # Parse the image observation from Unity # + # This is a sample code - to make it faster process images in batches in simulator_vec_env.py # + # Decoding is done faster with pytorch -> then detach().numpy() and pass to the update() # + # the decoded image instead of decoding the images inside each env individually # + ############################################################################################### + self.current_obs_img = self.parse_image_observation(observation) + + # Collision or joints limits overpassed # + # env will not be reseted - wait until the end of the episoe for planar envs # + if observation['Observation'][-1] == 1.0 or self.joints_limits_violation(): + self.collided_env = 1 + self.reset_flag = True + + # the method below handles synchronizing states of the DART kinematic chain with the # + # observation from Unity hence it should be always called # + self._unity_retrieve_joint_values(observation['Observation']) + + # Class attributes below exist in the parent class, hence the names should 
not be changed + if(time_step_update == True): + self.time_step += 1 + + state = self.get_state() + reward = self.get_reward(self.action_state) + done = bool(self.get_terminal()) + info = {"success": False} # Episode was successful. It is set at simulator_vec_env.py before reseting + + # Keep track the previous distance of the ee to the box - used in the reward function # + self.prev_dist_ee_box_z = self.get_relative_distance_ee_box_z_unity() + self.prev_dist_ee_box_x = self.get_relative_distance_ee_box_x_unity() + self.prev_dist_ee_box_ry = self.get_relative_distance_ee_box_ry_unity() + + self.prev_action = self.action_state + + return state, reward, done, info + + def parse_image_observation(self, observation): + """ + Read the unity observation and decode the RGB image + + :param observation: is the observation received from the Unity simulator + + :return: decoded RGB image + """ + + base64_bytes = observation['ImageData'][0].encode('ascii') + image_bytes = base64.b64decode(base64_bytes) + image = np.frombuffer(image_bytes, np.uint8) + image = cv2.imdecode(image, cv2.IMREAD_COLOR) + # cv2.IMREAD_GRAYSCALE - for faster processing but with lower performance (depending on the task) + + #print(image.shape) + #cv2.imwrite("test.jpg", image) + + return image + + """ + -> Advanced code: + - Sample code to parallelize the image-proccessing logic. Should be adapted, can not be run + - Add this code/logic in the simulator_vec_env.py + - Ideas of Ludwig Graef, TUM + + Advise: + - Parse the images from the response of the json message, and then call the env.update() with the decode images + - More advanced idea: try in addition to use nvidia dali. 
jpeg decoding is faster + + # Sample code starts # + + # Put this in the simulator_vec_env.py # + observations_preprocessed = preprocessImage(observations_as_literal) + for env, observation, observation_img in zip(self.envs, observations_as_literal, observations_preprocessed): + obs, rew, done, info = env.update(observation, observation_img) + observations_converted.append(obs) + + def preprocessImage(self, observation): + mean = torch.tensor([0.8, 0.9. 0.99]) + std = torch.tensor([0.8, 0.9. 0.99]) + norm_transform = T.Normalize(mean, std) + device = "cuda" + imgs = torch.stack([self.observation_to_image_tensor(obs) for obs in observation]) + imgs = norm_transform(imgs/255).to(device) + return imgs.numpy() + + def observation_to_image_tensor(self, observation) -> torch.tensor: + base64_bytes = observation['ImageData'][0].encode('ascii') + image_bytes = base64.b64decode(base64_bytes) + buffer2 = bytearray(len(image_bytes)) + buffer2[:] = image_bytes + image = torch.frombuffer(buffer2, dtype=torch.uint8) + img = torchvision.io.decode_image(image, torchvision.io.ImageReadMode.RGB) + return img.float() + # Sample code ends # + """ + + ############## + # Monitoring # + ############## + def _create_task_monitor(self, plot_joint_position=True, plot_joint_velocity=True, plot_joint_acceleration=False, plot_joint_torque=True, plot_agent_state=False, plot_agent_action=True, plot_agent_reward=True): + """ + Override the task monitor definition since we have changed the get_state() function and the actions dimensions + refer to iiwa_dart.py and task_monitor.py for more + """ + from PySide2.QtWidgets import QApplication + from utils.task_monitor import TaskMonitor + + if not QApplication.instance(): + self.monitor_app = QApplication([]) + else: + self.monitor_app = QApplication.instance() + + plot_agent_state = False + # Unused -> state observation is an image # + # Do not visualize the agent state # + self.monitor_n_states = 2 + state_chart_categories = ['X', 'Z'] + if 
self.action_space_dimension == 3: + self.monitor_n_states += 1 + state_chart_categories = ['X', 'Z', 'RY'] + # Unused ################################### + + self.monitor_window = \ + TaskMonitor(plot_joint_position=plot_joint_position, + param_joint_position={'dim': self.n_links, + 'min': np.rad2deg(self.MIN_JOINT_POS), + 'max': np.rad2deg(self.MAX_JOINT_POS), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 10.0, + 'title': "Joint Position [" + u"\N{DEGREE SIGN}" + "]"}, + plot_joint_velocity=plot_joint_velocity, + param_joint_velocity={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_VEL), + 'max': np.rad2deg(+self.MAX_JOINT_VEL), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Velocity [" + u"\N{DEGREE SIGN}" + "/s]"}, + plot_joint_acceleration=plot_joint_acceleration, + param_joint_acceleration={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_ACC), + 'max': np.rad2deg(+self.MAX_JOINT_ACC), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'title': "Joint Acceleration [" + u"\N{DEGREE SIGN}" + "/s^2]"}, + plot_joint_torque=plot_joint_torque, + param_joint_torque={'dim': self.n_links, + 'min': -self.MAX_JOINT_TORQUE, + 'max': +self.MAX_JOINT_TORQUE, + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Torque [Nm]"}, + plot_agent_state=plot_agent_state, + param_agent_state={'dim': self.monitor_n_states, + 'min': self.observation_space.low[0:self.monitor_n_states], + 'max': self.observation_space.high[0:self.monitor_n_states], + 'cat': state_chart_categories, + 'title': "Reaching Target Error"}, + plot_agent_action=plot_agent_action, + param_agent_action={'dim': self.action_space_dimension, + 'min': self.action_space.low, + 'max': self.action_space.high, + 'cat': [str(1 + value) for value in + list(range(self.action_space_dimension))], + 'title': "Normalized Command"}, + plot_agent_reward=plot_agent_reward, 
+ param_agent_reward={'dim': 1, + 'min': -3.0, + 'max': +3.0, + 'cat': ['r'], + 'title': "Step Reward"}, + ) + + self.monitor_window.show() + self.monitor_window.correct_size() + + def render(self, mode='human', monitor_real_values=False, joint_torques=None): + """ + Override the render definition since we have changed the get_state() function and the actions dimensions + Note: we do not keep the joints positions in the state of the agent + refer to iiwa_dart.py and task_monitor.py for more + """ + if not self.dart_sim.enable_viewer: + return False + + if self.task_monitor: + if self.monitor_window.plot_joint_torque: + if monitor_real_values and joint_torques is not None: + joint_torques = joint_torques + else: + self.dart_sim.chain.computeInverseDynamics() + joint_torques = self.dart_sim.chain.getForces() + + self.monitor_window.update_values(values_joint_position=np.rad2deg(self.dart_sim.chain.getPositions()), + values_joint_velocity=np.rad2deg(self.dart_sim.chain.getVelocities()), + values_joint_acceleration=np.rad2deg(self.dart_sim.chain.getAccelerations()), + values_joint_torque=joint_torques, + values_agent_action=self.action_state, + values_agent_reward=[self.reward_state]) + self.monitor_app.processEvents() + + return self.dart_sim.render(mode=mode) diff --git a/agent/envs_advanced/iiwa_numerical_planar_grasping_env.py b/agent/envs_advanced/iiwa_numerical_planar_grasping_env.py new file mode 100644 index 0000000..44f0a17 --- /dev/null +++ b/agent/envs_advanced/iiwa_numerical_planar_grasping_env.py @@ -0,0 +1,940 @@ +""" +A numerical planar grasping Env class inheriting from the IiwaSampleEnv for the Kuka LBR iiwa manipulator with 7 links and a Gripper +The parent class takes care of integrating DART Engine with Unity simulation environment + +Unity is used as the main simulator for physics/rendering computations. 
+The Unity interface receives joint velocities as commands and returns joint positions and velocities + +DART is used to calculate inverse kinematics of the iiwa chain. +DART changes the agent action space from the joint space to the cartesian space (position-only or pose/SE(3)) of the end-effector. + +Notes: - Coordinates in the Unity simulator are different from the ones in DART which used here: + The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART + - In numerical planar grasping envs, a P-controller (agent controller) keeps fixed the uncontrolled DoF during the episode. E.g. height of the ee + - At the end of the episode, manual actions help the agent to grasp the box + In that case, the P-controller (manual_actions controller) does not correct the controlled DoF by the RL agent. It keeps them fixed. E.g. rotation of the ee + see simulator_vec_env.py for more details + - gym functions use UNITY coordinates - e.g. get_state(), get_reward() +""" + +import numpy as np +from scipy.spatial.transform import Rotation as R + +import math +import dartpy as dart +from gym import spaces + +from envs.iiwa_sample_env import IiwaSampleEnv + +class IiwaNumericalPlanarGraspingEnv(IiwaSampleEnv): + def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, state_type, enable_render=False, task_monitor=False, + with_objects=False, target_mode="None", goal_type="box", randomBoxesGenerator=None, + joints_safety_limit=10, max_joint_vel=20, max_ee_cart_vel=0.035, max_ee_cart_acc =10, max_ee_rot_vel=0.15, max_ee_rot_acc=10, + random_initial_joint_positions=False, initial_positions=[0, 0, 0, -np.pi/2, 0, np.pi/2, np.pi/2], noise_enable_rl_obs=False, noise_rl_obs_ratio=0.05, + reward_dict=None, agent_kp=0.5, agent_kpr=1.5, + robotic_tool="3_gripper", env_id=0): + + # Some checks # + if(use_ik == False or orientation_control == False or 'v' in state_type): + raise Exception("Enable orientation control and inverse kinematics for planar grasping. 
Use also only joints for the state - abort") + + if(ik_by_sns == True): + raise Exception("RL agent with sns is not advised - abort") + + if(goal_type != "box"): + raise Exception("Grasping accepts a box target for now - abort") + + if(robotic_tool.find("gripper") == -1): + raise Exception("Enable gripper for grasping - abort") + + if(not np.isclose(initial_positions[3], -np.pi/2) or not np.isclose(initial_positions[5], np.pi/2) or (not np.isclose(initial_positions[6], np.pi/2) and not np.isclose(initial_positions[6], 0))): + print("Warning: initial_positions are different - make sure you have adapted the planar envs and manual actions correctly") + + # the init of the parent class should be always called, this will in the end call reset() once + super().__init__(max_ts=max_ts, orientation_control=orientation_control, use_ik=use_ik, ik_by_sns=ik_by_sns, state_type=state_type, enable_render=enable_render, task_monitor=task_monitor, + with_objects=with_objects, target_mode=target_mode, goal_type=goal_type, joints_safety_limit=joints_safety_limit, max_joint_vel=max_joint_vel, max_ee_cart_vel=max_ee_cart_vel, + max_ee_cart_acc=max_ee_cart_acc, max_ee_rot_vel=max_ee_rot_vel, max_ee_rot_acc=max_ee_rot_acc, random_initial_joint_positions=random_initial_joint_positions, initial_positions=initial_positions, + robotic_tool=robotic_tool, env_id=env_id) + + # Box dataset to spawn boxes # + self.randomBoxesGenerator = randomBoxesGenerator + + # Keep object initial pose. 
Easier calculations for rotations - box does not move during the planar episode #
+ self.init_object_pose_unity = None
+
+ ###################################################################
+ # Reward related #
+ ###################################################################
+ self.reward_collision = reward_dict["reward_collision"] # Value
+ self.reward_height_goal = reward_dict["reward_height_goal"] # Height in meters
+
+ self.reward_pose_weight = reward_dict["reward_pose_weight"] # Weight for the pose error displacement
+ self.reward_pose_norm_const = reward_dict["reward_pose_norm_const"] # Normalization constant for the pose error displacement
+
+ self.reward_x_weight = reward_dict["reward_x_weight"]
+ self.reward_x_norm_const = reward_dict["reward_x_norm_const"]
+
+ self.reward_z_weight = reward_dict["reward_z_weight"]
+ self.reward_z_norm_const = reward_dict["reward_z_norm_const"]
+
+ self.collision_flag = False # Give collision reward only once per episode
+
+ # Save previous distance ee/box for calculating the displacements - see reward definition #
+ self.prev_dist_ee_box_z = np.inf
+ self.prev_dist_ee_box_x = np.inf
+ self.prev_dist_ee_box_ry = np.inf
+ ###################################################################
+
+ ###################################################################
+ # P-controller related #
+ ###################################################################
+ self.agent_kp = agent_kp # P-controller gain for positional errors during the episode
+ self.agent_kpr = agent_kpr
+ self.target_rot_quat_dart = None # Save the target orientation in quaternions - keep moving planar during the episode
+ self.target_z_dart = None # Target height - keep moving planar during the episode
+ ###################################################################
+
+ ###################################################################
+ # Noise related #
+ ###################################################################
+ 
self.noise_enable_rl_obs = noise_enable_rl_obs # True/False + self.noise_rl_obs_ratio = noise_rl_obs_ratio # e.g. 0.05 + ################################################################### + + ############################################################################################# + # Joints are normalized in [-1, 1] at the get_state() function - for planar envs # + # The below settings are necessary to be set so that in iiwa_dart.py when # + # calling the self.observation_space.sample() function, we denormalize the joints positions # + ############################################################################################# + self.normalized_rl_obs = True + self.observation_indices = {'obs_len': 0} + + ######################################### + # Action and observation spaces related # + ######################################### + if np.isclose(np.asarray([self.reward_pose_weight]), np.asarray([0])): # No rotation control - 2DoF + ################## + # Viewer-related # + ################## + + # Exist in base-class - overide # + self.observation_indices['joint_pos'] = 2 # In which position of the agent state the joint positions start + self.observation_indices['obs_len'] = 9 + + ############### + # Gym-related # + ############### + + # X, and Z in unity - no rotation # + self.action_space_dimension = 2 + self.observation_space_dimension = 9 + + # 4 DoF that are being controlled by the P-controller - keep moving planar during the episode # + self.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 1, + "kp_x": 0, # Controlled by the RL-agent + "kp_y": 0, + "kp_z": 1 + } + else: # 3DoF control, rotation is active + ################## + # Viewer-related # + ################## + self.observation_indices['joint_pos'] = 3 + self.observation_indices['obs_len'] = 10 + + ############### + # Gym-related # + ############### + + # X, Y, and RY in unity # + self.action_space_dimension = 3 + self.observation_space_dimension = 10 + + # 3 DoF that are being 
controlled by the P-controller - keep moving planar during the episode # + self.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 0, + "kp_x": 0, + "kp_y": 0, + "kp_z": 1 + } + + ##################### + # Define gym spaces # + ##################### + self.action_space = spaces.Box(-np.ones(self.action_space_dimension), np.ones(self.action_space_dimension), dtype=np.float32) + + self.observation_space = spaces.Box(-np.ones(self.observation_space_dimension), np.ones(self.observation_space_dimension), dtype=np.float32) + + ######################################################################## + # Set-up the task_monitor. The re-implementation resides in this class # + # since the dims of action, and obs spaces have been overridden above # + ######################################################################## + if self.task_monitor: + self._create_task_monitor() + + def get_state(self): + """ + defines the environment state: + Format: [x_error, y_error, ry_error, j1, .., j7] normalized in [-1, 1] + - error from the target pose: ee to the target box (x, y, ry axis) + - joints positions + - unity coords system + + Note: if ry rotation is not controlled by the RL agent, then the state will not include the ry_error part + + :return: observation state for the policy training. 
+ """ + state = np.empty(0) + + # Relative position normalized errors of ee to the box # + dx_ee_b, dz_ee_b = self.get_error_ee_box_x_z_normalized_unity() + + # Add to state # + state = np.append(state, np.array([dx_ee_b, dz_ee_b])) + + # Rotation control is active # + if(self.action_space_dimension == 3): + # Normalized rotation error - y axis # + dry_ee_b = self.get_error_ee_box_ry_normalized_unity() + + # Add to state # + state = np.append(state, np.array([dry_ee_b])) + + # Get joints positions # + joint_positions = self.dart_sim.chain.getPositions() + joint_positions = self.normalize_joints(joint_positions) + + # Add to state # + state = np.append(state, joint_positions) + + ##################################################### + # Apply random noise to the RL-observation # + # Needed for deploying the model to the real system # + ##################################################### + if (self.noise_enable_rl_obs == True): + state = (state + self.np_random.uniform(-self.noise_rl_obs_ratio, self.noise_rl_obs_ratio, + (1, len(state)))).clip(-1, 1).tolist()[0] + + # the lines below should stay as it is - exist in parent class + self.observation_state = np.array(state) + + return self.observation_state + + def get_reward(self, action): + """ + defines the environment reward + novel reward that uses displacements -- see self.get_reward_term_displacement() + + :param action: is the current action decided by the RL agent - not used + + :return: reward for the policy training + """ + + reward = 0.0 + if(self.collided_env != 1): # Valid env + reward += self.get_reward_term_displacement() + + # Collision penalty is given only once # + elif(self.collided_env == 1 and self.collision_flag == False): + reward += self.reward_collision + self.collision_flag = True + + # the lines below should stay as it is + self.reward_state = reward + + return self.reward_state + + def get_terminal_reward(self): + """ + checks if the box is in the air after the end of the episode and manual 
actions (down/close/up) - see also simulator_vec_env.py + + :return: a boolean value representing if the box is in the air (True means success) + """ + if(self.get_object_height_unity() >= self.reward_height_goal and self.collided_env != 1): + return True + + return False + + def get_terminal(self): + """ + checks the terminal conditions for the episode + Important: planar envs should terminate at the same time step - when 'num_envs' != 1 + + :return: a boolean value indicating if the episode should be terminated - maximum timesteps reached + """ + self.reset_flag = False + if self.time_step > self.max_ts: + self.reset_flag = True + + return self.reset_flag + + def update_action(self, action): + """ + converts env action to the required unity action by using inverse kinematics from DART + - action dim: 3, or 2 (in case of no rotation control) + - the remaining DoF are controlled by the P-controller. + - the Gripper is not controlled by the RL agent only during the manual and at the end of the episode - see simulator_vec_env.py + + :param action: The action vector decided by the RL agent. Acceptable range: [-1, +1] + + :return: the command to send to the Unity simulator including joint velocities and gripper position + """ + ################# + # Unity in DART # + # Yu = Zd # + # Xu = -Yd # + # Zu = Xd # + ################# + + ############################################################################## + # Reset the P-controller - save the initial pose of the manipulator # + # for moving in a planar manner during the episode e.g. keep the same height # + ############################################################################## + if (self.time_step == 1): + self.reset_agent_p_controller() + + # the lines below should stay as it is + self.action_state = action + env_action = np.clip(action, -1., 1.) 
+ + # Rotation is controlled by the RL-agent + if(self.action_space_dimension == 3): + task_vel = np.zeros(3) + else: + task_vel = np.zeros(2) + + ################################################################### + # The RL agent controls the x, y and rotational z-axis (optional) # + ################################################################### + + ################################################################################## + # Calculate the errors for the P-controller. Axis not controlled by the RL-agent # + # Important: P-controller expects dart coordinates # + ################################################################################## + _, ee_y, _ = self.get_ee_pos_unity() + z_diff = self.target_z_dart - ee_y # Height + curr_quat = self.get_rot_ee_quat() # Current orientation of the ee in quaternions + rx_diff, ry_diff, rz_diff = self.get_rot_error_from_quaternions(self.target_rot_quat_dart, curr_quat) # Orientation error from the target pose + ################################################################################################ + + if(self.action_space_dimension == 3): # Rotation is active - 3DoF control by the RL agent + task_vel[0] = self.MAX_EE_VEL[2] * env_action[0] + task_vel[1] = self.MAX_EE_VEL[3] * env_action[1] + task_vel[2] = self.MAX_EE_VEL[4] * env_action[2] + + ###################################################################################### + # P-controller + inverse kinematics # + # - The DoF that are controlled by the RL-agent are unaffected by the P-controller # + # - see config_p_controller dictionary # + ###################################################################################### + joint_vel = self.action_by_p_controller_custom(rx_diff, ry_diff, task_vel[0], + task_vel[1], task_vel[2], z_diff, + self.agent_kpr, self.agent_kp, + self.config_p_controller) + else: # Rotation is not active - 2DoF control by the RL agent + task_vel[0] = self.MAX_EE_VEL[3] * env_action[0] + task_vel[1] = 
self.MAX_EE_VEL[4] * env_action[1] + + joint_vel = self.action_by_p_controller_custom(rx_diff, ry_diff, rz_diff, + task_vel[0], task_vel[1], z_diff, + self.agent_kpr, self.agent_kp, + self.config_p_controller) + + ########################################################################################## + # Gripper is not controlled via the RL-agent - manual actions - see simulator_vec_env.py # + ########################################################################################## + unity_action = np.append(joint_vel, [float(0.0)]) + + return unity_action + + def update(self, observation, time_step_update=True): + """ + converts the unity observation to the required env state defined in get_state() + with also computing the reward value from the get_reward(...) and done flag, + it increments the time_step, and outputs done=True when the environment should be reset + + important: it also updates the dart kinematic chain of the robot using the new unity simulator observation. + always call this function, once you have send a new command to unity to synchronize the agent environment + + :param observation: is the observation received from the Unity simulator within its [X,Y,Z] coordinate system + 'joint_values': indices [0:7], + 'joint_velocities': indices [7:14], + 'ee_position': indices [14:17], + 'ee_orientation': indices [17:20], + 'target_position': indices [20:23], + 'target_orientation': indices [23:26], + 'object_position': indices [26:29], + 'object_orientation': indices [29:32], + 'gripper_position': indices [32:33], + 'collision_flag': indices [33:34], + + :param time_step_update: whether to increase the time_step of the agent - during manual actions call with False + + :return: The state, reward, episode termination flag (done), and an empty info dictionary + """ + + self.current_obs = observation['Observation'] + + # Collision or joints limits overpassed # + # env will not be reseted - wait until the end of the episoe for planar envs # + if 
observation['Observation'][-1] == 1.0 or self.joints_limits_violation(): + self.collided_env = 1 + + # the method below handles synchronizing states of the DART kinematic chain with the # + # observation from Unity hence it should be always called # + self._unity_retrieve_joint_values(observation['Observation']) + + # Class attributes below exist in the parent class, hence the names should not be changed + if(time_step_update == True): + self.time_step += 1 + + state = self.get_state() + reward = self.get_reward(self.action_state) + done = bool(self.get_terminal()) + info = {"success": False} # Episode was successful. It is set at simulator_vec_env.py before reseting + + + # Keep track the previous distance of the ee to the box - used in the reward function # + self.prev_dist_ee_box_z = self.get_relative_distance_ee_box_z_unity() + self.prev_dist_ee_box_x = self.get_relative_distance_ee_box_x_unity() + self.prev_dist_ee_box_ry = self.get_relative_distance_ee_box_ry_unity() + + self.prev_action = self.action_state + + return state, reward, done, info + + ########### + # Monitor # + ########### + def _create_task_monitor(self, plot_joint_position=True, plot_joint_velocity=True, plot_joint_acceleration=False, plot_joint_torque=True, + plot_agent_state=True, plot_agent_action=True, plot_agent_reward=True): + """ + Override the task monitor definition since we have changed the get_state() function and the actions dimensions + refer to iiwa_dart.py and task_monitor.py for more + """ + + from PySide2.QtWidgets import QApplication + from utils.task_monitor import TaskMonitor + + if not QApplication.instance(): + self.monitor_app = QApplication([]) + else: + self.monitor_app = QApplication.instance() + + self.monitor_n_states = 2 + state_chart_categories = ['X', 'Z'] + if self.action_space_dimension == 3: + self.monitor_n_states += 1 + state_chart_categories = ['X', 'Z', 'RY'] + + self.monitor_window = \ + TaskMonitor(plot_joint_position=plot_joint_position, + 
param_joint_position={'dim': self.n_links, + 'min': np.rad2deg(self.MIN_JOINT_POS), + 'max': np.rad2deg(self.MAX_JOINT_POS), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 10.0, + 'title': "Joint Position [" + u"\N{DEGREE SIGN}" + "]"}, + plot_joint_velocity=plot_joint_velocity, + param_joint_velocity={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_VEL), + 'max': np.rad2deg(+self.MAX_JOINT_VEL), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Velocity [" + u"\N{DEGREE SIGN}" + "/s]"}, + plot_joint_acceleration=plot_joint_acceleration, + param_joint_acceleration={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_ACC), + 'max': np.rad2deg(+self.MAX_JOINT_ACC), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'title': "Joint Acceleration [" + u"\N{DEGREE SIGN}" + "/s^2]"}, + plot_joint_torque=plot_joint_torque, + param_joint_torque={'dim': self.n_links, + 'min': -self.MAX_JOINT_TORQUE, + 'max': +self.MAX_JOINT_TORQUE, + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Torque [Nm]"}, + plot_agent_state=plot_agent_state, + param_agent_state={'dim': self.monitor_n_states, + 'min': self.observation_space.low[0:self.monitor_n_states], + 'max': self.observation_space.high[0:self.monitor_n_states], + 'cat': state_chart_categories, + 'title': "Reaching Target Error"}, + plot_agent_action=plot_agent_action, + param_agent_action={'dim': self.action_space_dimension, + 'min': self.action_space.low, + 'max': self.action_space.high, + 'cat': [str(1 + value) for value in + list(range(self.action_space_dimension))], + 'title': "Normalized Command"}, + plot_agent_reward=plot_agent_reward, + param_agent_reward={'dim': 1, + 'min': -3.0, + 'max': +3.0, + 'cat': ['r'], + 'title': "Step Reward"}, + ) + + self.monitor_window.show() + self.monitor_window.correct_size() + + def reset(self, temp=False): + """ + resets the DART-gym 
environment and creates the reset_state vector to be sent to Unity + + :param temp: not relevant + + :return: The initialized state + """ + self.current_obs = np.ones(34) + + ############################################################ + # Spawn the next box and fix the target (for task monitor) # + ############################################################ + object_X, object_Y, object_Z, object_RX, object_RY, object_RZ = self.randomBoxesGenerator() + self.init_object_pose_unity = [object_X, object_Y, object_Z, object_RX, object_RY, object_RZ] + + ################################################################################################ + # Align the target with the box. This is done for visualization purposes for the dart viewer # + # In the Unity simulator, however, we spawn the target far away # + # Vision-based models might get confused if there is in the image a red target - if the target # + # is used in vision-based envs -> adapt # + ################################################################################################ + + # Align with the box + target_object_X, target_object_Y, target_object_Z = self.init_object_pose_unity[0], self.init_object_pose_unity[1], self.init_object_pose_unity[2] + + # Note in the dart viewer the ee at the goal is more up as we assume that we have a gripper (urdf) but the gripper is not # + # yet visualized in the viewer. 
The self.dart_sim.get_pos_distance() returns 0 correctly at the goal # + tool_length = 0.0 + target_object_RX, target_object_RY, target_object_RZ = self.get_box_rotation_in_target_dart_coords_angle_axis() + target = [target_object_RX, target_object_RY, target_object_RZ, target_object_Z, -target_object_X, target_object_Y + tool_length] + + # takes care of resetting the DART chain and should stay as it is + state = super().reset() + + # sets the initial reaching target for the current episode, + # should be always called in the beginning of each episode, + # you might need to call it even during the episode run to change the reaching target for the IK-P controller + self.set_target(target) + + # initial position for the gripper state, accumulates the tool_action velocity received in update_action + self.tool_target = 0.0 # should be in range [0.0,90.0] + + # movement control of each joint can be disabled by setting zero for that joint index + active_joints = [1] * 7 + + # the lines below should stay as it is, Unity simulator expects these joint values in radians + joint_positions = self.dart_sim.chain.getPositions().tolist() + joint_velocities = self.dart_sim.chain.getVelocities().tolist() + + ####################################################################################### + # Spawn the target rectangle outside of the Unity simulator -> since it is not used # + # or can harm the vision-based methods # + # in dart we map the target to be in the same pose as the pose of the box (see above) # + ####################################################################################### + #target_positions = self.dart_sim.target.getPositions().tolist() + target_positions = [0, 0, 0, 0, -5, -200] + + target_X, target_Y, target_Z = -target_positions[4], target_positions[5], target_positions[3] + target_RX, target_RY, target_RZ = np.rad2deg([-target_positions[1], target_positions[2], target_positions[0]]) + target_positions_mapped = [target_X, target_Y, target_Z, 
target_RX, target_RY, target_RZ] + + object_positions_mapped = [object_X, object_Y, object_Z, object_RX, object_RY, object_RZ] + + self.reset_state = active_joints + joint_positions + joint_velocities\ + + target_positions_mapped + object_positions_mapped + [self.tool_target] + + self.collision_flag = False + + ####################################################################################### + # Keep track the previous distance of the ee to the box - used in the reward function # + # Initialize -during the __init__() they are set to np.inf # + ####################################################################################### + self.prev_dist_ee_box_z = self.get_relative_distance_ee_box_z_unity() + self.prev_dist_ee_box_x = self.get_relative_distance_ee_box_x_unity() + self.prev_dist_ee_box_ry = self.get_relative_distance_ee_box_ry_unity() + + return state + + ################ + # P-controller # + ################ + def action_by_p_controller_custom(self, rx_diff, ry_diff, rz_diff, x_diff, y_diff, z_diff, kpr, kp, config_p_controller, normalize_action=False): + """ + computes the joint-space velocity command given position and orientation errors and possibly task-space velocities generated from an RL-agent + it uses a P-controller for generating task-space velocities for the input errors and IK from dart for the joint-space transformation + + Important: input params should be in DARTS cords + + Note: some axis can be controller by an RL-agent while others by the p-controller + hence, inputs should be errors or task-space velocities + + + :param rx_diff: rx axis error or task-space vel + :param ry_diff: ry axis error or task-space vel + :param rz_diff: rz axis error or task-space vel + + :param x_diff: x axis error or task-space vel + :param y_diff: y axis error or task-space vel + :param z_diff: z axis error or task-space vel + + + :param kpr: P-controller gain for orientation errors + :param kp: P-controller gain for position errors + + :param 
config_p_controller: dictionary to define which axes are controlled by the P-controller. It should be 6-dim
+ - 1 means apply the corresponding gain. Else, input is a task-space velocity. Do nothing.
+ - e.g. {"kpr_x": 1, "kpr_y": 1,
+ "kpr_z": 1, "kp_x": 0,
+ "kp_y": 1, "kp_z": 0
+ }
+
+ :param normalize_action: whether IK should denormalize the input velocities
+
+ :return: Action in joint-space
+ """
+
+ # Whether to multiply with the P-controller gains #
+ kpr_x = kpr if config_p_controller["kpr_x"] == 1 else 1
+ kpr_y = kpr if config_p_controller["kpr_y"] == 1 else 1
+ kpr_z = kpr if config_p_controller["kpr_z"] == 1 else 1
+ kp_x = kp if config_p_controller["kp_x"] == 1 else 1
+ kp_y = kp if config_p_controller["kp_y"] == 1 else 1
+ kp_z = kp if config_p_controller["kp_z"] == 1 else 1
+
+ task_space_vel = np.array([kpr_x * rx_diff, kpr_y * ry_diff, kpr_z * rz_diff, kp_x * x_diff, kp_y * y_diff, kp_z * z_diff])
+
+ # IK #
+ joint_space_vel = self.dart_sim.command_from_action(task_space_vel, normalize_action=normalize_action)
+
+ return joint_space_vel
+
+ def reset_agent_p_controller(self):
+ """
+ reset the P-controller. Save the initial pose of ee. e.g. 
for keeping the same height during the RL episode + Note: dart coords + + affects: self.target_z_dart + affects: self.target_rot_quat_dart + """ + + _, ee_y, __ = self.get_ee_pos_unity() + + self.target_z_dart = ee_y + self.target_rot_quat_dart = self.get_rot_ee_quat() + + ################ + # Reward terms # + ################ + def get_reward_term_displacement(self): + """ + returns the reward value for the current observation + + uses a displacements logic: + - the current ee distance to the box in x, z, and ry axis minus the previous ee distance to box for the same axis (see implementation for more) + + :return: reward displacement term (float) + """ + reward = 0 + + # Z axis # + curr_dist_ee_box_z = self.get_relative_distance_ee_box_z_unity() + dz = (self.prev_dist_ee_box_z - curr_dist_ee_box_z) # Displacement + + dz /= self.reward_z_norm_const # Normalize + dz = np.clip(dz, -1, 1) # Clip for safety + dz *= self.reward_z_weight # Weight this term + + # X axis # + curr_dist_ee_box_x = self.get_relative_distance_ee_box_x_unity() + dx = (self.prev_dist_ee_box_x - curr_dist_ee_box_x) + + dx /= self.reward_x_norm_const + dx = np.clip(dx, -1, 1) + dx *= self.reward_x_weight + + # RY axis # + curr_dist_ee_box_ry = self.get_relative_distance_ee_box_ry_unity() + dry = (self.prev_dist_ee_box_ry - curr_dist_ee_box_ry) + + dry /= self.reward_pose_norm_const + dry = np.clip(dry, -1, 1) + dry *= self.reward_pose_weight + + reward = dz + dx + dry + + return reward + + + ############# + # Accessors # + ############# + def get_ee_orient_unity(self): + """ + Override it for planar grasping envs - easier calculations + + :return: x, y, z orientation of the ee in Euler + """ + rot_mat = self.dart_sim.chain.getBodyNode('iiwa_link_ee').getTransform().rotation() + rx, ry, rz = dart.math.matrixToEulerXYZ(rot_mat) + + rx_unity = -ry + ry_unity = rz + rz_unity = rx + + return rx_unity, ry_unity, rz_unity + + def get_box_rotation_in_target_dart_coords_angle_axis(self): + """ + return in 
angle-axis dart coordinates the orientation of the box + - read the unity coordinates of the box and then convert + - unity uses degrees [-90, 0] for rotation in our case) + - if the orientation of the box is in a different range adapt + + :return: a, b, c (in rad) angle-axis dart coordinates the orientation of the box + """ + + object_RY = self.init_object_pose_unity[4] + object_RY = -object_RY if object_RY >= -45 else -object_RY - 90 + r = R.from_euler('xyz', [-180, 0, -180 + object_RY], degrees=True) + r = r.as_matrix() + a, b, c = dart.math.logMap(r) + + return a, b, c + + def get_object_pos_unity(self): + """ + get the position of the box in unity coords + + :return: x, y, z coords of box in unity + """ + return self.current_obs[26], self.current_obs[27], self.current_obs[28] + + def get_object_orient_unity(self): + """ + get the orientation of the box in unity coords + Important: works for planar grasping envs only - assume the box does not move during the RL episode + + :return: rx, ry, rz coords of box in unity + """ + return self.init_object_pose_unity[3], self.init_object_pose_unity[4], self.init_object_pose_unity[5] + + def get_object_height_unity(self): + """ + get the height of the box in unity coords + + :return: y coords of box + """ + return self.current_obs[27] + + def get_object_pose_unity(self): + """ + get the pose of the box in unity coords + Important: works for planar grasping envs only - assume the box does not move during the RL episode + + :return: x, y, z, rx, ry, rz coords of box + """ + return self.current_obs[26], self.current_obs[27], self.current_obs[28], \ + self.init_object_pose_unity[3], self.init_object_pose_unity[4], self.init_object_pose_unity[5] + + def get_collision_flag(self): + """ + get the collision flag value + + :return: 0 (no collision) or 1 (collision) + """ + return self.current_obs[33] + + def get_relative_distance_ee_box_x_unity(self): + """ + get the relative distance from the box to the ee in x axis in unity 
coords + + :return: dist (float) of the box to the ee in x axis + """ + x_err, _, _ = self.get_error_ee_box_pos_unity() + dist = abs(x_err) + + return dist + + def get_relative_distance_ee_box_z_unity(self): + """ + get the relative distance from the box to the ee in z axis in unity coords + + :return: dist of the box to the ee in z axis + """ + _, _, z_err = self.get_error_ee_box_pos_unity() + dist = abs(z_err) + + return dist + + def get_relative_distance_ee_box_ry_unity(self): + """ + get the relative distance from the box to the ee in ry axis in unity coords + + :return: dist of the box to the ee in ry axis + """ + ry_err = self.get_error_ee_box_ry_unity() + dist = abs(ry_err) + + return dist + + def get_error_ee_box_pos_unity(self): + """ + get the error from the box to the ee in unity coords + + :return: x_err, y_err, z_err of the box to the ee + """ + object_x, object_y, object_z = self.get_object_pos_unity() + ee_x, ee_y, ee_z = self.get_ee_pos_unity() + + return object_x - ee_x, object_y - ee_y, object_z - ee_z + + def get_error_ee_box_x_z_unity(self): + """ + get the error from the box to the ee in x and z axis in unity coords + + :return: x_err, z_err of the box to the ee + """ + x_err, _, z_err = self.get_error_ee_box_pos_unity() + + return x_err, z_err + + def get_error_ee_box_x_z_normalized_unity(self): + """ + get the normalized error from the box to the ee in x and z axis in unity coords + Important: hard-coded manner - adapt if the ee starts from different initial position, or + the boxes are not spawned in front of the robot + + :return: x_err, z_err normalized of the box to the ee + """ + x_err, z_err = self.get_error_ee_box_x_z_unity() + + return x_err / 1.4, z_err / 0.73 + + def get_error_ee_box_ry_unity(self): + """ + get the relative error from the box to the ee in ry axis in unity coords + + :return: ry_err of the box to the ee - in radians + """ + _, ee_ry, _ = self.get_ee_orient_unity() + + 
##################################################### + # Transorm ee and box ry rotation to our needs # + # see the function implementation for more details # + ##################################################### + box_ry = np.deg2rad(self.init_object_pose_unity[4]) # Deg -> rad + clipped_ee_ry, clipped_box_ry = self.clip_ee_ry_and_box_ry(ee_ry, box_ry) + + ry_error = clipped_box_ry - clipped_ee_ry + + return ry_error + + def get_error_ee_box_ry_normalized_unity(self): + """ + get the normalized relative error from the box to the ee in ry axis in unity coords + Important: hard-coded normalization - adapt if needed + + :return: ry_err normalized of the box to the ee + """ + error_ry = self.get_error_ee_box_ry_unity() + + return error_ry / (2 * np.pi) + + def clip_ee_ry_and_box_ry(self, ee_ry, box_ry): + """ + Fix the rotation in y axis for both box and end-effector + - Input for the ee is the raw observation of rotation returned from the Unity simulator + - Input for the box is the rotation returned from the boxGenerator but transformed in radians + - The ee starts at +-np.pi rotation in y-axis. + - Important: some observations are returned with a change of sign and they should be corrected. + + Important: In this clipping behaviour, we assume that when the ee is at +-np.pi and the box + at 0 radians, then the error between them is zero. No rotation should be performed - 'highest reward' + - Hence, in this case, the function returns for ee_ry -np.pi and for box_ry -np.pi so that their difference is 0. + + Note: We also define only one correct rotation for grasping the box. + - The Box rotation ranges from [-90, 0] + - The ee should (only) turn clock-wise when the box is between [-90, -45), and + counter-clock-wise when the box is between [-45, 0]. + + Warning: If the ee starts from different rotation than +-np.pi or the box rotation spawn range of [-90, 0] is different - adapt. 
+ + :param ee_ry: ee rotation returned from the unity simulator in radians + :param boy_ry: box rotation returned from the boxGenerator, but transformed in radians - does not change during the RL episode + + :return: clipped_ee_ry corrected ee ry rotation in radians + :return: clipped_box_ry: corrected box ry rotation in radians + """ + + if (self.init_object_pose_unity[4] >= -45): # Counter-clock-wise rotation should be performed + if (ee_ry > 0): # Change of sign + ee_ry *= -1 + else: + # ee is turning in the wrong direction. ee rotation is # + # decreasing but the error to the box is increasing. # + # add the difference to correct the error # + ee_ry = -np.pi - (np.pi + ee_ry) + + # When box_ry is at 0 rad - no rotation of the ee is needed # + clipped_box_ry = -np.pi - box_ry + + elif (self.init_object_pose_unity[4] < -45): # Clock-wise rotation should be performed + if (ee_ry < 0): + ee_ry *= -1 + else: + ee_ry = np.pi + (np.pi - ee_ry) + + clipped_box_ry = np.pi + (-np.pi / 2 - box_ry) + + clipped_ee_ry = ee_ry + + return clipped_ee_ry, clipped_box_ry + + def clip_ry(self, ee_ry): + """ + Individual cliping function. For more information refer to the self.clip_ee_ry_and_box_ry() definition + - This re-definition is for agents that need to clip ee_ry and box_ry seperately + + :param ee_ry: ee rotation returned from the unity simulator in radians + + :return: clipped_ee_ry corrected ee rotation (rad) + """ + + if (self.init_object_pose_unity[4] >= -45): + if (ee_ry > 0): + ee_ry *= -1 + else: + ee_ry = -np.pi - (np.pi + ee_ry) + + elif (self.init_object_pose_unity[4] < -45): + if (ee_ry < 0): + ee_ry *= -1 + else: + ee_ry = np.pi + (np.pi - ee_ry) + + clipped_ee_ry = ee_ry + + return clipped_ee_ry + + def clip_box_ry(self, box_ry): + """ + Individual cliping function. 
For more information refer to the self.clip_ee_ry_and_box_ry() definition + - This re-definition is for agents that need to clip ee_ry and box_ry seperately + + :param boy_ry: box rotation returned from the boxGenerator, but transformed in radians - does not change during the RL episode + + :return: clipped_box_ry: corrected box ry rotation in radians + """ + + if (self.init_object_pose_unity[4] >= -45): + clipped_box_ry = -np.pi - box_ry + elif (self.init_object_pose_unity[4] < -45): + clipped_box_ry = np.pi + (-np.pi / 2 - box_ry) + + return clipped_box_ry diff --git a/agent/envs_advanced/iiwa_ruckig_planar_grasping_env.py b/agent/envs_advanced/iiwa_ruckig_planar_grasping_env.py new file mode 100755 index 0000000..d92e1e5 --- /dev/null +++ b/agent/envs_advanced/iiwa_ruckig_planar_grasping_env.py @@ -0,0 +1,383 @@ +""" +A model-based numerical planar grasping Env class inheriting from the IiwaNumericalPlanarGraspingEnv for the Kuka LBR iiwa manipulator with 7 links and a Gripper +The parent class takes care of integrating DART Engine with Unity simulation environment + +Note: update_action() receives velocities genereted by ruckig. For more refer to ruckig_planar_model.py + +Unity is used as the main simulator for physics/rendering computations. +The Unity interface receives joint velocities as commands and returns joint positions and velocities + +DART is used to calculate inverse kinematics of the iiwa chain. +DART changes the agent action space from the joint space to the cartesian space (position-only or pose/SE(3)) of the end-effector. + +action_by_pd_control method can be called to implement a Proportional-Derivative control law instead of an RL policy. 
+ +Note: Coordinates in the Unity simulator are different from the ones in DART which used here: +The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART +""" + +import numpy as np +import math + +from gym import spaces + +from envs_advanced.iiwa_numerical_planar_grasping_env import IiwaNumericalPlanarGraspingEnv + +class IiwaRuckigPlanarGraspingEnv(IiwaNumericalPlanarGraspingEnv): + def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, state_type, enable_render=False, task_monitor=False, + with_objects=False, target_mode="None", goal_type="box", randomBoxesGenerator=None, + joints_safety_limit=10, max_joint_vel=20, max_ee_cart_vel=0.035, max_ee_cart_acc =10, max_ee_rot_vel=0.15, max_ee_rot_acc=10, + random_initial_joint_positions=False, initial_positions=[0, 0, 0, -np.pi/2, 0, np.pi/2, np.pi/2], noise_enable_rl_obs=False, noise_rl_obs_ratio=0.05, + reward_dict=None, agent_kp=0.5, agent_kpr=1.5, threshold_p_model_based=0.01, + robotic_tool="3_gripper", env_id=0): + + ################################################################################################ + # the init of the parent class should be always called, this will in the end call reset() once # + ################################################################################################ + super().__init__(max_ts=max_ts, orientation_control=orientation_control, use_ik=use_ik, ik_by_sns=ik_by_sns, state_type=state_type, enable_render=enable_render, task_monitor=task_monitor, + with_objects=with_objects, target_mode=target_mode, goal_type=goal_type, randomBoxesGenerator=randomBoxesGenerator, + joints_safety_limit=joints_safety_limit, max_joint_vel=max_joint_vel, max_ee_cart_vel=max_ee_cart_vel, max_ee_cart_acc=max_ee_cart_acc, max_ee_rot_vel=max_ee_rot_vel, max_ee_rot_acc=max_ee_rot_acc, + random_initial_joint_positions=random_initial_joint_positions, initial_positions=initial_positions,noise_enable_rl_obs=False,noise_rl_obs_ratio=0.05, + reward_dict=reward_dict,agent_kp=agent_kp, 
agent_kpr=agent_kpr, robotic_tool=robotic_tool, env_id=env_id) + + ############################################################################################# + # Joints are normalized in [-1, 1] at the get_state() function - for planar envs # + # The below settings are necessary to be set so that in iiwa_dart.py when # + # calling the self.observation_space.sample() function, we denormalize the joints positions # + ############################################################################################# + self.normalized_rl_obs = True + self.observation_indices = {'obs_len': 0} + + # Whether to apply a threshold to the model-based controller - see update_action() # + self.threshold_p_model_based = threshold_p_model_based + + ######################################## + # Action and observation space related # + ######################################## + if np.isclose(np.asarray([self.reward_pose_weight]), np.asarray([0])): # No rotation control - 2DoF + ################## + # Viewer-related # + ################## + self.observation_indices['joint_pos'] = 0 # Unused -> no joints position information in the state + self.observation_indices['obs_len'] = 5 + + ############### + # Gym-related # + ############### + self.action_space_dimension = 2 + self.observation_space_dimension = 5 # [reset, x_ee_d, y_ee_d, x_box_d, y_box_d] - see get_state() + + # 4 DoF that are being controlled by the P-controller - keep moving planar during the episode # + self.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 1, + "kp_x": 0, # Controlled by the model-based agent + "kp_y": 0, + "kp_z": 1 + } + + tool_length = 0.2 # Tolerance + + ########################################################################## + # Important: adapt if the ranges change in the randomBoxesGenerator, or # + # e.g. 
the manipulator is in a different initial planar position, or # + # the boxes are spawned behind/left/right and not in-front # + ########################################################################## + low = np.asarray([0, -(0.95 + tool_length), -(0.95 + tool_length), -(0.95 + tool_length), -(0.95 + tool_length)]) + + high = np.asarray([1, 0.95 + tool_length, 0.95 + tool_length, 0.95 + tool_length, 0.95 + tool_length]) + + self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32) + + self.action_space = spaces.Box(low=np.asarray([-self.MAX_EE_CART_VEL[1], -self.MAX_EE_CART_VEL[0]]), + high=np.asarray([self.MAX_EE_CART_VEL[1], self.MAX_EE_CART_VEL[0]]), + dtype=np.float32) + + else: # 3DoF control, rotation is active + ################## + # Viewer-related # + ################## + self.observation_indices['joint_pos'] = 0 # Unused + self.observation_indices['obs_len'] = 6 + + ############### + # Gym-related # + ############### + + # X, Y, RY in unity # + self.action_space_dimension = 3 + self.observation_space_dimension = 6 # [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] - see get_state() + + # 3 DoF that are being controlled by the P-controller - keep moving planar during the episode # + self.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 0, + "kp_x": 0, + "kp_y": 0, + "kp_z": 1 + } + + tool_length = 0.2 # Tolerance + low = np.asarray([0, -2*np.pi, -(0.95 + tool_length), -(0.95 + tool_length), -2*np.pi, -(0.95 + tool_length), -(0.95 + tool_length)]) + + high = np.asarray([1, 2*np.pi, 0.95 + tool_length, 0.95 + tool_length, 2*np.pi, 0.95 + tool_length, 0.95 + tool_length]) + + self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32) + + self.action_space = spaces.Box(low=np.asarray([-self.MAX_EE_ROT_VEL[2], -self.MAX_EE_CART_VEL[1], -self.MAX_EE_CART_VEL[1]]), + high=np.asarray([self.MAX_EE_ROT_VEL[2], self.MAX_EE_CART_VEL[1], self.MAX_EE_CART_VEL[0]]), + dtype=np.float32) + + 
######################################################################## + # Set-up the task_monitor. The re-implementation resides in this class # + # since the dims of action, and obs spaces have been overridden above # + ######################################################################## + if self.task_monitor: + self._create_task_monitor() + + def get_state(self): + """ + defines the environment state with shape(num_envs, 5 or 6) - depending on the active DoF (rotation control is active) + Format: [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] + - Should reset, rotation of ee in dart, x position of the ee in dart, etc + - box pose is used only once (at reset_ruckig_model()) as the box does not move during the planar episode + - Not normalized in [-1, 1] as in most gym agents + + :return: state for the model-based agent (no RL training) + """ + state = np.empty(0) + + # Reset ruckig model (flag). Set target pose of ee only once # + # at the first step - see how it is used in ruckig_planar_model.py # + if(self.time_step == 1): + reset_ruckig_model = 1 + else: + reset_ruckig_model = 0 + + state = np.append(state, np.asarray([reset_ruckig_model])) + + state = np.append(state, self.get_ruckig_current_pose()) # Current pose of the ee + state = np.append(state, self.get_ruckig_target_pose()) # Pose of the box - changes only per episode. Assume the box is not moved during the planar motion + + # the lines below should stay as it is + self.observation_state = np.array(state) + + return self.observation_state + + def update_action(self, action): + """ + converts env action to the required unity action by using inverse kinematics from DART + - action dim: 3, or 2 (in case of no rotation control) + - the remaining DoF are controlled by the P-controller. 
+ - the Gripper is not controlled by the RL agent only during the manual and at the end of the episode - see simulator_vec_env.py + + :param action: The action vector decided by the model-based model + - Important: the input action is not normalized in [-1, 1] in this case - m/sec + + :return: the command to send to the Unity simulator including joint velocities and gripper position + """ + + ################# + # Unity in DART # + # Yu = Zd # + # Xu = -Yd # + # Zu = Xd # + ################# + + ############################################################################## + # Reset the P-controller - save the initial pose of the manipulator # + # for moving in a planar manner during the episode e.g. keep the same height # + ############################################################################## + if (self.time_step == 1): + self.reset_agent_p_controller() + + # the lines below should stay as it is + self.action_state = action + env_action = action + + # Rotation is controlled by the model-based agent + if(self.action_space_dimension == 3): + task_vel = np.zeros(3) + else: + task_vel = np.zeros(2) + + ############################################################################ + # The model-based agent controls the x, y and rotational z-axis (optional) # + ############################################################################ + + ########################################################################################### + # Calculate the errors for the P-controller. 
Axis not controlled by the model-based agent # + # Important: P-controller expects dart coordinates # + ########################################################################################### + _, ee_y, _ = self.get_ee_pos_unity() + z_diff = self.target_z_dart - ee_y # Height + curr_quat = self.get_rot_ee_quat() # Current orientation of the ee in quaternions + rx_diff, ry_diff, rz_diff = self.get_rot_error_from_quaternions(self.target_rot_quat_dart, curr_quat) # Orientation error from the target pose + ################################################################################################ + + # Get poses # + curr_pose = self.get_ruckig_current_pose() # Current ee pose + target_pose = self.get_ruckig_target_pose() # Current pose of the box - does not change during the episode + + ############## + # Dart cords # + ############## + if(self.action_space_dimension == 3): # Rotation is active - 3DoF control by the model-based agent + task_vel[0] = env_action[0] + task_vel[1] = env_action[1] + task_vel[2] = env_action[2] + + # Distance to the goal # + dist = np.linalg.norm(np.array([rx_diff, ry_diff, z_diff, target_pose[0] - curr_pose[0], target_pose[1] - curr_pose[1], target_pose[2] - curr_pose[2]])) + + if(dist < self.threshold_p_model_based): # Threshold is reached - stop, if active only + joint_vel = np.zeros(7) + else: + ############################################################################################### + # P-controller + inverse kinematics # + # - The DoF that are controlled by the model-based agent are unaffected by the P-controller # + # - see config_p_controller dictionary # + ############################################################################################### + joint_vel = self.action_by_p_controller_custom(rx_diff, ry_diff, task_vel[0], + task_vel[1], task_vel[2], z_diff, + self.agent_kpr, self.agent_kp, + self.config_p_controller) + else: # Rotation is not active - 2DoF control by the model-based agent + task_vel[0] = 
env_action[0] + task_vel[1] = env_action[1] + + dist = np.linalg.norm(np.array([rx_diff, ry_diff, z_diff, rz_diff, target_pose[0] - curr_pose[0], target_pose[1] - curr_pose[1]])) + + if(dist < self.threshold_p_model_based): # Threshold is reached + joint_vel = np.zeros(7) + else: + joint_vel = self.action_by_p_controller_custom(rx_diff, ry_diff, rz_diff, + task_vel[0], task_vel[1], z_diff, + self.agent_kpr, self.agent_kp, + self.config_p_controller) + + ################################################################################################### + # Gripper is not controlled via the model-based agent - manual actions - see simulator_vec_env.py # + ################################################################################################### + unity_action = np.append(joint_vel, [float(0.0)]) + + return unity_action + + ########### + # Helpers # + ########### + def get_ruckig_current_pose(self): + """ + get the current state pose of the ee in dart coordinates - needed in the get_state() + - refer also to the clip methods in iiwa_numerical_planar_grasping_env.py + + :return: ee_rz_d, ee_x_d, ee_y_d - or the first value is skipped if only 2DoF are controlled by the model-based agent + """ + state = np.empty(0) + + # Rotation part of the ee - only if 3DoF are active # + if(self.action_space_dimension == 3): + _, ee_ry, _ = self.get_ee_orient_unity() + ee_ry = self.clip_ry(ee_ry) + state = np.append(state, np.asarray([ee_ry])) + + # x and z position of the ee # + x, _, z = self.get_ee_pos_unity() + state = np.append(state, np.array([z, -x])) + + return state + + def get_ruckig_target_pose(self): + """ + get the target state pose of the box - needed in the get_state() + - refer also to the clip methods in iiwa_numerical_planar_grasping_env.py + + :return: rz_box_d, x_box_d, y_box_d - or first value is skipped if only 2DoF are controlled by the model-based agent + + """ + state = np.empty(0) + + # Rotation part of the box - if 3DoF are active # + 
if(self.action_space_dimension == 3): + box_ry_rad = np.deg2rad(self.init_object_pose_unity[4]) + box_ry_rad = self.clip_box_ry(box_ry_rad) + state = np.append(state, np.asarray([box_ry_rad])) + + # x and z position of the box # + state = np.append(state, np.array([self.init_object_pose_unity[2], -self.init_object_pose_unity[0]])) + + return state + + ########### + # Monitor # + ########### + def _create_task_monitor(self, plot_joint_position=True, plot_joint_velocity=True, plot_joint_acceleration=False, plot_joint_torque=True, plot_agent_state=True, plot_agent_action=True, plot_agent_reward=True): + """ + Override the task monitor definition since we have changed the get_state() function and the actions dimensions + refer to iiwa_dart.py and task_monitor.py for more + """ + from PySide2.QtWidgets import QApplication + from utils.task_monitor import TaskMonitor + + if not QApplication.instance(): + self.monitor_app = QApplication([]) + else: + self.monitor_app = QApplication.instance() + + self.monitor_n_states = 5 + state_chart_categories = ['RESET', 'X_EE', 'Y_EE', 'X_B', 'Y_B'] + if self.action_space_dimension == 3: + self.monitor_n_states += 2 + state_chart_categories = ['RESET', 'RZ_EE', 'X_EE', 'Y_EE', 'RZ_B', 'X_B', 'Y_B'] + + self.monitor_window = \ + TaskMonitor(plot_joint_position=plot_joint_position, + param_joint_position={'dim': self.n_links, + 'min': np.rad2deg(self.MIN_JOINT_POS), + 'max': np.rad2deg(self.MAX_JOINT_POS), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 10.0, + 'title': "Joint Position [" + u"\N{DEGREE SIGN}" + "]"}, + plot_joint_velocity=plot_joint_velocity, + param_joint_velocity={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_VEL), + 'max': np.rad2deg(+self.MAX_JOINT_VEL), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Velocity [" + u"\N{DEGREE SIGN}" + "/s]"}, + plot_joint_acceleration=plot_joint_acceleration, + 
param_joint_acceleration={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_ACC), + 'max': np.rad2deg(+self.MAX_JOINT_ACC), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'title': "Joint Acceleration [" + u"\N{DEGREE SIGN}" + "/s^2]"}, + plot_joint_torque=plot_joint_torque, + param_joint_torque={'dim': self.n_links, + 'min': -self.MAX_JOINT_TORQUE, + 'max': +self.MAX_JOINT_TORQUE, + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Torque [Nm]"}, + plot_agent_state=plot_agent_state, + param_agent_state={'dim': self.monitor_n_states, + 'min': self.observation_space.low[0:self.monitor_n_states], + 'max': self.observation_space.high[0:self.monitor_n_states], + 'cat': state_chart_categories, + 'title': "Reaching Target Error"}, + plot_agent_action=plot_agent_action, + param_agent_action={'dim': self.action_space_dimension, + 'min': self.action_space.low, + 'max': self.action_space.high, + 'cat': [str(1 + value) for value in + list(range(self.action_space_dimension))], + 'title': "Normalized Command"}, + plot_agent_reward=plot_agent_reward, + param_agent_reward={'dim': 1, + 'min': -3.0, + 'max': +3.0, + 'cat': ['r'], + 'title': "Step Reward"}, + ) + + self.monitor_window.show() + self.monitor_window.correct_size() diff --git a/agent/dart_envs/LICENSE b/agent/envs_dart/LICENSE similarity index 100% rename from agent/dart_envs/LICENSE rename to agent/envs_dart/LICENSE diff --git a/agent/dart_envs/README.md b/agent/envs_dart/README.md similarity index 100% rename from agent/dart_envs/README.md rename to agent/envs_dart/README.md diff --git a/agent/envs_dart/base_dart_simulation.cpython-37m-x86_64-linux-gnu.so b/agent/envs_dart/base_dart_simulation.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000..32b640c Binary files /dev/null and b/agent/envs_dart/base_dart_simulation.cpython-37m-x86_64-linux-gnu.so differ diff --git a/agent/envs_dart/iiwa_dart.cpython-37m-x86_64-linux-gnu.so 
b/agent/envs_dart/iiwa_dart.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000..74d0b29 Binary files /dev/null and b/agent/envs_dart/iiwa_dart.cpython-37m-x86_64-linux-gnu.so differ diff --git a/agent/envs_dart/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so b/agent/envs_dart/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000..5f0d8da Binary files /dev/null and b/agent/envs_dart/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so differ diff --git a/agent/envs_dart/misc/generated_random_targets/CSV_contents_guide.txt b/agent/envs_dart/misc/generated_random_targets/CSV_contents_guide.txt new file mode 100644 index 0000000..bdabfe2 --- /dev/null +++ b/agent/envs_dart/misc/generated_random_targets/CSV_contents_guide.txt @@ -0,0 +1,3 @@ +Each row of generated targets CSV files correspond to a random target. +Columns of generated targets correspond to the following order: +cart_pose_y, cart_pose_z, cart_pose_x, quaternion_w, quaternion_x, quaternion_y, quaternion_z, joint_pos_1, joint_pos_2, joint_pos_3, joint_pos_4, joint_pos_5, joint_pos_6, joint_pos_7 diff --git a/agent/envs_dart/misc/generated_random_targets/cart_pose_7dof.csv b/agent/envs_dart/misc/generated_random_targets/cart_pose_7dof.csv new file mode 100644 index 0000000..9e50b5e --- /dev/null +++ b/agent/envs_dart/misc/generated_random_targets/cart_pose_7dof.csv @@ -0,0 +1,195 @@ +0.636911404421983,0.913358883325954,0.223688801631281,0.228858550238294,0.121947178591538,0.375864763638961,0.889650678115303,1.88646566867828,0.702981173992157,1.76862525939941,0.97777533531189,0.479131609201431,0.449924528598785,-1.17468595504761 +-0.172564472486644,1.19472477694813,0.238597657983051,0.36784919690639,0.243972351162842,-0.4211664541066,0.792327759288207,-1.23522877693176,0.073914274573326,1.27880215644836,-0.548812210559845,-1.90543782711029,1.03065967559814,-1.85495471954346 
+-0.348973447115343,1.17363446329339,-0.044651328986998,0.632612410525263,0.270949759408944,0.55169256588449,-0.471193249817353,-2.21242594718933,0.196580037474632,0.445660382509232,-0.238308236002922,0.79594898223877,1.06861257553101,-0.549297690391541 +0.338599205678434,0.828664415672876,-0.651609554108356,0.747044425735506,0.332513709271991,-0.550756575196238,-0.167411033094596,-0.941726624965668,-1.15098190307617,2.26723909378052,0.646672129631043,2.16433262825012,0.766096532344818,1.7537122964859 +0.381673212808921,0.885215277119418,-0.381767228759397,0.68277541460987,-0.574022852046658,-0.022524688722399,0.451451145674148,-1.18400228023529,-0.008043975569308,0.460767924785614,1.42930769920349,0.768580615520477,0.303574293851852,1.28979384899139 +-0.774020843018197,0.679233019295516,-0.482397925503371,-0.415182368829033,-0.508780062611764,-0.283532239869952,0.69883897820357,-2.24807977676392,1.19180631637573,-1.91322731971741,0.234353572130203,-0.034982848912478,0.083682261407375,2.02784562110901 +-0.761811644305853,0.294981455992918,0.202087722082298,0.452952283287461,0.26207208096634,0.84901830846331,0.072940834494463,1.26870024204254,-1.53389990329742,-1.57877540588379,-0.83793181180954,-0.713739812374115,1.04835093021393,-0.661163628101349 +-0.700435806150988,0.819221846839289,0.157110238745731,0.533723066771078,0.044651432163337,-0.120884210288562,-0.835782833818181,1.66434979438782,-1.15863466262817,1.49038469791412,0.304716646671295,-1.24069654941559,1.43124032020569,2.25601577758789 +0.131465782796256,0.906574207805222,-0.519395480573886,0.631770894583743,0.452111905760019,-0.215588570711769,-0.591592705841914,-1.3143744468689,-0.891731977462769,-1.04966413974762,-1.34605967998505,1.26334130764008,-0.039275709539652,-0.92792671918869 
+-0.398273439416948,0.413367136168851,-0.654544853110886,0.667584834532649,-0.14246060475558,-0.715438654539584,-0.14893957293238,1.27727603912354,-1.48156023025513,-1.59957242012024,1.12508797645569,0.149799346923828,-0.578854084014893,1.32979655265808 +-0.675036005622146,0.73119076584406,0.25103923850997,-0.247086916420851,-0.10601439739991,0.855696030491492,-0.442146250327505,-1.15182828903198,0.618721127510071,0.197253867983818,-0.698967337608337,2.16231989860535,-1.18650114536285,1.93397986888886 +-0.642721104741678,0.56326003360993,0.075407815637437,0.268546785791751,0.751635439329062,-0.061140812349429,0.599323444602979,0.870910108089447,-0.674615621566772,0.955447793006897,1.44490587711334,-2.05302143096924,0.766474306583405,0.99465537071228 +-0.870504543529283,0.53811219515483,-0.252312602646826,0.896130688155642,0.179337384648051,-0.326177745945576,0.241652581743204,-1.8067409992218,1.48299026489258,-1.81458365917206,0.037965621799231,2.34465003013611,-0.898180961608887,2.09032988548279 +-0.745646334812978,0.597419933264637,-0.267647142123039,0.777709075579262,-0.287999879264276,-0.46792537794371,0.305402200354955,-1.52358639240265,1.35906076431274,-1.64770567417145,-0.430638462305069,-0.384777337312698,1.28676605224609,-0.771496176719666 +0.161736788605422,1.06222787871894,-0.368662414616576,-0.370123277188546,-0.021208823769587,0.764041443972289,0.528014788969267,2.21840667724609,0.105984322726727,-2.12894463539124,0.622080087661743,-1.0746750831604,-1.31553387641907,-0.447015821933746 +0.240890679767527,1.03831435913241,-0.343171040867735,-0.490794890325449,0.742511305887802,0.350902437787198,-0.290972190104524,-1.55874609947205,-0.148293256759644,-1.80815839767456,-0.611432790756226,-0.410930782556534,1.27658247947693,-1.36323225498199 
+-0.201532582176514,1.02175520100892,-0.122207016160399,0.891596105410748,-0.119893700674116,0.091946680312633,0.42688135744553,0.01774044521153,-0.892684996128082,-0.655002295970917,-1.54584896564484,2.21748828887939,1.09420657157898,-1.2143120765686 +0.112024603053261,1.26280525293063,-0.21534478063865,0.934950092410331,-0.121298674349814,-0.193848617696462,0.271252040950962,-1.86652612686157,-0.232403695583343,-0.912892878055573,-0.570689380168915,-2.12120079994202,0.120552010834217,-0.842764675617218 +0.22381281909273,1.10938426943059,0.120586640405228,-0.472141588132912,-0.225410404265326,0.023695562395159,0.851886724117151,2.33498620986938,0.424914240837097,-2.15903854370117,-1.15687024593353,0.436919122934341,-1.41438591480255,-2.52330446243286 +-0.05471988267764,0.876820695598893,0.471997776369938,-0.42965223077325,0.546748673974259,-0.007745093078829,0.718613151585858,-1.52714216709137,0.732043385505676,2.09672737121582,-1.5400025844574,-1.72158360481262,-0.098712041974068,-1.49899458885193 +-0.146593424051392,0.612697682651349,0.649490444129963,0.471708797902704,-0.150891397898446,0.854362232935551,-0.157441325435282,-1.06678926944733,0.8619504570961,-1.53483581542969,1.39567053318024,1.63468456268311,0.84794944524765,0.250422865152359 +0.493372040464022,0.799955416616922,-0.268524717672336,-0.226457851172098,0.134822602745586,0.383833634413641,0.884992343765886,1.68102943897247,0.223840564489365,0.62731409072876,-1.56917488574982,0.503437042236328,-1.19090461730957,1.25378572940826 +0.497450462391834,0.541396213711154,0.796895134646632,0.810838564831695,-0.02128455670615,0.576334480659517,0.099631098711318,0.647509157657623,1.40354382991791,-1.22563219070435,-0.04446292668581,2.32823014259338,-0.471204668283463,-1.13512992858887 
+0.539947549572346,0.701082655049513,0.075503890246938,-0.492702296847456,0.833639804136288,-0.229289791878748,0.098566297389387,-2.33286571502686,-0.072443321347237,0.775075495243073,1.52542626857758,2.40691232681274,0.684577345848084,-0.685503482818604 +-0.396061854013517,0.96893495104751,0.384636413803617,0.417459786659642,-0.173461209342896,-0.036827863257251,0.891225136462887,-1.20013034343719,1.01531338691711,-1.16109001636505,0.783139884471893,-1.39830911159515,1.26807808876038,-0.63716059923172 +-0.614581880552827,0.90599410376589,0.499056127565472,-0.370897389466195,0.040653012517565,-0.319425030769302,0.871062631949299,-0.908458471298218,1.04080283641815,-1.12210929393768,0.077483110129833,-1.62269484996796,0.386096507310867,1.34500873088837 +0.04607403299296,0.947153814874523,-0.551287812625047,0.105773462428278,0.004409985574704,-0.258059509599587,0.960311312116225,0.350414633750916,-1.03172135353088,-2.03262209892273,0.998356401920319,0.913001537322998,1.32961320877075,-2.32348132133484 +0.549991212243178,0.870614833020334,-0.535112360835291,0.635701306216602,-0.445952334005778,-0.508833127312725,0.371617025469039,-0.900344610214233,-0.98240339756012,0.372431188821793,-0.145762056112289,0.63100653886795,-0.943620383739471,0.539177656173706 +-0.048283939831369,1.2362449424691,0.29787725684504,0.846125673932814,0.311159118864901,0.237247882180404,-0.361890576086023,0.131306916475296,0.307573229074478,0.629711925983429,-0.092944040894508,-2.43593955039978,0.883309900760651,1.02441227436066 +0.419023455393329,0.859348436777655,-0.341275036383636,0.162391009781394,-0.540186061176027,0.172631835129085,0.807481534620621,-0.337848335504532,-0.039250899106264,-0.705408692359924,1.47724485397339,-0.97467577457428,0.599819898605347,-1.9131795167923 
+0.283469627014647,1.10094351821382,-0.035115090306096,0.590192848843529,-0.746083691285846,0.286033108986598,0.114963417375735,0.292415797710419,-0.148666888475418,-2.14911818504333,0.467957317829132,0.147470816969872,-1.45261335372925,2.19984698295593 +0.036814053504143,0.880353828397754,0.439921063054493,-0.298185579652034,0.655613005366004,-0.001079241369185,0.693726014014614,-1.36029016971588,0.743379473686218,-0.811925530433655,1.55870616436005,1.64342546463013,0.522528290748596,-1.95877933502197 +0.725281592689829,0.80578169721448,0.052137521702016,0.346443211480051,-0.545977117302355,0.177753416658574,0.741815213828756,-1.46110332012177,-1.23600196838379,0.914525210857391,-0.786412835121155,0.47497296333313,-1.31889641284943,2.1042594909668 +-0.442862964189149,1.07725757273753,-0.433045477191641,0.8515821062149,0.33786172126744,-0.008754644064904,-0.40072525486185,0.680771827697754,-0.937028050422668,-0.183417826890945,-0.450688093900681,1.07007968425751,-0.237898334860802,-2.5278480052948 +-0.757227783445803,0.96450445980435,-0.043142701858597,0.724800017466872,0.345456180287254,-0.260619233688185,0.536099409823537,-1.72294104099274,0.889577984809876,-1.67553949356079,0.139355719089508,-0.369058519601822,0.030817808583379,-1.29576599597931 +0.701219651074966,0.583341970690438,0.387291900618304,0.662049977641164,-0.020772824057373,0.207443032787867,0.719878951653532,-1.82827854156494,-1.10021686553955,-0.602835297584534,0.864984333515167,0.519585192203522,1.41983067989349,-2.33843278884888 +-0.73265291185144,0.909476270669214,-0.184163265048354,0.701544912715108,0.029874945350108,-0.35606908850093,0.616568753098953,-1.96214723587036,1.10566163063049,2.15609192848206,-0.454432129859924,2.26549911499023,0.637875318527222,-0.987691938877106 
+0.187933577830459,0.506086100167199,-0.843502572758487,-0.055981031491685,-0.526293014380211,0.653859660888984,0.540693379828032,0.17501300573349,-1.4101699590683,1.1767909526825,-0.631067752838135,1.37765073776245,0.850337982177734,1.42891871929169 +-0.650711947417774,0.694664950992731,0.4745821452281,0.594819223154125,0.629506978887004,0.490566335635647,0.09620668188798,1.77752816677094,-0.8520268201828,1.11220037937164,0.757686734199524,-0.911316990852356,-0.534466207027435,-1.93114912509918 +-0.192068746264104,0.872975685921411,-0.431830607765067,0.840814792204557,-0.148206604604031,-0.36999878863564,-0.366287024026119,1.40057301521301,-1.19151973724365,-2.27863430976868,1.48207640647888,2.38569593429565,0.318593591451645,-1.54156732559204 +0.832294648381876,0.831714884245273,0.170540904312942,-0.230881609492375,0.20543530054791,0.463286621727337,0.830575418499411,-1.77722430229187,-1.07024419307709,0.722939312458038,-0.016314847394824,-2.35883736610413,0.063507936894894,0.783059656620026 +-0.51222496323253,0.857679063504942,0.205074942615334,0.651175640324347,0.573140946943665,0.030758010177279,0.496521585829489,-1.19358098506928,0.047104109078646,-0.110984414815903,-1.45819914340973,2.19533777236938,0.512102663516998,0.070922926068306 +-0.533592700938702,1.03805569028235,0.215209605167806,-0.474686519347246,-0.277324954690901,-0.09080039189394,0.830372727567322,-0.646244049072266,0.909859716892242,1.02605783939362,0.667910575866699,-0.690209686756134,-0.487149804830551,-1.37605381011963 +-0.244207952301942,0.830921774010588,0.476447053393594,0.626324101854176,0.120427763109515,0.71478178360511,-0.286883731039764,-2.21100473403931,0.093533247709274,1.89500069618225,-1.44827473163605,2.11830258369446,-0.335476040840149,-2.48427224159241 
+0.195623878095059,1.19276072003075,0.081137724098354,0.648979327215869,0.034513898580478,0.626798020997072,0.429835857654256,-1.88847732543945,-0.227765366435051,2.36348223686218,0.297593683004379,0.006863658782095,1.48738944530487,0.803833544254303 +0.19399917071796,0.964531336831757,0.289465615431401,-0.087088373411935,-0.18834333380389,0.685710131580867,0.697670423104957,-0.862358689308167,0.604137241840363,2.14134573936462,-1.09194445610046,0.544340431690216,1.21212339401245,0.631623029708862 +0.723788575487757,0.338714710085193,0.366271843021665,-0.175483827830372,-0.186470159922931,0.925397969346943,0.279415289411427,0.802120745182037,1.25753223896027,0.787411034107208,-0.538206875324249,-0.098978132009506,1.25263249874115,0.644879877567291 +0.386969290019709,1.06212124473252,0.341342928236557,0.734998365014975,0.111259215602606,0.658415506841479,0.117846555823599,0.897514402866364,0.459190756082535,-2.24435639381409,0.303252726793289,-1.99260652065277,-1.3047833442688,-2.13302636146545 +0.341327290047859,1.03682207047903,-0.343559120901693,0.085886298645526,-0.185460705806964,0.110732275311721,0.972607954682072,-1.5161806344986,-0.936827659606934,2.2996654510498,1.16891312599182,-0.43856680393219,0.515383660793304,2.33067297935486 +-0.218002091930658,0.915521533648101,0.194271024432429,-0.40149322841597,-0.251432577103765,0.353311087740603,0.806694565487231,2.11276030540466,-1.38333332538605,-0.293501138687134,-1.45050740242004,-2.36680746078491,-1.0226321220398,-2.05796504020691 +-0.684700222045974,0.834870556622838,-0.308597390327423,-0.409248865285432,0.013652520870523,-0.087536367388445,0.908111424507423,-2.01192402839661,0.971780359745026,-0.065050348639488,-0.376711964607239,-0.105748988687992,-1.26167547702789,-0.215934306383133 
+0.289704750476023,1.09091516143354,-0.289396937891571,0.628670608557409,0.048710566786553,0.019177025221453,-0.775907718947972,0.095926031470299,-0.248476654291153,1.86832165718079,-1.03642082214355,0.166418701410294,-1.17997479438782,2.45396709442139 +-0.011135227010258,1.24659651681178,0.006005885637619,0.902571677165782,0.133859032776,0.408021732969878,-0.031054667067586,-2.00510501861572,0.203737244009972,0.643991649150848,0.46343344449997,0.927428305149078,1.15708589553833,0.492472231388092 +-0.264153312969764,0.907470239842529,0.584918465791843,0.227709322236425,0.439372850124007,0.262980743660596,0.828215606955251,0.065881595015526,0.67616868019104,-1.44337332248688,-0.979189395904541,-0.444043755531311,-1.15996432304382,-2.15457630157471 +-0.600168721083803,0.716981898235472,0.234166601743867,0.424446562765201,0.732661887061321,-0.169900888598559,0.504167990513053,-0.708645582199097,1.3731644153595,0.75609278678894,1.40222728252411,-0.385849386453629,1.49883663654327,1.95318472385407 +0.36388612516406,0.946990070116241,0.262468205760144,-0.017948783407879,0.657212863167675,0.347140067897349,0.668762190110467,-1.23726654052734,-0.846203744411469,0.63184928894043,-1.17945241928101,1.45900893211365,1.14597284793854,2.34366154670715 +-0.642025802590455,0.658869291905894,-0.510328200517284,0.406873466657085,0.493474311262002,0.552412212836622,-0.534581923903049,0.860302746295929,-0.803196609020233,-0.218701630830765,0.723901569843292,1.53480517864227,-0.79029643535614,1.55526554584503 +-0.476222728141901,0.660543263041618,0.615106967208472,-0.04322478891931,0.754500923636895,0.130060198018312,0.641828885876439,1.97466599941254,-1.28361022472382,2.0766761302948,0.720331728458405,2.21355772018433,0.959938406944275,2.3403799533844 
+0.42948271142836,0.234393194428786,-0.682061422270137,0.642759229389321,0.15048841488784,-0.747201566065165,-0.076835081109839,-0.908741652965546,-1.22482562065125,0.430074125528336,1.10046803951263,-1.38909411430359,0.667581021785736,0.345533102750778 +0.035958811982747,0.970222854250159,-0.162392229657976,0.663630299213701,-0.138116969572001,-0.611087859630394,-0.408766628400759,-2.42525148391724,0.903214931488037,-0.035555075854063,1.55613350868225,-1.37560665607452,1.48206162452698,2.16241264343262 +0.13966574528122,0.49743104396609,0.788707148924698,-0.020664820163767,0.704443122757874,0.194689272192212,0.682223525906283,-0.47078675031662,1.23214721679688,1.4730931520462,-1.24052083492279,0.546141028404236,-0.46508976817131,1.1477313041687 +0.620467749577061,0.835660768010111,-0.338626321028026,-0.207228774747179,-0.193220407516312,0.775310453462218,0.564460636172464,-0.643993556499481,-0.613588333129883,2.00864291191101,-0.744429051876068,-1.96885454654694,-0.837775766849518,-1.97907400131226 +0.182636353228026,0.712888357898798,-0.71581830790195,0.244074535562574,0.872035760088231,-0.289743947042581,-0.309886591141729,-0.629477560520172,-1.00143384933472,-1.309565782547,-0.49194923043251,1.92519807815552,-1.41939949989319,-2.46686673164368 +-0.148216279051337,1.18644620529776,0.28912175736045,-0.377560350598175,0.441928734882788,-0.270633381731087,0.767401295043133,1.20376873016357,-0.246277362108231,2.10855078697205,0.597861289978027,-0.256641715764999,-0.582962989807129,0.904814541339874 +-0.591914859436688,1.03679948769014,0.01996030535417,0.160918271379811,-0.092404506913417,-0.667214546309369,0.721381637021212,1.51444220542908,-0.445340931415558,-2.44122791290283,-0.390372753143311,1.84915995597839,-0.899656116962433,2.07026243209839 
+-0.179507235267371,0.910505846048781,-0.680798968688491,0.847372696583647,0.216914467471526,0.135601685751838,-0.465316891706461,0.002595300553367,-0.931283533573151,0.950899302959442,0.320824295282364,1.76274061203003,-1.08374714851379,2.10952281951904 +-0.099581682239317,1.25671637890683,0.004343522053436,-0.306317903624794,0.273712289094264,-0.342186187745497,0.845079604317779,1.45656180381775,0.042004987597466,-0.84948855638504,0.314662575721741,2.13893485069275,-1.07146692276001,0.958908021450043 +0.288789453562025,1.1500885929307,-0.420045528193466,-0.27712866517016,-0.448746497089631,0.280634094178459,0.801916946744129,2.39289474487305,0.471732079982758,0.389285802841187,-0.041007269173861,0.364815235137939,0.691260278224945,0.522503137588501 +-0.111291498283606,1.20513156315291,-0.156493040059587,0.77990639018144,-0.067312437668952,0.477101382936538,-0.399486331041828,0.395847111940384,-0.441004276275635,0.552285730838776,-0.20714958012104,-1.21672105789185,1.27649974822998,-0.710156500339508 +0.748896516178909,0.487473392973478,-0.084063722748344,0.844241298817629,-0.496116898993354,0.164382131257394,-0.118756754869856,-0.698665976524353,-1.42892980575562,-1.5327752828598,1.32558858394623,-1.69981026649475,-0.473166793584824,-1.41057848930359 +0.361292774981517,1.0839765222981,0.267824736530015,0.898120607138184,-0.059154121766314,0.159026091918045,-0.405697999749899,0.681072056293488,0.155821174383163,0.450344324111939,-1.00181376934052,0.291104048490524,-1.0343359708786,-2.11548852920532 +0.334347254405228,1.01318429350774,-0.047997643881906,0.389619695084182,-0.104702961846437,-0.306144242828816,0.862269960955305,-0.805354475975037,-0.897411227226257,0.874174118041992,-1.23818969726563,1.08773195743561,-1.474280834198,1.63746774196625 
+0.641002131925426,0.358845231215015,0.59618516381757,0.094208467541888,0.133442659617282,0.774485120282756,0.611138789227358,0.345772355794907,1.36220765113831,1.20781576633453,-0.867246150970459,-0.441142141819,0.14402163028717,0.836861610412598 +-0.409087386360666,0.936882707823581,0.378851171758633,0.4339812463198,-0.030336054103767,0.07254277778541,0.89748400935911,-0.221609935164452,0.880178689956665,1.47170460224152,1.02065896987915,0.954361140727997,1.3151490688324,0.512748003005981 +-0.320004769238541,0.975840441156021,-0.14539082584455,0.580644238157311,-0.108162404776262,-0.612663628801537,0.525163251600361,1.07162272930145,0.295391082763672,0.251605480909348,1.39015352725983,-1.33656907081604,-0.774456262588501,2.04469299316406 +0.604387491805053,0.99642292925467,-0.02027035442361,0.773464643872538,-0.484574850089228,-0.258725641363043,0.316228875723599,1.34539759159088,1.04740989208221,-0.126202672719955,0.643973469734192,-1.74820530414581,-0.941993534564972,1.06344962120056 +0.753197635865217,0.649333852311558,0.294092297139401,0.337372513443376,-0.345396117147017,0.423343063749094,0.766591129487026,-1.87265729904175,-0.849061012268066,-0.434947669506073,0.901000499725342,-0.670849204063416,1.03916776180267,-1.52743232250214 +0.46425114407617,0.7496384283268,-0.413607279014546,-0.276122286941419,0.797312413718034,0.136920593498341,-0.518943300044836,-1.25144112110138,-0.259436726570129,0.351937562227249,1.24962031841278,0.879777908325195,-0.707617163658142,1.43954288959503 +-0.405221030006359,1.01252663388676,0.517578126173335,0.624289242001611,0.061888506218627,-0.116484959752057,0.769976629041845,-0.698528289794922,0.833487272262573,-2.32901501655579,0.198639735579491,-0.387742578983307,0.914884865283966,-1.02422988414764 
+0.064890012174057,0.971389192232388,0.370336169456273,0.56655890906982,0.462699195904591,0.410131263805732,0.544713505534767,-1.29073870182037,-0.605242908000946,0.638175666332245,-1.41972804069519,-1.83921492099762,-0.524857938289642,-2.2324116230011 +-0.666462901799575,0.96387155233006,0.089844650948206,0.810718421978087,0.503297958527946,0.067800951918514,0.291255619904414,1.15522050857544,-0.902801215648651,-1.14371263980865,-0.747685790061951,-1.68646144866943,0.249685302376747,2.0882420539856 +0.150364949342243,0.594499650224636,0.625920213512265,0.713111277872918,0.275824196593662,0.026932773539285,0.643947159053751,-2.29116225242615,-0.843475461006165,-0.818664193153381,1.45974588394165,-2.16186237335205,-1.55656599998474,1.62833106517792 +0.350610519865953,1.19871171448214,0.325881066975633,0.920109703468102,-0.004800719611637,0.271966454226276,0.281796618948202,-2.26239800453186,-0.503514111042023,-2.42722034454346,-0.057914540171623,0.652493298053741,-0.328758627176285,-1.57261848449707 +-0.393833661119184,0.79715645470876,-0.692072717848862,0.90686943844939,-0.093374635761429,-0.367602185440689,-0.183678066905491,-2.34343361854553,1.15574133396149,1.51945960521698,0.303888410329819,2.34424734115601,0.827578485012054,-1.36024475097656 +-0.536163836798907,0.95082975770887,-0.498691626058434,0.84370770391306,0.311804835279106,-0.220153959000251,-0.377448393016632,-2.41668009757996,0.693041265010834,0.48855996131897,-0.505365908145905,0.318310588598251,-0.518589437007904,0.857737183570862 +-0.075070285166067,0.881015245203664,0.527023356719585,0.654717786031845,0.127653266408096,0.656279372833639,0.352628202249889,-2.36193251609802,-0.055402912199497,1.99379503726959,-1.33222210407257,1.51664006710053,0.658399403095245,-0.669269323348999 
+0.148391695834763,1.2732560226575,0.108166407160642,0.577388046981363,-0.111906532252114,0.090489609025018,0.803686258375029,0.002778482856229,0.411468416452408,-0.494688808917999,0.594145476818085,0.631799578666687,0.046489782631397,1.70506203174591 +-0.752287020133316,0.800195446038831,0.332522791160324,-0.136871103623447,-0.250934862741574,-0.468995899842689,0.835667901492282,-1.0856260061264,1.06295418739319,-2.12158751487732,0.126125141978264,0.3807512819767,0.836376190185547,0.451923340559006 +0.848139546001332,0.197491989193884,-0.015310398252788,-0.193726888162622,-0.554086164793174,0.623408938267002,0.5165459422708,1.49908792972565,1.47856783866882,-0.288708865642548,-0.474588662385941,-1.56651222705841,-1.19009387493134,2.52681946754456 +0.503226625797267,0.841931252106391,-0.054799784626148,-0.225264563748471,0.029735261192193,0.68326102555032,0.693920789085446,-0.115268036723137,-0.521479070186615,1.08305966854095,-1.52854955196381,1.8706179857254,0.810707211494446,0.674304127693176 +-0.431703061311451,0.701639891689123,-0.483473971598871,0.55042811094886,0.214960839099995,-0.667499446726759,0.45306204977924,-0.111347265541554,-0.417775243520737,1.05572152137756,1.47535693645477,-0.753979027271271,0.138935908675194,0.765320062637329 +0.718214830828173,0.402007000682715,0.060018150526458,0.587866963880869,-0.511597676041811,-0.609930024586329,-0.143755402521882,0.8628950715065,0.895659923553467,-2.35839295387268,1.41510498523712,-1.88455617427826,0.45818954706192,1.51454544067383 +-0.676170778734924,0.37538445045393,-0.652076206819349,-0.025341811897448,0.600325401276232,0.643156772650608,-0.474675226808752,-2.2694091796875,1.44283628463745,-1.46202456951141,-0.128668129444122,-1.66491031646729,-0.693396031856537,2.2326078414917 
+0.461615380719027,0.982459354810667,-0.41776797769925,0.938676932334039,0.229683264582428,-0.085598253116928,-0.242495677771889,2.02555632591248,1.01858770847321,2.22515869140625,-0.351834177970886,0.54879754781723,0.994260191917419,0.604972064495087 +-0.627837015531445,1.02264113291143,-0.068486314938888,0.076095611483002,0.00034356069174,-0.341496012044326,0.936797637506041,-1.19204568862915,0.573889791965485,-1.19834077358246,-0.744904935359955,0.09378944337368,-0.452984541654587,-0.905039072036743 +-0.906560656345078,0.450451003812456,0.11262002380878,0.10832586573097,0.241986443831847,0.771223744498933,-0.578724462707683,2.00777459144592,-1.28900837898254,1.92464125156403,-0.578844010829926,-2.06056761741638,-0.362781286239624,2.10588097572327 +0.383038238638205,0.957235860879417,-0.130601065644323,-0.156579177251606,0.199491701534845,0.819413807837509,0.514049641372481,0.488070398569107,-0.299338608980179,-2.20404648780823,1.01788091659546,0.331022024154663,-1.14423930644989,-1.04700672626495 +0.59182375203404,0.611567905339327,-0.149481093052288,0.165924786780849,-0.496049284666164,0.720145578206375,0.455844730696454,0.839916229248047,0.614544808864594,1.49868381023407,-1.55400085449219,1.83896172046661,-0.719889938831329,-1.6755096912384 +-0.657646105293129,0.374274560260741,0.572999661522511,0.010051357351637,0.091277807352834,-0.304351175406072,0.948123248385562,-0.770921051502228,1.47840929031372,-0.149398699402809,-0.470839709043503,0.429544895887375,-1.3538635969162,-2.29718852043152 +-0.748511667298653,0.880030247916194,-0.027300932517323,-0.121964652695929,0.12136604307742,-0.492745760301271,0.852992686243917,1.09181833267212,-1.09600150585175,-1.04664921760559,-0.687567114830017,1.82958054542542,-0.191698864102364,1.18249273300171 
+0.687747679867611,0.282815111480477,0.010609617664817,0.521141692716388,-0.688266498064893,-0.504310420938747,-0.019275971746461,2.09928154945374,1.21911549568176,1.99226307868958,1.56296825408936,-0.799715280532837,1.31677293777466,2.15742754936218 +0.686816636638714,0.840939721961332,0.187739358180226,0.576876071617844,-0.031394340861979,0.681978199304276,0.448479797795066,1.92726469039917,0.952082872390747,-1.84983670711517,-0.790094316005707,0.805806457996368,0.580945193767548,0.982917129993439 +-0.569476043921849,0.367643315814899,0.663326046958155,0.259592952243274,0.557283103049882,0.782093323955749,0.101867928341376,-0.643958508968353,1.35042548179626,2.00872015953064,0.247655957937241,-1.70695173740387,1.22527956962585,0.327810764312744 +0.213923476525526,0.551563383316013,0.729889813852367,-0.430310203011133,0.866271033627604,-0.081305817640648,0.240410044507432,0.641947031021118,0.918042898178101,-1.13383269309998,-1.01264464855194,-1.34713327884674,-1.27726447582245,-0.188324749469757 +-0.776428573775432,0.539980723608266,-0.178622984073823,0.073926877446345,-0.15759890992335,0.818261944749677,-0.547854716285697,0.741220414638519,-1.06590104103088,1.32689905166626,0.987933933734894,-0.642112553119659,-0.528240203857422,1.10301661491394 +0.318007375783224,1.0596931167993,-0.504521186874696,0.144030907121101,-0.660768588296194,0.102381822376082,0.729491557841406,-0.716541767120361,-0.53775280714035,-2.32374858856201,-0.127791970968246,-0.406732499599457,0.850658118724823,-0.205385774374008 +-0.666035523732795,0.918171311526587,-0.205844894692418,0.34957608084149,-0.226532234091884,-0.623483720539397,0.661625090850698,-1.99308490753174,0.59045273065567,0.740805208683014,-0.570022821426392,-1.7342973947525,0.876800000667572,-0.690746486186981 
+-0.668505759340301,0.815237766513034,-0.377650746379333,0.711051232104791,0.490491592541418,-0.384029629896919,-0.326106403386499,1.29375803470612,-0.676185190677643,-0.293555468320847,0.675547897815704,-1.44143772125244,-0.612814903259277,0.143297776579857 +-0.264977965969188,0.952359545468656,0.176864508766961,0.759662609384277,-0.18721067799516,-0.259744114200181,0.566036992683718,-0.709902465343475,1.39356100559235,0.061480898410082,1.48805022239685,0.744490921497345,-0.540858685970306,1.23264384269714 +-0.586531229111167,0.91733236177502,-0.524142305198111,0.346001860434152,-0.085956682916821,-0.511788582569864,0.781643529998919,-2.26591181755066,0.835746467113495,-0.504677653312683,-0.218594551086426,-1.36645209789276,-0.21320316195488,0.091533102095127 +0.32907315418907,0.783669668905489,0.490316921188424,0.556166042879733,0.32246748016536,0.725455594467371,0.245780872810102,-0.16031776368618,0.565495848655701,-1.55429565906525,1.25373363494873,1.3286988735199,1.34862816333771,2.04824376106262 +0.740476739546802,0.508906020458506,0.603523951722767,-0.069374722062251,0.434892222708766,0.544194743491542,0.714078415667628,-2.19421792030334,-1.37954449653625,1.7333300113678,-0.164611726999283,-0.743789792060852,-0.248036950826645,-1.79799818992615 +-0.426931917258644,1.10738479015684,0.230589866919196,-0.001332982558704,0.143400541238524,-0.429299634128785,0.891704172955856,-0.088951952755451,0.324605315923691,-1.58029294013977,-0.8197340965271,-1.72347366809845,-0.127518728375435,0.346147239208221 +-0.498904878818019,0.857038021196022,-0.631700433986429,0.729950718441936,0.16171190095633,-0.65583481429468,0.104412193225955,-2.353670835495,0.948889374732971,1.33693337440491,0.093638755381107,1.21934521198273,-0.61371111869812,0.310350298881531 
+-0.796092851320248,0.883723674324673,0.035391902294859,0.513025867118131,0.28341652721721,-0.37716799647462,0.717094020476497,-1.28012347221375,0.93154788017273,1.78906118869782,0.424548387527466,0.074686452746391,0.201474115252495,1.43182384967804 +-0.193070468710785,0.974077319785481,-0.485406647489432,0.883682069865396,0.425165159078826,-0.188977566405332,0.051264669114358,0.018785402178764,-0.155118405818939,0.187805131077766,1.13202512264252,-1.05889332294464,0.955407440662384,0.409219563007355 +-0.600299330106969,0.996042577258841,-0.127989534859512,0.474411185584567,-0.036959054392156,0.13949017258485,0.868395386355162,-1.77483308315277,0.924811065196991,-0.920451819896698,-0.01666047796607,-2.23721265792847,1.22453057765961,0.794268071651459 +0.739500631189621,0.779818126306504,-0.075003872010091,0.556413188438782,-0.011437897438489,-0.100333540806674,0.824746457296284,1.63159453868866,1.37227201461792,-0.075814314186573,0.291924685239792,-0.068284846842289,-1.2279999256134,0.360205084085464 +0.372532017517189,1.13879928495473,0.006715317162449,0.679546247344315,-0.62199422963931,-0.359705788505673,-0.148161471817651,1.56638741493225,0.254667222499847,-1.15005266666412,-0.142917513847351,-1.63915526866913,-1.34926390647888,0.60636693239212 +0.626081759733775,0.96401080090732,-0.176462130745053,0.796989484850512,-0.065738772363574,0.293224542059905,0.523932765514436,2.30857682228088,1.05379199981689,0.995922386646271,0.651656329631805,-0.577432215213776,-0.520536839962006,-1.10011792182922 +-0.385436802315983,1.21277123344765,0.110988039053359,-0.199959255671856,0.141912304440522,-0.352900763432689,0.902960821458032,-1.62989711761475,0.198353096842766,0.44916182756424,-0.40257066488266,0.835498154163361,0.290389686822891,-2.45420527458191 
+-0.217223039096263,1.23721617757484,0.257666933824183,0.741760216709742,0.042804065194866,-0.095753348385191,0.662412929510388,-0.871574580669403,0.601327836513519,-0.521132051944733,0.446233302354813,-1.37333929538727,0.421416312456131,-2.09539723396301 +-0.655925190053986,0.859946939184442,0.425207580490031,-0.187895232634169,0.159355997067299,-0.11533485782234,0.962288375864085,-1.06878113746643,1.20323061943054,-0.298799067735672,0.190806835889816,0.002903472865,-0.709571361541748,-1.53831994533539 +0.006092298477722,1.14256393910604,0.075266013745795,0.905638943696485,-0.091837835392896,0.349866474763222,-0.221308304153773,-2.37750768661499,-0.569985628128052,-0.058434393256903,-1.2255380153656,-0.492108941078186,-1.30936467647553,2.24883222579956 +0.202197059056828,0.227751531650246,-0.713764065051687,0.212434585516412,0.810862875710789,-0.159192128705576,-0.521565729154712,2.38333082199097,1.11489868164063,0.617834210395813,-1.41361808776855,2.12524199485779,0.347592860460281,-2.35176730155945 +0.683866137935586,0.819658905992823,0.200336436569389,-0.473985421019589,0.585991132425147,0.04075872159205,0.655965654584262,1.63988626003265,1.39168226718903,0.406433701515198,0.964644134044647,1.90109443664551,-0.723686873912811,0.792403399944305 +0.718408265882564,0.612640985928754,-0.456863919548639,0.701385075469172,-0.144684576440578,-0.694522281660211,-0.069022818884844,1.69799482822418,1.20437502861023,1.49709165096283,-0.595990419387817,-0.161659434437752,0.581349670886993,2.17840170860291 +0.381099885611891,1.18663598744888,-0.086335837283864,0.722671467991921,-0.259233474941003,-0.139636988965194,-0.625336282437817,-0.274553000926971,-0.198571667075157,-1.41759288311005,0.711088836193085,-2.102370262146,-0.254179239273071,2.50888609886169 
+0.343582010959522,0.981486872632579,0.38308606213035,0.006708926446723,0.080912315163729,0.102712373449892,0.991392130239628,0.789387047290802,1.37655222415924,0.091171883046627,1.22058844566345,0.797704637050629,0.153486207127571,1.5029639005661 +-0.091867647190339,0.159565159978342,-0.613826472653765,-0.029951715707651,0.40991897681676,0.880780182554135,0.235150158816817,-0.114079274237156,-1.15668618679047,0.643234074115753,1.27980351448059,2.13264012336731,1.35938155651093,-1.48569226264954 +-0.070291483715151,1.25863320300463,-0.094402068330469,0.822586018224927,0.163540765864257,-0.462142400316521,0.288151110272158,-0.860713601112366,-0.020580988377333,-2.37619471549988,0.044807214289904,-2.37511444091797,-1.05614018440247,0.00911473389715 +0.469445475590626,0.967530439693512,0.133578299227314,0.646395368321752,-0.379733450538871,0.462758501145971,0.473106863162186,2.33162903785706,-0.126651719212532,-0.889535427093506,-1.27033340930939,-0.858386218547821,0.123358078300953,0.668511152267456 +0.220182215893538,1.03960741414431,-0.367461603028727,0.663514220733396,-0.398201627408038,0.308586632644125,0.553135275468402,-1.41233479976654,-0.179858759045601,-1.66807293891907,-1.04493188858032,-2.20793724060059,1.27347612380981,0.940442025661469 +-0.799535165688586,0.81975657333375,-0.071287503459986,0.572154189412222,0.603106372372623,-0.367419232503882,0.417019657483716,-1.44114947319031,1.01459586620331,-1.89757978916168,-0.456281125545502,2.32944345474243,0.784060895442963,2.17194318771362 +-0.697848542782954,0.397897717290489,0.454206033407438,0.690618802266347,0.548622433167677,-0.446475293151702,0.150727928355948,-1.07027721405029,1.2751384973526,0.808172047138214,-0.711658596992493,0.764662802219391,-1.47497451305389,1.11145913600922 
+0.023527560179199,1.23745879414635,-0.148400865898892,0.855720511958687,-0.333106068596611,-0.344754084964009,0.194749514448437,-0.572307586669922,0.206033632159233,0.319890916347504,0.470549613237381,-0.403964549303055,-0.762171566486359,1.19003689289093 +0.192124252801009,1.25233196208838,0.08358372109579,-0.467016543575534,0.342186378924006,0.359412887498785,0.731865019252474,0.624193727970123,0.11352726072073,2.0148458480835,-0.048224214464426,1.89949476718903,-0.941429316997528,-0.28108823299408 +-0.403387845272815,1.12814304088634,-0.183090762974073,0.836914434003485,-0.284622712780322,-0.381694173421965,0.269951290980278,1.44289124011993,-0.627323091030121,0.138860031962395,-0.097943857312203,-2.18525624275208,-1.16134119033813,1.5269935131073 +0.11911474121473,1.23796973439978,-0.061556752821084,0.669142470440705,-0.207882968799013,0.144132340660021,0.698755246070956,-0.262430757284164,0.331818133592606,-0.109087407588959,0.842773735523224,-2.06526517868042,-0.448707222938538,-2.14086031913757 +-0.053939737324878,0.850778397615735,-0.604482524852875,0.744445238146539,0.110235119166531,-0.653652373503802,-0.079925468507413,-2.30587720870972,0.211778312921524,-0.925400793552399,-1.28495442867279,-1.68455147743225,-0.116159215569496,-1.5447895526886 +-0.273548598374909,1.18872343537976,-0.054548494172537,-0.334253187160937,-0.399616330242983,-0.515236365616198,0.680524123762339,-1.94101166725159,0.065824709832668,0.305742770433426,-0.193280965089798,-0.145369157195091,1.16428112983704,-0.437267243862152 +0.461324745911674,0.809296465789907,-0.707002795814508,0.352207386404887,-0.561785770604271,-0.080475201588148,0.744224728719592,-0.709498882293701,-0.936362028121948,0.492805868387222,0.289299845695496,1.154456615448,-0.246571019291878,1.07685887813568 
+-0.321941512166996,1.19391186037425,-0.097274052961326,-0.34550705626918,-0.348022258206239,0.152747153729227,0.858005646187049,0.984721541404724,-0.328294575214386,-1.66347658634186,-0.491117686033249,0.195964187383652,-1.13326239585876,-1.72793412208557 +-0.527352112430607,1.05666950494206,-0.002039115192397,0.730525863257351,0.605101835506915,-0.0800967560201,0.306216004567562,2.16129970550537,-0.27230441570282,1.98679971694946,-0.746099352836609,-1.75452637672424,-0.681978821754456,-1.81680619716644 +0.628053257884524,0.995956600787964,-0.117129117213013,0.624450525403548,-0.657848392763224,0.293551276633517,0.301868649330978,-1.06492877006531,-0.689320921897888,-1.89582216739655,0.356725573539734,1.56281304359436,-0.959017932415009,2.3663318157196 +0.857540857823805,0.473360858893732,0.411945514860971,0.649313984710093,-0.452514521515561,0.588199085223802,0.166264227118541,1.27484011650085,1.43356049060822,-1.77172899246216,-0.230303525924683,1.27137053012848,0.313244342803955,0.053516868501902 +-0.379478787617769,1.10629264030008,-0.01469814840505,-0.12357617729377,0.26081414485868,-0.667230225051596,0.686664937960786,-1.17543721199036,0.285774886608124,-1.70625436306,-0.480580538511276,1.94856560230255,1.47674024105072,-2.27068758010864 +0.964610580999139,0.381875717204817,0.096499355790074,0.676442646038083,-0.698588645784445,-0.141899020602452,-0.185105155395834,1.3994003534317,1.51703858375549,1.2214081287384,-0.142985045909882,0.965710639953613,-0.036180999130011,2.06186008453369 +-0.125927882515087,0.953182522538202,-0.249272289735829,0.934571799921988,-0.353418767239977,-0.039870487593845,0.009003886504144,1.92499876022339,-0.824121713638306,0.850838899612427,-1.56785261631012,-1.60747575759888,1.0047550201416,-0.255334943532944 
+0.228670148656106,0.405294540026063,0.78566292381853,0.093770257845315,-0.512313263803796,0.853644076864461,-0.00583510998589,0.171032756567001,1.26858866214752,1.04518735408783,-0.242275148630142,-0.976981401443481,1.55835485458374,-0.696242332458496 +-0.864170416915797,0.572477771505419,0.308833952909195,0.799547716800972,0.51661452208157,0.172703042513711,-0.252995144695092,2.16114449501038,-1.29079627990723,-1.13794934749603,0.422983109951019,-1.58790934085846,-0.379500567913055,0.406428605318069 +-0.086833531101529,1.26142004232052,-0.157127987435393,0.856520570456875,0.154499976151293,0.186937733066628,-0.455583750488743,-1.48611009120941,-0.117594733834267,-1.09735810756683,-0.548085987567902,2.0996253490448,0.695748031139374,-0.65185809135437 +-0.224571768776128,0.965543710437858,-0.454342831791305,0.832578518046094,0.528573665838438,0.056647208639745,-0.155608434294582,-0.246502116322517,-0.198775932192802,0.5822873711586,1.07710993289948,-1.33364486694336,1.10879325866699,-0.135208860039711 +-0.063281385891176,0.918850765575839,0.546927242595696,-0.061514080586567,0.176839105289233,-0.194568675996848,0.962853560543308,0.619490683078766,1.19062793254852,0.863060355186462,1.30468881130219,1.01496946811676,0.308291167020798,1.30862879753113 +0.203240878063783,1.01955086362581,0.295364055102529,0.777472161600101,-0.176335884011322,-0.181418945838496,-0.575786297195114,-1.10046923160553,-0.370542675256729,1.00188100337982,-1.41339313983917,-0.185629144310951,-1.35502791404724,-1.11336266994476 +-0.048208774052019,1.12561379046006,-0.389976122502398,0.499681727723347,-0.412518392846953,-0.50339371420367,0.571613081588072,2.20388555526733,0.031262047588825,-2.02111482620239,0.618494987487793,-0.017173374071717,-0.784897983074188,1.52073585987091 
+-0.347864080265155,1.25208413074213,-0.136952451124811,0.983451200718103,0.148200433221111,-0.103766156200301,-0.009641173507943,1.4613424539566,-0.432672441005707,-1.96504497528076,0.195305839180946,1.83104836940765,0.035984437912703,-1.30533921718597 +-0.660730726766445,0.359901997440489,0.201063453924481,0.259760923149527,-0.484420066739764,0.737789262411178,-0.391827086882872,-2.12178111076355,1.49276566505432,1.69086110591888,-1.41702246665955,-1.16274857521057,0.773430526256561,-1.986976146698 +0.697988459405288,0.553914765036702,-0.157679257951369,0.6013443076691,-0.338612372997193,-0.575101856995332,-0.439300055278508,-0.642451941967011,-1.36695218086243,-1.79795575141907,1.51899361610413,-0.207648172974586,1.03474676609039,1.67819547653198 +0.061062905915831,1.22832782927929,0.42997277686964,0.951130785972439,-0.053390665176946,0.253128043172948,-0.168599699308295,0.186012372374535,0.418193429708481,-0.268376588821411,-0.074450545012951,-0.842691719532013,0.063423246145249,0.592402994632721 +0.199547234362016,0.627424125323426,-0.661959353702476,0.653356265253686,-0.347009265705389,0.495733837600203,0.454926502224851,0.25161224603653,-0.996740877628326,-0.933246552944183,0.831357479095459,-1.10136520862579,-1.54066407680511,-1.56143522262573 +0.616056533225128,1.02746937245706,0.330934345184125,0.123313443495479,0.208660515420551,0.263116612500772,0.933822377213052,-2.01276779174805,-0.832539916038513,-0.559663772583008,0.008370717056096,1.43823289871216,0.287754058837891,-2.17213129997253 +0.121659843335466,1.23209574134165,-0.050425111213988,0.501000295666875,-0.160007630259349,-0.437135767329695,0.729594807356502,-1.43290305137634,-0.337000340223312,-2.40010118484497,0.292319387197495,1.34939694404602,1.19666361808777,-1.83561825752258 
+0.708255364800549,0.336366314342272,0.238781164794421,0.310977292577168,-0.312124342354714,0.630157539142739,0.639353575318524,-1.54258680343628,-1.02765846252441,-0.696378946304321,1.47776365280151,-0.360515087842941,1.06904888153076,-1.68400931358337 +-0.244860966808423,1.24587651456609,-0.253632049463707,-0.229477203316089,-0.188580431698158,0.025075484593168,0.954541174602322,-2.00725269317627,0.529901027679443,0.583177804946899,0.360274881124496,-2.2754762172699,0.132288172841072,1.09904360771179 +-0.762841927976915,0.497662030768475,-0.570849126142578,0.531144323244537,0.728062377035855,-0.030677513330595,-0.432284366136291,1.01456761360168,-1.30739498138428,-0.706560015678406,0.219337075948715,0.512083053588867,-0.158802464604378,-2.04325819015503 +0.0301702688036,1.19949008539334,-0.233851675928276,0.593193673572835,0.384446482659661,-0.425618668576406,-0.564952136523995,-2.41525387763977,0.293216437101364,0.420879125595093,0.513094186782837,-1.04464721679688,1.22705805301666,1.35793542861938 +0.014951676186971,1.29019906579111,0.141398850380279,0.80855058442395,0.097291696252603,0.347941313965433,0.464453571742094,-2.43373394012451,-0.010759285651147,2.01016044616699,-0.097374081611633,-2.39843463897705,-0.655430376529694,-2.43921828269958 +0.617718792361255,0.983315459224614,0.176832258666868,-0.19079672181237,-0.233412163165239,0.087497971488244,0.949452198911361,1.10525691509247,0.942209243774414,-1.3262711763382,0.127341210842133,-2.29564762115479,1.07525682449341,-0.545569539070129 +-0.645530841211927,0.177988792066627,0.313251363674584,-0.009436365913785,-0.265571559978458,0.963493104156757,-0.032615023705735,-0.663061022758484,1.36240708827972,-1.08700120449066,-1.05327117443085,-2.00707769393921,-1.20538365840912,1.00517332553864 
+0.073167493675162,0.821764571657435,0.575812370209355,0.624044652248538,-0.364346761983641,0.629175865144625,0.286282098203066,0.415200173854828,0.279424786567688,2.42608666419983,1.18701887130737,1.63146638870239,-1.22081935405731,1.67620182037354 +-0.069675406449229,1.12176372759619,0.281820595989504,0.859842478732164,-0.211565504865627,0.339476771095458,-0.317279798924047,1.81507956981659,0.327413529157639,0.407986223697662,1.15595865249634,2.14545631408691,-0.753958225250244,0.976218998432159 +0.245750401391424,0.976518234534024,0.652926975831676,0.170059163201151,0.340779615432439,0.402509193707542,0.832427464526458,0.012699390761554,0.612366020679474,0.861287355422974,-0.534394204616547,0.632673799991608,0.182209685444832,0.995842814445496 +0.24382135303838,1.15689840628626,0.477119318242821,0.921313442553855,-0.12801379923266,0.267797908093546,0.251153913360523,0.160798966884613,0.617120265960693,-1.30352318286896,0.313988566398621,1.9622415304184,-0.001645527197979,-0.383113831281662 +-0.605478366455286,0.97425608478441,0.417559417099507,-0.301178361488927,0.137174910831896,-0.460315497452486,0.823762272269237,-1.19809448719025,0.858566701412201,-1.45394229888916,0.345582723617554,0.840273201465607,0.200753435492516,-0.70950984954834 +-0.573440758228347,1.08538952912975,-0.118516014866951,0.695405919134404,0.380764405632268,0.232213513925808,-0.563476671199943,1.14628458023071,-0.36615988612175,0.440569192171097,0.579094886779785,-1.27761077880859,-0.005499926395714,-1.71829497814178 +-0.577713342085434,0.820563123650167,-0.362037928037098,0.206392590597009,-0.441428628629169,-0.60577404803728,0.628952038789727,1.75864589214325,-0.80280613899231,1.44370007514954,-0.96798712015152,1.02111029624939,0.613745331764221,-1.32988131046295 
+0.138302157587216,1.14199587472173,-0.19560397688806,0.645650829786833,-0.690460812966654,0.056196463325714,0.32131110977164,-0.544483006000519,0.10510428249836,0.358821421861649,0.558321118354797,2.08619236946106,1.32144129276276,-0.611496329307556 +-0.616904280376582,0.497226062300036,0.579628377379033,0.255260676395013,-0.452340998505152,0.832004200302661,-0.194932344254239,-0.820925891399384,1.20717215538025,2.13999199867249,0.159369692206383,1.42129504680634,-1.37950241565704,1.20559918880463 +-0.126611897428104,0.698363533339438,0.487604805523578,0.627270599841923,0.626584080300186,0.376274579922072,-0.268963613510809,0.579923391342163,0.208352103829384,-0.631639540195465,-1.5590124130249,-1.40874481201172,1.3664128780365,2.34109997749329 +0.256703642883507,0.985965662035167,-0.474644026468992,0.637881939940872,-0.571195986555754,-0.388846923709799,-0.340058591365216,0.101648449897766,-0.871674954891205,0.680574357509613,-0.646894037723541,-2.15447974205017,-1.13724732398987,0.828339815139771 +0.368480444857791,1.11973695342029,0.174180847519709,-0.456280572585493,-0.316526780628434,0.416153709542883,0.72002425393854,-2.27150201797485,-0.596828043460846,0.755089938640595,-0.084245294332504,-1.80126428604126,1.38243043422699,0.998225450515747 +-0.248037236632446,0.997984891041308,-0.338429418942203,0.809442660865486,0.465511732311919,-0.351657009965641,-0.066623968607282,0.638268530368805,0.188850238919258,-0.081894092261791,1.30080103874207,1.15366160869598,-0.300139874219894,-2.06458592414856 +-0.360457941235354,0.876111408216616,0.723413358747915,-0.071537326445128,0.464148801574552,-0.233049084203132,0.851549426207353,-0.670872271060944,0.933017671108246,1.3028017282486,-0.340695202350616,0.859216272830963,-0.085435435175896,1.69431090354919 
+0.426987957060252,0.232254104108508,0.548588847813482,0.114083523189833,0.077970052204838,0.952686770747072,0.27072779232891,-1.93354439735413,-1.20678150653839,2.0512170791626,-1.43280041217804,1.75474166870117,1.00753927230835,-0.18412572145462 +-0.825283278566211,0.304408949990684,0.135939314126004,0.079447955004877,0.983327290567561,0.121384741120633,-0.109641263645029,1.75989902019501,-1.43303632736206,0.533579647541046,0.06353610008955,2.37738108634949,1.515833735466,1.54336178302765 +-0.216718175034519,1.09628521960642,0.222565328436649,0.158886579601356,0.076825051487916,0.234842847686814,0.955877504273852,-2.15745115280151,-0.089647330343723,1.04148995876312,-1.13509917259216,-0.326217204332352,-1.40369355678558,-2.2589328289032 +-0.697348508320503,0.81881624163625,0.023284395418522,0.028334239153153,-0.068682604231789,0.918521011223108,-0.388328498451028,-1.68516874313355,0.796210706233978,-1.64344143867493,0.187828674912453,-1.3171671628952,-1.51519656181335,1.5646630525589 +0.230287711575945,0.979194624080139,0.412807766274565,-0.311958240046239,0.46471273374212,0.240264326954164,0.793093427509295,2.1987407207489,-0.499979764223099,2.07976698875427,1.30679500102997,-2.06130075454712,-0.093529343605042,1.42197704315186 +0.413382233385853,1.03630987298274,0.227824664385203,-0.30893312206583,0.28084114394025,0.232783308306051,0.87835101715378,0.267852604389191,-0.01381757017225,-2.34710884094238,1.21004617214203,0.065994210541248,0.45452943444252,-0.429983049631119 +-0.552584435420759,0.682645409436014,-0.388815136172824,0.768673657369574,0.313442182830123,-0.400813015248212,-0.387612865235929,-1.31363391876221,0.906925737857819,-1.37290585041046,-1.36175060272217,-1.72353148460388,0.622546792030335,-1.75211668014526 
+0.55831021738535,0.464102144707506,-0.134563215093199,-0.014504264003811,-0.426082390392616,0.904567112615893,-0.001327289255715,-0.73624861240387,-0.657847821712494,2.15796136856079,-1.38983809947968,0.568065047264099,1.39728653430939,0.534001350402832 +-0.497771465678185,0.904132684121846,-0.259153822615261,0.752999240927675,0.611744511721642,-0.187158445599859,-0.154053600352082,1.85346102714539,-0.080631390213966,-0.832447528839111,1.26871919631958,0.543653190135956,-0.072993040084839,-1.9505363702774 +-0.482067062616781,0.804541366592778,-0.641976682438821,0.089403105871745,-0.076299239405212,-0.38065794806714,0.91721591640136,0.765132486820221,-0.961885392665863,2.26129508018494,-0.508382320404053,-0.07028578221798,-0.929624795913696,-0.204895809292793 +0.44920693385886,1.0422272929826,0.003768906409485,-0.35811228110198,0.247625883454974,0.422751082788055,0.794807233213568,1.24168336391449,-0.087278947234154,0.31125670671463,-1.17347431182861,0.931364059448242,-0.110606156289577,1.57281744480133 diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/LICENSE b/agent/envs_dart/misc/kuka_iiwa_urdf/LICENSE similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/LICENSE rename to agent/envs_dart/misc/kuka_iiwa_urdf/LICENSE diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/README.md b/agent/envs_dart/misc/kuka_iiwa_urdf/README.md similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/README.md rename to agent/envs_dart/misc/kuka_iiwa_urdf/README.md diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/background.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/background.urdf new file mode 100644 index 0000000..4dd3e81 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/background.urdf @@ -0,0 +1,156 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/ground.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/ground.urdf new file mode 100644 index 0000000..e66e1c6 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/ground.urdf @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_2gripper.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_2gripper.urdf new file mode 100755 index 0000000..61ea381 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_2gripper.urdf @@ -0,0 +1,252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_3gripper.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_3gripper.urdf new file mode 100755 index 0000000..d785ab0 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_3gripper.urdf @@ -0,0 +1,252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/iiwa14.urdf 
b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_base.urdf old mode 100644 new mode 100755 similarity index 99% rename from agent/dart_envs/misc/kuka_iiwa_urdf/iiwa14.urdf rename to agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_base.urdf index 37755db..8a40e62 --- a/agent/dart_envs/misc/kuka_iiwa_urdf/iiwa14.urdf +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_base.urdf @@ -249,3 +249,4 @@ + diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_calibration_pin.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_calibration_pin.urdf new file mode 100755 index 0000000..8a40e62 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_calibration_pin.urdf @@ -0,0 +1,252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_0.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_0.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_0.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_0.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_1.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_1.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_1.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_1.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_2.stl 
b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_2.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_2.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_2.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_3.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_3.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_3.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_3.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_4.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_4.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_4.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_4.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_5.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_5.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_5.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_5.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_6.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_6.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_6.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_6.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7.stl diff 
--git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl new file mode 100644 index 0000000..ea2d9d2 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl differ diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric_touch.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric_touch.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric_touch.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric_touch.stl diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_0.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_0.stl new file mode 100644 index 0000000..e057ea1 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_0.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_1.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_1.stl new file mode 100644 index 0000000..4eaa051 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_1.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_2.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_2.stl new file mode 100644 index 0000000..2eb3709 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_2.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_3.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_3.stl new file mode 100644 index 0000000..2892530 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_3.stl differ diff --git 
a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_4.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_4.stl new file mode 100644 index 0000000..dda08fe Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_4.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_5.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_5.stl new file mode 100644 index 0000000..6095451 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_5.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_6.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_6.stl new file mode 100644 index 0000000..6224ddb Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_6.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7.stl new file mode 100644 index 0000000..3d1760b Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric.stl new file mode 100644 index 0000000..ea2d9d2 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric_touch.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric_touch.stl new file mode 100644 index 0000000..f916477 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric_touch.stl differ diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_0.dae 
b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_0.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_0.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_0.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_1.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_1.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_1.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_1.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_2.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_2.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_2.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_2.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_3.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_3.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_3.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_3.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_4.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_4.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_4.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_4.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_5.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_5.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_5.dae rename to 
agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_5.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_6.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_6.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_6.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_6.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric_touch.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric_touch.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric_touch.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric_touch.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_0.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_0.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_0.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_0.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_1.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_1.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_1.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_1.dae diff --git 
a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_2.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_2.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_2.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_2.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_3.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_3.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_3.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_3.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_4.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_4.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_4.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_4.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_5.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_5.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_5.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_5.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_6.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_6.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_6.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_6.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric.dae similarity index 100% rename from 
agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric_touch.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric_touch.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric_touch.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric_touch.dae diff --git a/agent/envs_dart/misc/real_two_joints_targets/target_points_sphere1.csv b/agent/envs_dart/misc/real_two_joints_targets/target_points_sphere1.csv new file mode 100644 index 0000000..a6d539b --- /dev/null +++ b/agent/envs_dart/misc/real_two_joints_targets/target_points_sphere1.csv @@ -0,0 +1,50 @@ +0.387046,0.588643,-0.861801,0.768436,-0.12956,-0.604665,-0.164622,-0.422118,-1.33335,2.56E-05,-3.98E-05,-7.93E-05,-7.1E-05,9.08E-05 +-0.00123494,1.33089,0.0463198,0.999628,0.0003179,0.023818,-0.0133045,-0.026655,0.0476791,2.63E-05,-3.87E-05,-7.92E-05,-7.22E-05,9.05E-05 +0.049673,1.3269,0.0861862,0.964751,-0.013236,0.049476,0.258133,0.522844,0.102511,2.64E-05,-3.8E-05,-7.93E-05,-7.24E-05,9.05E-05 +0.654445,0.803076,0.565835,0.775868,-0.216946,0.474385,0.354854,-2.28371,-1.09747,2.56E-05,-5.39E-07,-7.93E-05,-7.11E-05,9.04E-05 +-0.721295,0.945411,0.286027,0.74044,0.250556,0.368878,-0.502892,-1.19327,0.924425,2.74E-05,-6.17E-05,-7.87E-05,-7.26E-05,9.07E-05 +-0.353213,1.26478,0.0374496,0.730499,0.124349,0.138225,-0.657117,-1.46517,0.374098,2.7E-05,-3.54E-06,-7.87E-05,-7.19E-05,9.05E-05 +-0.508844,1.06333,-0.437258,0.84364,0.155117,-0.337833,0.387401,-2.28067,0.761797,2.76E-05,-4.55E-05,-7.83E-05,-7.26E-05,9.07E-05 +0.0494147,1.33067,-0.0121016,0.786467,-0.0161455,-0.0205759,-0.617078,1.81097,0.0523618,2.69E-05,-2.42E-05,-7.85E-05,-7.14E-05,9.05E-05 
+-0.00502128,1.07908,0.653971,0.932684,0.00139173,0.360674,-0.00356224,-0.00768045,0.738008,2.73E-05,-5.2E-05,-7.87E-05,-7.26E-05,9.07E-05 +0.085346,0.990145,-0.735131,0.9063,-0.0242297,-0.418671,-0.0524157,-0.115579,-0.865447,2.61E-05,-1.24E-05,-7.87E-05,-7.11E-05,9.05E-05 +-0.501228,1.18547,0.110228,0.749413,0.172044,0.213982,-0.602489,1.78727,-0.556235,2.61E-05,-2.67E-05,-7.88E-05,-7.15E-05,9.05E-05 +-0.198982,1.31129,-0.0154172,0.729976,0.0701228,-0.075769,0.675631,1.49347,-0.206806,2.66E-05,-3.28E-05,-7.87E-05,-7.18E-05,9.05E-05 +0.747739,0.874838,0.347293,0.73721,-0.260873,0.408818,0.470462,-2.00561,-1.01259,2.54E-05,-2.44E-05,-7.94E-05,-7.09E-05,9.05E-05 +-0.302458,1.24707,0.257717,0.887859,0.0876099,0.189741,-0.409914,-0.865099,0.42117,2.67E-05,1.23E-05,-7.94E-05,-7.16E-05,9.05E-05 +0.914476,0.689419,-0.000467361,0.578722,-0.406432,-0.406624,-0.578404,1.57131,1.22502,2.71E-05,-6.56E-05,-7.94E-05,-7.22E-05,9.05E-05 +-0.0715548,1.32929,0.0122452,0.763898,0.024105,0.0285794,-0.644253,1.74029,-0.0747768,2.62E-05,-5.53E-05,-7.93E-05,-7.21E-05,9.05E-05 +0.679878,0.977421,-0.318341,0.763009,-0.229179,-0.360351,-0.485223,2.00871,0.882523,2.7E-05,-1.51E-05,-7.92E-05,-7.29E-05,9.07E-05 +0.235172,0.515882,-0.93015,0.755844,-0.080038,-0.642999,-0.0940569,-0.247643,-1.40969,2.46E-05,4.14E-05,-7.94E-05,-7.38E-05,9.07E-05 +-0.916423,0.499866,-0.292216,0.610624,0.385988,-0.528236,0.446227,-1.87947,1.42642,2.74E-05,8.51E-06,-7.88E-05,-7.45E-05,9.05E-05 +-0.0431887,0.636267,0.930911,0.801125,0.0138772,0.598049,-0.018558,-0.0463608,1.28261,2.73E-05,9.59E-06,-7.88E-05,-7.47E-05,9.05E-05 +0.540553,0.872777,-0.624216,0.818908,-0.169779,-0.45538,-0.305277,2.4279,1.01511,2.73E-05,1.71E-05,-7.87E-05,-7.44E-05,9.06E-05 +0.0398599,1.31074,-0.198219,0.989624,-0.0103638,-0.104088,-0.0984974,-0.198444,-0.209547,2.59E-05,-3.04E-05,-7.93E-05,-7.25E-05,9.05E-05 
+-0.196303,0.718404,0.881928,0.822323,0.061408,0.558429,-0.0903956,-0.219013,1.19315,2.68E-05,-3.27E-05,-7.93E-05,-7.31E-05,9.05E-05 +0.075669,0.378383,-0.968876,0.713195,-0.0273016,-0.699882,-0.0277948,-0.0779416,-1.55185,2.54E-05,2.96E-05,-7.93E-05,-7.28E-05,9.06E-05 +0.099022,1.32516,0.0587185,0.867381,-0.0293775,0.0515768,0.494092,-2.10604,-0.118702,2.6E-05,8.03E-06,-7.92E-05,-7.34E-05,9.06E-05 +-0.602865,1.08635,0.23185,0.770479,0.201249,0.293003,-0.52916,-1.20365,0.726825,2.68E-05,-6.15E-05,-7.93E-05,-7.38E-05,9.05E-05 +0.0358194,0.691392,-0.913061,0.818644,-0.0112645,-0.573967,-0.0160366,-0.0392093,-1.22286,2.54E-05,2.71E-05,-7.95E-05,-7.25E-05,9.07E-05 +-0.971686,0.372724,-0.0211701,0.508686,0.491286,-0.502124,0.497743,1.54901,-1.55768,2.54E-05,2.52E-05,-7.93E-05,-7.24E-05,9.06E-05 +-0.120265,1.32323,0.0501203,0.830198,0.0372841,0.0559282,-0.553401,1.96566,-0.134404,2.59E-05,5.51E-05,-7.94E-05,-7.33E-05,9.05E-05 +0.410801,0.764779,-0.78242,0.81707,-0.129329,-0.524492,-0.201441,-0.483477,-1.14123,2.51E-05,5.21E-05,-7.93E-05,-7.22E-05,9.06E-05 +-0.184575,1.30698,0.118094,0.871537,0.0544893,0.0995465,-0.477016,2.13998,-0.227366,2.57E-05,1.8E-05,-7.94E-05,-7.34E-05,9.06E-05 +0.19187,1.3071,-0.104756,0.854481,-0.0577735,-0.0973623,-0.506996,-1.07105,-0.226821,2.54E-05,1.8E-05,-7.93E-05,-7.35E-05,9.04E-05 +-0.0118187,1.3318,0.0155168,0.947461,0.00319775,0.00947526,-0.319715,-0.650927,0.0200873,2.56E-05,1.29E-05,-7.93E-05,-7.35E-05,9.05E-05 +0.140475,1.30749,-0.165288,0.932685,-0.0387288,-0.105367,-0.342778,2.43717,0.225082,2.64E-05,1.47E-05,-7.9E-05,-7.36E-05,9.06E-05 +0.102779,1.12746,-0.58756,0.942375,-0.02806,-0.323191,-0.0817839,-0.173172,-0.660661,2.5E-05,3.58E-05,-7.93E-05,-7.24E-05,9.05E-05 +-0.535461,1.17084,0.024655,0.692579,0.198843,0.208202,-0.661398,-1.52478,0.584131,2.61E-05,-9.83E-06,-7.89E-05,-7.44E-05,9.06E-05 +0.828553,0.631074,-0.429886,0.683338,-0.311878,-0.513152,-0.415279,-1.09219,-1.28814,2.49E-05,2.18E-05,-7.92E-05,-7.31E-05,9.06E-05 
+-0.216545,1.28462,0.207293,0.908383,0.0613059,0.143547,-0.387912,-0.807224,0.313519,2.56E-05,-1.76E-05,-7.93E-05,-7.38E-05,9.04E-05 +0.707388,0.790014,-0.509386,0.755877,-0.240707,-0.469929,-0.387138,2.19488,1.11253,2.68E-05,-1.11E-05,-7.89E-05,-7.44E-05,9.06E-05 +-0.819711,0.681315,0.411842,0.694269,0.303681,0.492411,-0.428135,-1.10521,1.23389,2.68E-05,-2.87E-05,-7.87E-05,-7.34E-05,9.04E-05 +-0.179436,1.27513,0.274082,0.944182,0.0488758,0.163877,-0.28156,-0.579658,0.343762,2.61E-05,-2.06E-05,-7.87E-05,-7.29E-05,9.06E-05 +0.354092,1.25366,-0.14412,0.812879,-0.112028,-0.166542,-0.546756,1.95733,0.404231,2.62E-05,-1.85E-05,-7.88E-05,-7.31E-05,9.06E-05 +-0.529087,1.0994,-0.343717,0.824603,0.165009,-0.303983,0.447656,-2.14692,0.706409,2.67E-05,-2.03E-05,-7.84E-05,-7.35E-05,9.06E-05 +0.0369512,0.944619,0.775654,0.894593,-0.0106146,0.446247,0.0213141,0.0476028,0.925467,2.68E-05,-2.25E-05,-7.83E-05,-7.35E-05,9.04E-05 +0.170645,1.3131,-0.0852475,0.84642,-0.0518719,-0.0838939,-0.523301,-1.1075,-0.19752,2.57E-05,3.6E-07,-7.87E-05,-7.21E-05,9.06E-05 +-0.875839,0.637922,-0.316936,0.656386,0.343186,-0.489171,0.460535,1.22359,-1.28081,2.51E-05,2.34E-06,-7.85E-05,-7.18E-05,9.06E-05 +-0.769683,0.923478,0.186723,0.698596,0.283395,0.360353,-0.549361,1.8088,-0.952423,2.51E-05,-1.74E-05,-7.85E-05,-7.21E-05,9.05E-05 +-0.383404,0.451755,-0.888463,0.724416,0.136114,-0.659014,0.14965,0.407393,-1.47626,2.51E-05,-1.95E-05,-7.87E-05,-7.19E-05,9.07E-05 +-0.573824,1.13832,-0.0985943,0.725532,0.203425,-0.241367,0.611526,1.40064,-0.642273,2.53E-05,-4.61E-05,-7.87E-05,-7.24E-05,9.04E-05 +0.491804,1.01633,0.521674,0.850617,-0.148693,0.374509,0.337763,0.755934,0.829493,2.66E-05,-5.04E-05,-7.85E-05,-7.32E-05,9.06E-05 diff --git a/agent/envs_other/cartpole.py b/agent/envs_other/cartpole.py new file mode 100755 index 0000000..6af1ea3 --- /dev/null +++ b/agent/envs_other/cartpole.py @@ -0,0 +1,327 @@ +# modified from https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py +# to 
include simulating disturbed models and an analytical disturbance observer +# the environment is also changed to be episodic with maximum allowed time steps +# the action space is also changed to be continuous Box space + +import math +import gym +from gym import spaces, logger +from gym.utils import seeding +import numpy as np + + +class CartPoleEnv(gym.Env): + """ + Description: + A pole is attached by an un-actuated joint to a cart, which moves along + a frictionless track. The pendulum starts upright, and the goal is to + prevent it from falling over by increasing and reducing the cart's + velocity. + Source: + This environment corresponds to the version of the cart-pole problem + described by Barto, Sutton, and Anderson + Observation: + Type: Box(4) + Num Observation Min Max + 0 Cart Position -4.8 4.8 + 1 Cart Velocity -Inf Inf + 2 Pole Angle -0.418 rad (-24 deg) 0.418 rad (24 deg) + 3 Pole Angular Velocity -Inf Inf + Actions: + Type: Discrete(2) + Num Action + 0 Push cart to the left + 1 Push cart to the right + Note: The amount the velocity that is reduced or increased is not + fixed; it depends on the angle the pole is pointing. This is because + the center of gravity of the pole increases the amount of energy needed + to move the cart underneath it + Reward: + Reward is 1 for every step taken, including the termination step + Starting State: + All observations are assigned a uniform random value in [-0.05..0.05] + Episode Termination: + Pole Angle is more than 12 degrees. + Cart Position is more than 2.4 (center of the cart reaches the edge of + the display). + Episode length is greater than max_ts. + Solved Requirements: + Considered solved when the average return is greater than or equal to + 195.0 over 100 consecutive trials. 
+ """ + + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': 50 + } + + def __init__(self, max_ts, disturbance=None): + self.gravity = 9.8 + self.masscart = 1.0 + self.masspole = 0.1 + self.total_mass = (self.masspole + self.masscart) + self.length = 0.5 # actually half the pole's length + self.polemass_length = (self.masspole * self.length) + self.force_mag = 10.0 + self.tau = 0.02 # seconds between state updates + self.kinematics_integrator = 'euler' + + self.ts = 0 + self.max_ts = max_ts + self.disturbance = disturbance + + # Angle at which to fail the episode + self.theta_threshold_radians = 12 * 2 * math.pi / 360 + self.x_threshold = 2.4 + + # Angle limit set to 2 * theta_threshold_radians so failing observation + # is still within bounds. + high = np.array([self.x_threshold * 2, + np.finfo(np.float32).max, + self.theta_threshold_radians * 2, + np.finfo(np.float32).max], + dtype=np.float32) + + # self.action_space = spaces.Discrete(2) + self.action_space = spaces.Box(low=-1., high=1., shape=(1,), dtype=np.float32) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.gravity_n = self.gravity + self.masscart_n = self.masscart + self.masspole_n = self.masspole + self.total_mass_n = (self.masspole_n + self.masscart_n) + self.length_n = self.length + self.polemass_length_n = (self.masspole_n * self.length_n) + self.force_mag_n = self.force_mag + + self.obs = None + self.obs_prev = None + self.act_prev = None + + self.action_dist = 0.0 + self.obs_noise = np.zeros(4) + if self.disturbance is not None: + self._apply_disturbance() + + self.seed() + self.viewer = None + self.state = None + + self.steps_beyond_done = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def set_disturbance(self, disturbance): + self.disturbance = disturbance + self._apply_disturbance() + self.reset() + + def _apply_disturbance(self): + for dist_type, _dist in 
self.disturbance.items(): + for dist_var, dist_val in _dist.items(): + if dist_type == 'parametric_disturbance': + if dist_var == 'cart_mass': + self.masscart *= dist_val + self.total_mass = (self.masspole + self.masscart) + elif dist_var == 'pole_mass': + self.masspole *= dist_val + self.total_mass = (self.masspole + self.masscart) + self.polemass_length = (self.masspole * self.length) + elif dist_var == 'pole_length': + self.length *= dist_val + self.polemass_length = (self.masspole * self.length) + elif dist_var == 'gravity': + self.gravity *= dist_val + else: + raise Exception('* unknown parameter!') + + elif dist_type == 'action_disturbance': + if dist_var == '0': + self.action_dist = (dist_val - 1.0) * self.action_space.high[0] + else: + raise Exception('* unknown action!') + + elif dist_type == 'observation_noise': + obs_ind = int(dist_var) + if obs_ind in range(self.observation_space.shape[0]): + self.obs_noise[obs_ind] =\ + (dist_val - 1.0) * self.observation_space.high[obs_ind - (obs_ind % 2)] # should not be inf + else: + raise Exception('* unknown observation!') + else: + raise Exception('* unknown disturbance type!') + + def step(self, action): + self.ts += 1 + + # err_msg = "%r (%s) invalid" % (action, type(action)) + # assert self.action_space.contains(action), err_msg + + x, x_dot, theta, theta_dot = self.state + self.act_prev = action + action_dist = self.np_random.uniform(low=0., high=np.abs(self.action_dist)) * np.sign(self.action_dist) + action = np.clip(action[0] + action_dist, -1., 1.) 
+ force = self.force_mag * action + # force = self.force_mag if action == 1 else -self.force_mag + costheta = math.cos(theta) + sintheta = math.sin(theta) + + # For the interested reader: + # https://coneural.org/florian/papers/05_cart_pole.pdf + temp = (force + self.polemass_length * theta_dot ** 2 * sintheta) / self.total_mass + thetaacc = (self.gravity * sintheta - costheta * temp) /\ + (self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)) + xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass + + if self.kinematics_integrator == 'euler': + x = x + self.tau * x_dot + x_dot = x_dot + self.tau * xacc + theta = theta + self.tau * theta_dot + theta_dot = theta_dot + self.tau * thetaacc + else: # semi-implicit euler + x_dot = x_dot + self.tau * xacc + x = x + self.tau * x_dot + theta_dot = theta_dot + self.tau * thetaacc + theta = theta + self.tau * theta_dot + + self.state = (x, x_dot, theta, theta_dot) + + done = bool( + x < -self.x_threshold + or x > self.x_threshold + or theta < -self.theta_threshold_radians + or theta > self.theta_threshold_radians + ) + + if not done: + reward = 1.0 + elif self.steps_beyond_done is None: + # Pole just fell! + self.steps_beyond_done = 0 + reward = 1.0 + else: + if self.steps_beyond_done == 0: + logger.warn( + "You are calling 'step()' even though this " + "environment has already returned done = True. You " + "should always call 'reset()' once you receive 'done = " + "True' -- any further steps are undefined behavior." 
+ ) + self.steps_beyond_done += 1 + reward = 0.0 + + if self.ts > self.max_ts: + reward = 100.0 + done = True + + return self._get_obs(), reward, done, {} + + def reset(self): + self.ts = 0 + self.obs = None + self.act_prev = None + high = np.array([self.x_threshold / 2, self.x_threshold / 2, + self.theta_threshold_radians / 2, self.theta_threshold_radians / 2]) + self.state = self.np_random.uniform(low=-high, high=high, size=(4,)) + self.steps_beyond_done = None + return self._get_obs() + + def _get_obs(self): + self.obs_prev = self.obs + self.obs = np.array(self.state) + obs_noise = self.np_random.uniform(low=0., high=np.abs(self.obs_noise)) * np.sign(self.obs_noise) + self.obs += obs_noise + return self.obs + + def render(self, mode='human'): + screen_width = 600 + screen_height = 400 + + world_width = self.x_threshold * 2 + scale = screen_width/world_width + carty = 100 # TOP OF CART + polewidth = 10.0 + polelen = scale * (2 * self.length) + cartwidth = 50.0 + cartheight = 30.0 + + if self.viewer is None: + from gym.envs.classic_control import rendering + self.viewer = rendering.Viewer(screen_width, screen_height) + l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2 + axleoffset = cartheight / 4.0 + cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + self.carttrans = rendering.Transform() + cart.add_attr(self.carttrans) + self.viewer.add_geom(cart) + l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2 + pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + pole.set_color(.8, .6, .4) + self.poletrans = rendering.Transform(translation=(0, axleoffset)) + pole.add_attr(self.poletrans) + pole.add_attr(self.carttrans) + self.viewer.add_geom(pole) + self.axle = rendering.make_circle(polewidth/2) + self.axle.add_attr(self.poletrans) + self.axle.add_attr(self.carttrans) + self.axle.set_color(.5, .5, .8) + self.viewer.add_geom(self.axle) + self.track = rendering.Line((0, carty), 
(screen_width, carty)) + self.track.set_color(0, 0, 0) + self.viewer.add_geom(self.track) + + self._pole_geom = pole + + if self.state is None: + return None + + # Edit the pole polygon vertex + pole = self._pole_geom + l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2 + pole.v = [(l, b), (l, t), (r, t), (r, b)] + + x = self.state + cartx = x[0] * scale + screen_width / 2.0 # MIDDLE OF CART + self.carttrans.set_translation(cartx, carty) + self.poletrans.set_rotation(-x[2]) + + return self.viewer.render(return_rgb_array=mode == 'rgb_array') + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None + + def estimate_disturbance(self): + if self.act_prev is None: + return 0. + + # gravity = self.gravity_n + # masscart = self.masscart_n + # masspole = self.masspole_n + total_mass = self.total_mass_n + # length = self.length_n + polemass_length = self.polemass_length_n + force_mag = self.force_mag_n + tau = self.tau + + x, x_dot, theta, theta_dot = self.obs + x_prev, x_dot_prev, theta_prev, theta_dot_prev = self.obs_prev + + temp_prev = (x_dot - x_dot_prev) / tau +\ + (polemass_length * math.cos(theta_prev) * ((theta_dot - theta_dot_prev) / tau)) / total_mass + force_prev = total_mass * temp_prev - polemass_length * theta_dot_prev ** 2 * math.sin(theta_prev) + act_prev_est = force_prev / force_mag + + return act_prev_est - self.act_prev + + def random_action(self): + return self.action_space.sample() + + +if __name__ == "__main__": + from stable_baselines3.common.env_checker import check_env + env = CartPoleEnv(200) + check_env(env) diff --git a/agent/envs_other/nlinks_box2d.py b/agent/envs_other/nlinks_box2d.py new file mode 100755 index 0000000..cf56a48 --- /dev/null +++ b/agent/envs_other/nlinks_box2d.py @@ -0,0 +1,470 @@ +""" +A Box2D environment of a planar manipulator with N links +Box2D only accepts speed control of the joints not force/torque. +The action space is the joint velocities. 
+""" + +import time +import Box2D +import gym +import numpy as np +from Box2D.b2 import (circleShape, polygonShape, fixtureDef, revoluteJointDef, weldJointDef) +from gym import spaces +from gym.utils import seeding, EzPickle +from gym.envs.classic_control import rendering + +# Slow rendering with output +DEBUG = False +FPS = 50 # Frames per Second + + +class NLinksBox2DEnv(gym.Env, EzPickle): + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': FPS + } + + SCALE = 70.0 + VIEWPORT_W, VIEWPORT_H = 800, 800 + + # Punishing energy (integral of applied forces); huge impact on performance + POWER = False + + LINK_MASS = 1.0 # kg + LINK_HEIGHT = 0.5 # m + LINK_WIDTH = 0.10 # m + + # Min distance for end effector target + MIN_DISTANCE = 0.05 # m + + MAX_JOINT_VEL = 3.0 + + GRAVITY = -9.81 # m/s^2 + + COLOR_ANCHOR = (0., 0., 0.) + COLOR_JOINTS = (0.6, 0.6, .8) + COLOR_BORDER = (0., 0., 0.) + COLOR_LINKS = (.6, .6, 1.) + COLOR_EE = (.6, 1., .6) + COLOR_TARGET = (1., 0.6, 0.6) + COLOR_BACKGROUND = (0.9, 0.9, 1.0) + + MAX_JOINT_TORQUE = 10000 # 80 + + ANCHOR_X = 0.0 + ANCHOR_Y = 0.0 + + FIX_CIRCLE = fixtureDef( + shape=circleShape(radius=LINK_WIDTH), + density=1e-3, + friction=0.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001) + + FIX_POLY = fixtureDef( + shape=polygonShape(box=(LINK_WIDTH / 2, LINK_HEIGHT / 2)), + density=LINK_MASS / (LINK_WIDTH * LINK_HEIGHT), + friction=0.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001) + + time_step = n_links = np_random = world = viewer = None + draw_list = [] + + # The Box2D objects below need to be destroyed when resetting world to release allocated memory + anchor = None # static body to hold the manipulator base fixed + target = None # static body to represent the target position + end_effector = None # dynamic body of end effector + links = [] # dynamic body of links + joint_bodies = [] # dynamic body of joints + joint_fixes = [] # weld joints to connect previous link to 
the next joint_body + joints = [] # revolute joints to connect previous joint_body to the next link + + def __init__(self, max_ts, n_links, disturbance=None): + + self.max_ts = max_ts + self.n_links = n_links + self.disturbance = disturbance + + EzPickle.__init__(self) + self.seed() + + len_arms = self.n_links * self.LINK_HEIGHT + # x,y observation space is twice big to include anchor deviations from (0,0) + high = [len_arms * 2.0, len_arms * 2.0, len_arms * 2.0, len_arms * 2.0] + for _ in range(self.n_links): + high.append(np.pi) + high.append(self.MAX_JOINT_VEL) + high = np.array(high) + self.action_space = spaces.Box(-np.ones(self.n_links), np.ones(self.n_links), dtype=np.float32) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.action_dist = np.zeros(self.n_links) + self.obs_noise = np.zeros(self.observation_space.shape[0]) + if self.disturbance is not None: + self._apply_disturbance() + + self.world = Box2D.b2World(gravity=(0, self.GRAVITY)) + + self.reset() + + def _destroy(self): + + for idx in range(len(self.joints)): + self.world.DestroyJoint(self.joints[idx]) + self.joints = [] + + for idx in range(len(self.joint_fixes)): + self.world.DestroyJoint(self.joint_fixes[idx]) + self.joint_fixes = [] + + for idx in range(len(self.joint_bodies)): + self.world.DestroyBody(self.joint_bodies[idx]) + self.joint_bodies = [] + + for idx in range(len(self.links)): + self.world.DestroyBody(self.links[idx]) + self.links = [] + + if self.end_effector: + self.world.DestroyBody(self.end_effector) + self.end_effector = None + + if self.target: + self.world.DestroyBody(self.target) + self.target = None + + if self.anchor: + self.world.DestroyBody(self.anchor) + self.anchor = None + + def _create_anchor(self): + self.anchor = self.world.CreateStaticBody(position=(self.ANCHOR_X, self.ANCHOR_Y), fixtures=self.FIX_CIRCLE) + self.anchor.color1 = self.COLOR_ANCHOR + self.anchor.color2 = self.COLOR_BORDER + # print(f"anchor mass: {self.anchor.mass}") + # 
print(f"anchor inertia: {self.anchor.inertia}") + # print(f"anchor local center: {self.anchor.localCenter}") + # print(f"anchor world center: {self.anchor.worldCenter}") + + def _create_first_arm(self): + self.joint_bodies.append( + self.world.CreateDynamicBody(position=self.anchor.position, fixtures=self.FIX_CIRCLE)) + self.joint_bodies[0].color1 = self.COLOR_JOINTS + self.joint_bodies[0].color2 = self.COLOR_BORDER + + rjd = weldJointDef(bodyA=self.anchor, bodyB=self.joint_bodies[0], + localAnchorA=(0.0, 0.0), localAnchorB=(0.0, 0.0)) + self.joint_fixes.append(self.world.CreateJoint(rjd)) + + self.links.append( + self.world.CreateDynamicBody(position=self.joint_bodies[0].position - (0.0, self.LINK_HEIGHT / 2), + angle=0.0, fixtures=self.FIX_POLY)) + self.links[0].color1 = self.COLOR_LINKS + self.links[0].color2 = self.COLOR_BORDER + # print(f"link1 mass: {self.links[0].mass}") + # print(f"link1 inertia: {self.links[0].inertia}") + # print(f"link1 local center: {self.links[0].localCenter}") + # print(f"link1 world center: {self.links[0].worldCenter}") + + rjd = revoluteJointDef(bodyA=self.anchor, bodyB=self.links[0], localAnchorA=(0.0, 0.0), + localAnchorB=(0.0, self.LINK_HEIGHT / 2), + enableMotor=True, maxMotorTorque=self.MAX_JOINT_TORQUE, motorSpeed=0.0) + self.joints.append(self.world.CreateJoint(rjd)) + + def _create_next_arm(self): + self.joint_bodies.append( + self.world.CreateDynamicBody( + position=self.links[-1].position - (0.0, self.LINK_HEIGHT / 2), fixtures=self.FIX_CIRCLE)) + self.joint_bodies[-1].color1 = self.COLOR_JOINTS + self.joint_bodies[-1].color2 = self.COLOR_BORDER + + rjd = weldJointDef(bodyA=self.links[-1], bodyB=self.joint_bodies[-1], + localAnchorA=(0.0, -self.LINK_HEIGHT / 2), localAnchorB=(0.0, 0.0)) + self.joint_fixes.append(self.world.CreateJoint(rjd)) + + self.links.append( + self.world.CreateDynamicBody(position=self.joint_bodies[-1].position - (0.0, self.LINK_HEIGHT / 2), + angle=0.0, fixtures=self.FIX_POLY)) + 
self.links[-1].color1 = self.COLOR_LINKS + self.links[-1].color2 = self.COLOR_BORDER + # print(f"link1 mass: {self.links[-1].massData.mass}") + # print(f"link1 inertia: {self.links[-1].massData.I}") + # print(f"link1 local center: {self.links[-1].localCenter}") + # print(f"link1 world center: {self.links[-1].worldCenter}") + + rjd = revoluteJointDef(bodyA=self.links[-2], bodyB=self.links[-1], localAnchorA=(0.0, -self.LINK_HEIGHT / 2), + localAnchorB=(0.0, self.LINK_HEIGHT / 2), + enableMotor=True, maxMotorTorque=self.MAX_JOINT_TORQUE, motorSpeed=0.0) + self.joints.append(self.world.CreateJoint(rjd)) + + def _create_end_effector(self): + self.end_effector = self.world.CreateDynamicBody(position=self.links[-1].position - (0.0, self.LINK_HEIGHT / 2), + fixtures=self.FIX_CIRCLE) + self.end_effector.color1 = self.COLOR_EE + self.end_effector.color2 = self.COLOR_BORDER + + rjd = weldJointDef(bodyA=self.links[-1], bodyB=self.end_effector, + localAnchorA=(0.0, -self.LINK_HEIGHT / 2), localAnchorB=(0.0, 0.0)) + self.joint_fixes.append(self.world.CreateJoint(rjd)) + + def _create_target(self): + self.target = self.world.CreateStaticBody(position=(self._random_point()), fixtures=self.FIX_CIRCLE) + self.target.color1 = self.COLOR_TARGET + self.target.color2 = self.COLOR_BORDER + + def _random_point(self): + + len_arms = self.n_links * self.LINK_HEIGHT * 1. 
+ + while True: + x = 2 * len_arms * self.np_random.rand() - len_arms + self.anchor.position[0] + y = 2 * len_arms * self.np_random.rand() - len_arms + self.anchor.position[1] + + pos_1 = np.array([x, y]) + pos_2 = np.array([self.anchor.position[0], self.anchor.position[1]]) + + distance = np.linalg.norm(pos_1 - pos_2) + + if self.n_links == 1: + if abs(distance - len_arms) < self.MIN_DISTANCE: + return x, y + else: + if len_arms * 0.95 > distance: + return x, y + + def _calc_power(self, action_arr): + power = 0 + for action in action_arr: + power += abs(action) + + return power + + def _get_distance(self): + + pos1 = np.array([self.end_effector.position[0], self.end_effector.position[1]]) + pos2 = np.array([self.target.position[0], self.target.position[1]]) + + distance = np.linalg.norm(pos1 - pos2) + + return distance + + def _get_terminal(self): + + distance = self._get_distance() + + if distance < self.MIN_DISTANCE: + return True + else: + return False + + def reset(self): + + self.time_step = 0 + + self._destroy() + + self._create_anchor() + self._create_first_arm() + for _ in range(self.n_links - 1): + self._create_next_arm() + self._create_end_effector() + + self._create_target() + + self.links[self.n_links - 1].ground_contact = False + self.draw_list = [self.anchor] + self.links + self.joint_bodies + [self.end_effector] + [self.target] + + # It is needed to perform one world step to correctly initialize joint angles/velocities + for idx in range(len(self.joints)): + self.joints[idx].motorSpeed = 0.0 + self.world.Step(1.0 / FPS, 6 * 300, 2 * 300) + self.world.ClearForces() + + return self._get_state() + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def _get_state(self): + + tx = self.target.position[0] + ty = self.target.position[1] + + eex = self.end_effector.position[0] + eey = self.end_effector.position[1] + + state = [tx, ty, eex, eey] + + for idx in range(self.n_links): + 
state.append(self.joints[idx].angle % (np.pi * 2) - np.pi) + state.append(self.joints[idx].speed) + + obs_noise = self.np_random.uniform(low=0., high=np.abs(self.obs_noise)) * np.sign(self.obs_noise) + + return np.array(state + obs_noise) + + def step(self, action): + + self.time_step += 1 + + action_dist = self.np_random.uniform(low=0., high=np.abs(self.action_dist)) * np.sign(self.action_dist) + action += action_dist + + action = np.clip(action, -1., 1.) + power = self._calc_power(action) + + speeds = self.MAX_JOINT_VEL * action + + for idx in range(len(self.joints)): + self.joints[idx].motorSpeed = float(speeds[idx]) + + self.world.Step(1.0 / FPS, 6 * 300, 2 * 300) + self.world.ClearForces() + + state = self._get_state() + + done = self._get_terminal() + + if done: + reward = 300 + else: + reward = -self._get_distance() + if self.POWER: + reward -= power + + if self.time_step > self.max_ts: + done = True + + if DEBUG: + print("dist", self._get_distance()) + print("power", power) + print("reward", reward) + print("state", state) + time.sleep(.1) + + return state, reward, done, {} + + def render(self, mode='human'): + if self.viewer is None: + self.viewer = rendering.Viewer(self.VIEWPORT_W, self.VIEWPORT_H) + + dim = self.VIEWPORT_W / self.SCALE / 2 + self.viewer.set_bounds(-dim, dim, -dim, dim) + + self.viewer.draw_polygon([ + (-self.VIEWPORT_H, -self.VIEWPORT_H), + (self.VIEWPORT_H, -self.VIEWPORT_H), + (self.VIEWPORT_H, self.VIEWPORT_H), + (-self.VIEWPORT_H, self.VIEWPORT_H), + ], color=self.COLOR_BACKGROUND) + + for obj in self.draw_list: + for f in obj.fixtures: + trans = f.body.transform + if type(f.shape) is circleShape: + t = rendering.Transform(translation=trans * f.shape.pos) + self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t) + self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t) + else: + path = [trans * v for v in f.shape.vertices] + self.viewer.draw_polygon(path, 
color=obj.color1) + path.append(path[0]) + self.viewer.draw_polyline(path, color=obj.color2, linewidth=2) + + return self.viewer.render(return_rgb_array=mode == 'rgb_array') + + def close(self): + if self.viewer is not None: + self.viewer.close() + self.viewer = None + + def random_action(self): + return self.action_space.sample() + + def set_disturbance(self, disturbance): + self.disturbance = disturbance + self._apply_disturbance() + self.world.gravity = (0, self.GRAVITY) + self.reset() + + def _apply_disturbance(self): + for dist_type, _dist in self.disturbance.items(): + for dist_var, dist_val in _dist.items(): + if dist_type == 'parametric_disturbance': + if dist_var == 'link_mass': + self.LINK_MASS *= dist_val + self.FIX_POLY = fixtureDef( + shape=polygonShape(box=(self.LINK_WIDTH / 2, self.LINK_HEIGHT / 2)), + density=self.LINK_MASS / (self.LINK_WIDTH * self.LINK_HEIGHT), + friction=0.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001) + elif dist_var == 'link_height': + self.LINK_HEIGHT *= dist_val + self.FIX_POLY = fixtureDef( + shape=polygonShape(box=(self.LINK_WIDTH / 2, self.LINK_HEIGHT / 2)), + density=self.LINK_MASS / (self.LINK_WIDTH * self.LINK_HEIGHT), + friction=0.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001) + elif dist_var == 'gravity': + self.GRAVITY *= dist_val + elif dist_var == 'max_joint_torque': + self.MAX_JOINT_TORQUE *= dist_val + else: + raise Exception('* unknown parameter!') + + elif dist_type == 'action_disturbance': + act_ind = int(dist_var) + if act_ind in range(self.n_links): + self.action_dist[act_ind] = (dist_val - 1.0) * self.action_space.high[act_ind] + else: + raise Exception('* unknown action!') + + elif dist_type == 'observation_noise': + obs_ind = int(dist_var) + if obs_ind in range(self.observation_space.shape[0]): + self.obs_noise[obs_ind] = (dist_val - 1.0) * self.observation_space.high[obs_ind] + else: + raise Exception('* unknown observation!') + else: + raise Exception('* unknown 
disturbance type!') + + +if __name__ == '__main__': + """ + Test fct for debugging purposes + """ + num_links = 10 + nlinks = NLinksBox2DEnv(max_ts=200, n_links=num_links) + + from stable_baselines3.common.env_checker import check_env + check_env(nlinks) + + # some silent initialization steps + # for _ in range(100): + # nlinks.render() + # zero_act = np.zeros(num_links) + # nlinks.step(action=zero_act) + + while True: + nlinks.render() + # print("anchor", nlinks.anchor.position) + # print("end_eff", nlinks.end_effector.position) + # rnd_act = np.array([0.3, -0.3]) + # rnd_act = np.array([np.random.rand() * 2 - 1, np.random.rand() * 2 - 1]) + rnd_act = [] + for i in range(num_links): + rnd_act.append(np.random.rand() * 2 - 1) + + s, _, d, _ = nlinks.step(action=rnd_act) + # print("tx: {0:2.1f}; ty: {1:2.1f}".format(s[0], s[1])) + # print("eex: {0:2.1f}; eey: {1:2.1f}".format(s[2], s[3])) + if d: + nlinks.reset() diff --git a/agent/envs_other/pendulum.py b/agent/envs_other/pendulum.py new file mode 100755 index 0000000..144c190 --- /dev/null +++ b/agent/envs_other/pendulum.py @@ -0,0 +1,199 @@ +# modified from https://github.com/openai/gym/blob/master/gym/envs/classic_control/pendulum.py +# to include simulating disturbed models and an analytical disturbance observer +# the environment is also changed to be episodic with maximum allowed time steps + +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +from os import path + + +class PendulumEnv(gym.Env): + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': 30 + } + + def __init__(self, max_ts, disturbance=None): + + self.max_speed = 8. + self.max_torque = 20. + self.dt = 0.05 + self.g = 9.8 + self.m = 1. + self.l = 1. 
+ self.ts = 0 + self.max_ts = max_ts + self.disturbance = disturbance + self.viewer = None + + high = np.array([1., 1., self.max_speed], dtype=np.float32) + self.action_space = spaces.Box( + low=-self.max_torque, + high=self.max_torque, shape=(1,), + dtype=np.float32 + ) + self.observation_space = spaces.Box( + low=-high, + high=high, + dtype=np.float32 + ) + + self.dt_n = self.dt + self.g_n = self.g + self.m_n = self.m + self.l_n = self.l + + self.obs = None + self.obs_prev = None + self.act_prev = None + + self.action_dist = 0.0 + self.obs_noise = np.zeros(3) # TODO: finding robust model against obs noise + if self.disturbance is not None: + self._apply_disturbance() + + self.seed() + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def set_disturbance(self, disturbance): + self.disturbance = disturbance + self._apply_disturbance() + self.reset() + + def _apply_disturbance(self): + for dist_type, _dist in self.disturbance.items(): + for dist_var, dist_val in _dist.items(): + if dist_type == 'parametric_disturbance': + if dist_var == 'pendulum_mass': + self.m *= dist_val + elif dist_var == 'pendulum_length': + self.l *= dist_val + elif dist_var == 'gravity': + self.g *= dist_val + elif dist_var == 'time_step': + self.dt *= dist_val + else: + raise Exception('* unknown parameter!') + + elif dist_type == 'action_disturbance': + if dist_var == '0': + self.action_dist = (dist_val - 1.0) * self.action_space.high[0] + else: + raise Exception('* unknown action!') + + elif dist_type == 'observation_noise': + obs_ind = int(dist_var) + if obs_ind in range(self.observation_space.shape[0]): + self.obs_noise[obs_ind] = (dist_val - 1.0) * self.observation_space.high[obs_ind] + else: + raise Exception('* unknown observation!') + else: + raise Exception('* unknown disturbance type!') + # print('param dist: ', self.m, self.l, self.g, self.dt) + # print('action dist: ', self.action_dist) + # print('obs noise: ', self.obs_noise) + + 
def step(self, u): + self.ts += 1 + th, thdot = self.state # th := theta + + g = self.g + m = self.m + l = self.l + dt = self.dt + + u_r = u[0] + action_dist = self.np_random.uniform(low=0., high=np.abs(self.action_dist)) * np.sign(self.action_dist) + u = u_r + action_dist + u = np.clip(u, -self.max_torque, self.max_torque) + costs = angle_normalize(th) ** 2 + .1 * thdot ** 2 + .01 * (u_r ** 2) + + self.last_u = u_r # for rendering + + newthdot = thdot + (-3 * g / (2 * l) * np.sin(th + np.pi) + 3. / (m * l ** 2) * u) * dt + newth = th + newthdot * dt + newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) + + self.state = np.array([newth, newthdot]) + self.act_prev = u_r + + done = bool(self.ts > self.max_ts) + + return self._get_obs(), -costs, done, {} + + def reset(self): + self.ts = 0 + self.obs = None + self.act_prev = None + high = np.array([np.pi, 1]) + self.state = self.np_random.uniform(low=-high, high=high) + self.last_u = None + return self._get_obs() + + def _get_obs(self): + theta, thetadot = self.state + self.obs_prev = self.obs + self.obs = np.array([np.cos(theta), np.sin(theta), thetadot]) + obs_noise = self.np_random.uniform(low=0., high=np.abs(self.obs_noise)) * np.sign(self.obs_noise) + self.obs += obs_noise + return self.obs + + def render(self, mode='human'): + if self.viewer is None: + from gym.envs.classic_control import rendering + self.viewer = rendering.Viewer(500, 500) + self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2) + rod = rendering.make_capsule(1, .2) + rod.set_color(.8, .3, .3) + self.pole_transform = rendering.Transform() + rod.add_attr(self.pole_transform) + self.viewer.add_geom(rod) + axle = rendering.make_circle(.05) + axle.set_color(0, 0, 0) + self.viewer.add_geom(axle) + fname = path.join(path.dirname(__file__), "misc/clockwise.png") + self.img = rendering.Image(fname, 1., 1.) 
+ self.imgtrans = rendering.Transform() + self.img.add_attr(self.imgtrans) + + self.viewer.add_onetime(self.img) + self.pole_transform.set_rotation(self.state[0] + np.pi / 2) + if self.last_u: + self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2) + + return self.viewer.render(return_rgb_array=mode == 'rgb_array') + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None + + def estimate_disturbance(self): + if self.act_prev is None: + return 0. + + g, m, l, dt = self.g_n, self.m_n, self.l_n, self.dt_n + costh, sinth, thdot = self.obs # th := theta + costh_prev, sinth_prev, thdot_prev = self.obs_prev + + act_prev_est = (m * l ** 2) / 3. * ((thdot - thdot_prev) / dt - 3 * g / (2 * l) * sinth_prev) + + return act_prev_est - self.act_prev + + def random_action(self): + return self.action_space.sample() + + +def angle_normalize(x): + return (((x+np.pi) % (2*np.pi)) - np.pi) + + +if __name__ == "__main__": + from stable_baselines3.common.env_checker import check_env + env = PendulumEnv(200) + check_env(env) diff --git a/agent/iiwa_sample_env.py b/agent/iiwa_sample_env.py deleted file mode 100644 index 4e1ea0a..0000000 --- a/agent/iiwa_sample_env.py +++ /dev/null @@ -1,420 +0,0 @@ -""" -A sample Env class inheriting from the DART-Unity Env for the Kuka LBR iiwa manipulator with 7 links and a Gripper -The parent class takes care of integrating DART Engine with Unity simulation environment -Unity is used as the main simulator for physics/rendering computations. -The Unity interface receives joint velocities as commands and returns joint positions and velocities -DART is used to calculate inverse kinematics of the iiwa chain. -DART changes the agent action space from the joint space to the cartesian space -(position-only or pose/SE(3)) of the end-effector. -action_by_pd_control method can be called to implement a Proportional-Derivative control law instead of an RL policy. 
-Note: Coordinates in the Unity simulator are different from the ones in DART which used here: -The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART -DART uses Eigen library - Geometry module to retrieve transformation matrices of chains of rigidbodies -Documentation: https://eigen.tuxfamily.org/dox/group__Geometry__Module.html -Background details: https://en.wikipedia.org/wiki/Axis%E2%80%93angle_representation -Further code details: https://github.com/dartsim/dart/blob/main/dart/math/Geometry.cpp -""" - -import numpy as np -import dartpy as dart -# import cv2 -# import base64 -from gym import spaces -from dart_envs.iiwa_dart_unity import IiwaDartUnityEnv - - -class IiwaSampleEnv(IiwaDartUnityEnv): - - # Variables below exist in the parent class, hence the names should not be changed - - # Min distance to declare that the target is reached by the end-effector, adapt the values based on your task - MIN_POS_DISTANCE = 0.05 # [m] - MIN_ROT_DISTANCE = 0.1 # [rad] - - # admissible range for joint positions, velocities, accelerations and torques of the iiwa kinematic chain - MAX_JOINT_POS = np.deg2rad([170, 120, 170, 120, 170, 120, 175]) # [rad]: based on the specs - MIN_JOINT_POS = -MAX_JOINT_POS - MAX_JOINT_VEL = np.deg2rad([85, 85, 100, 75, 130, 135, 135]) # [rad/s]: based on the specs - MAX_JOINT_ACC = 3.0 * MAX_JOINT_VEL # [rad/s^2]: just approximation due to no existing data - MAX_JOINT_TORQUE = np.array([320, 320, 176, 176, 110, 40, 40]) # [Nm]: based on the specs - - # admissible range for Cartesian pose translational and rotational velocities and accelerations of the end-effector - MAX_EE_CART_VEL = np.full(3, 10.0) # [m/s] --- not optimized values - MAX_EE_CART_ACC = np.full(3, 3.0) # [m/s^2] --- not optimized values - MAX_EE_ROT_VEL = np.full(3, 4.0) # [rad/s] --- not optimized values - MAX_EE_ROT_ACC = np.full(3, 1.2) # [rad/s^2] --- not optimized values - - def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, - enable_render, state_type, 
with_gripper=True, env_id=0): - - # range of vertical, horizontal pixels for the DART viewer - viewport = (0, 0, 500, 500) - - self.state_type = state_type - self.reset_flag = False - self.with_gripper = with_gripper - - # the init of the parent class should be always called, this will in the end call reset() once - super().__init__(max_ts=max_ts, orientation_control=orientation_control, - use_ik=use_ik, ik_by_sns=ik_by_sns, enable_render=enable_render, - viewport=viewport, env_id=env_id) - - # Some attributes that are initialized in the parent class: - # self.reset_counter --> keeps track of the number of resets performed - # self.reset_state --> reset vector for initializing the Unity simulator at each episode - # self.tool_target --> initial position for the gripper state - - # the lines below should stay as it is - self.MAX_EE_VEL = self.MAX_EE_CART_VEL - self.MAX_EE_ACC = self.MAX_EE_CART_ACC - if self.ORIENTATION_CONTROL: - self.MAX_EE_VEL = np.concatenate((self.MAX_EE_ROT_VEL, self.MAX_EE_VEL)) - self.MAX_EE_ACC = np.concatenate((self.MAX_EE_ROT_ACC, self.MAX_EE_ACC)) - - # the lines below wrt action_space_dimension should stay as it is - self.action_space_dimension = self.n_links # there would be 7 actions in case of joint-level control - if self.USE_IK: - # There are three cartesian coordinates x,y,z for inverse kinematic control - self.action_space_dimension = 3 - if self.ORIENTATION_CONTROL: - # and the three rotations around each of the axis - self.action_space_dimension += 3 - if self.with_gripper: - self.action_space_dimension += 1 # gripper velocity - - # Variables below exist in the parent class, hence the names should not be changed - tool_length = 0.2 # [m] allows for some tolerances in maximum observation - # x,y,z of TCP: maximum reach of arm plus tool length in meters - ee_pos_high = np.array([0.95 + tool_length, 0.95 + tool_length, 1.31 + tool_length]) - ee_pos_low = -np.array([0.95 + tool_length, 0.95 + tool_length, 0.39 + tool_length]) - - 
high = np.empty(0) - low = np.empty(0) - if self.ORIENTATION_CONTROL: - # rx,ry,rz of TCP: maximum orientation in radians without considering dexterous workspace - ee_rot_high = np.full(3, np.pi) - # observation space is distance to target orientation (rx,ry,rz), [rad] - high = np.append(high, ee_rot_high) - low = np.append(low, -ee_rot_high) - - # and distance to target position (dx,dy,dz), [m] - high = np.append(high, ee_pos_high - ee_pos_low) - low = np.append(low, -(ee_pos_high - ee_pos_low)) - - # and joint positions [rad] and possibly velocities [rad/s] - if 'a' in self.state_type: - high = np.append(high, self.MAX_JOINT_POS) - low = np.append(low, self.MIN_JOINT_POS) - if 'v' in self.state_type: - high = np.append(high, self.MAX_JOINT_VEL) - low = np.append(low, -self.MAX_JOINT_VEL) - - # the lines below should stay as it is - self.action_space = spaces.Box(-np.ones(self.action_space_dimension), np.ones(self.action_space_dimension), - dtype=np.float32) - self.observation_space = spaces.Box(low, high, dtype=np.float32) - - self.reset() - - def create_target(self): - """ - defines the target to reach per episode, this should be adapted by the task - should always return rx,ry,rz,x,y,z in order, - i.e., first target orientation rx,ry,rz in radians, and then target position x,y,z in meters - in case of orientation_control=False --> rx,ry,rz are irrelevant and can be set to zero - _random_target_gen_joint_level(): generate a random sample target in the reachable workspace of the iiwa - - :return: Cartesian pose of the target to reach in the task space - """ - # a sample fixed target on the right side of iiwa with orientation pointing downwards - # rx, ry, rz = 0.0, np.pi, 0.0 - # x, y, z = 0.0, 0.57, 0.655 - # target = rx, ry, rz, x, y, z - - # a sample reachable target generator - target = self._random_target_gen_joint_level() - - return target - - def get_pos_error(self): - """ - calculates the Euclidean error from the end-effector to the target position - 
body_node.getTransform().translation() --> Cartesian position (x,y,z) of the center of that body_node - - :return: Euclidean error from the end-effector to the target position - """ - ee = self.dart_chain.getBodyNode('iiwa_link_ee') # The end-effector rigid-body node - target = self.dart_target.getBodyNode(0) # The target rigid-body node - position_error = target.getTransform().translation() - ee.getTransform().translation() - return position_error - - def get_pos_distance(self): - """ - :return: L2-norm of the Euclidean error from the end-effector to the target position - """ - distance = np.linalg.norm(self.get_pos_error()) - return distance - - def get_rot_error(self): - """ - calculates the Quaternion error from the end-effector to the target orientation - body_node.getTransform().rotation() --> Orientation of the body_node in a 3*3 rotation matrix - body_node.getTransform().quaternion() --> Orientation of the body_node in a unit quaternion form (w,x,y,z) - dart.math.logMap() --> calculates an angle-axis representation of a rotation matrix - - :return: returns Quaternion error from the end-effector to the target orientation - """ - ee = self.dart_chain.getBodyNode('iiwa_link_ee') # The end-effector rigid-body node - target = self.dart_target.getBodyNode(0) # The target rigid-body node - quaternion_error = target.getTransform().quaternion().multiply(ee.getTransform().quaternion().inverse()) - orientation_error = dart.math.logMap(quaternion_error.rotation()) - return orientation_error - - def get_rot_distance(self): - """ - :return: L2-norm of the Quaternion error from the end-effector to the target orientation - """ - distance = np.linalg.norm(self.get_rot_error()) - return distance - - def get_state(self): - """ - defines the environment state, this should be adapted by the task - get_pos_error(): returns Euclidean error from the end-effector to the target position - get_rot_error(): returns Quaternion error from the end-effector to the target orientation - - 
:return: state for the policy training - """ - state = np.empty(0) - if self.ORIENTATION_CONTROL: - state = np.append(state, self.get_rot_error()) - state = np.append(state, self.get_pos_error()) - if 'a' in self.state_type: - state = np.append(state, self.dart_chain.getPositions()) - if 'v' in self.state_type: - state = np.append(state, self.dart_chain.getVelocities()) - - # the lines below should stay as it is - self.observation_state = np.array(state) - - return self.observation_state - - def get_reward(self, action): - """ - defines the environment reward, this should be adapted by the task - - :param action: is the current action decided by the RL agent - :return: reward for the policy training - """ - # stands for reducing position error - reward = -self.get_pos_distance() - - # stands for reducing orientation error - if self.ORIENTATION_CONTROL: - reward -= 0.5 * self.get_rot_distance() - - # stands for avoiding abrupt changes in actions - reward -= 0.1 * np.linalg.norm(action - self.prev_action) - - # stands for shaping the reward to increase when target is reached to balance at the target - if self.get_terminal_reward(): - reward += 1.0 * (np.linalg.norm(np.ones(self.action_space_dimension)) - np.linalg.norm(action)) - - # the lines below should stay as it is - self.reward_state = reward - - return self.reward_state - - def get_terminal_reward(self): - """ - checks if the target is reached - get_pos_distance(): returns norm of the Euclidean error from the end-effector to the target position - get_rot_distance(): returns norm of the Quaternion error from the end-effector to the target orientation - - :return: a boolean value representing if the target is reached within the defined threshold - """ - if self.get_pos_distance() < self.MIN_POS_DISTANCE: - if not self.ORIENTATION_CONTROL: - return True - if self.get_rot_distance() < self.MIN_ROT_DISTANCE: - return True - - return False - - def get_terminal(self): - """ - checks the terminal conditions for the 
episode - - :return: a boolean value indicating if the episode should be terminated - """ - - if self.time_step > self.max_ts: - self.reset_flag = True - - return self.reset_flag - - def update_action(self, action): - """ - converts env action to the required unity action by possibly using inverse kinematics from DART - _dart_calc_inv_kinematics() --> changes the action from velocities in the task space (position-only 'x,y,z' - or complete pose 'rx,ry,rz,x,y,z') to velocities in the joint space of the kinematic chain (j1,...,j7) - - :param action: The action vector decided by the RL agent, acceptable range: [-1,+1] - It should be a numpy array with the following shape: [arm_action] or [arm_action, tool_action] - in case of with_gripper=True, tool_action is always a dim-1 scalar value representative of the normalized gripper velocity - arm_action has different dim based on each case of control level: - in case of use_ik=False -> is dim-7 representative of normalized joint velocities - in case of use_ik=True -> there would be two cases: - orientation_control=False -> of dim-3: Normalized EE Cartesian velocity in x,y,z DART coord - orientation_control=True -> of dim-6: Normalized EE Rotational velocity in x,y,z DART coord - followed by Normalized EE Cartesian velocity in x,y,z DART coord - - :return: the command to send to the Unity simulator including joint velocities and possibly gripper position - """ - - # the lines below should stay as it is - self.action_state = action - action = np.clip(action, -1., 1.) 
- - if self.with_gripper: - # This updates the gripper target by accumulating the tool velocity from the action vector - tool_action = action[-1] - self.tool_target = np.clip((self.tool_target + tool_action), 0.0, 90.0) - # This removes the gripper velocity from the action vector for inverse kinematics calculation - action = action[:-1] - - if self.USE_IK: - task_vel = self.MAX_EE_VEL * action - joint_vel = self._dart_calc_inv_kinematics(task_vel) - else: - joint_vel = self.MAX_JOINT_VEL * action - - unity_action = joint_vel - if self.with_gripper: - unity_action = np.append(unity_action, [float(self.tool_target)]) - - return unity_action - - def update(self, observation): - """ - converts the unity observation to the required env state defined in get_state() - with also computing the reward value from the get_reward(...) and done flag, - it increments the time_step, and outputs done=True when the environment should be reset - - :param observation: is the observation received from the Unity simulator within its [X,Y,Z] coordinate system - 'joint_values': indices [0:7], - 'joint_velocities': indices [7:14], - 'ee_position': indices [14:17], - 'ee_orientation': indices [17:20], - 'target_position': indices [20:23], - 'target_orientation': indices [23:26], - 'object_position': indices [26:29], - 'object_orientation': indices [29:32], - 'gripper_position': indices [32:33], ---(it is optional, in case a gripper is enabled) - 'collision_flag': indices [33:34], ---([32:33] in case of without gripper) - - :return: The state, reward, episode termination flag (done), and an empty info dictionary - """ - - # use commented lines below in case you'd like to save the image observations received from the Unity - # base64_bytes = observation['ImageData'][0].encode('ascii') - # image_bytes = base64.b64decode(base64_bytes) - # image = np.frombuffer(image_bytes, np.uint8) - # image = cv2.imdecode(image, cv2.IMREAD_GRAYSCALE) - # print(image.shape) - # cv2.imwrite("test.jpg", image) - 
- if observation['Observation'][-1] == 1.0: - self.reset_flag = True - - # the method below handles synchronizing states of the DART kinematic chain with the observation from Unity - # hence it should be always called - self._unity_retrieve_joint_values(observation['Observation']) - - # Class attributes below exist in the parent class, hence the names should not be changed - self.time_step += 1 - - state = self.get_state() - reward = self.get_reward(self.action_state) - done = bool(self.get_terminal()) - info = {} - - self.prev_action = self.action_state - - return state, reward, done, info - - def reset(self, temp=False): - """ - resets the DART-gym environment and creates the reset_state vector to be sent to Unity - - :param temp: not relevant - - :return: The initialized state - """ - - # takes care of resetting the DART chain and should stay as it is - state = super().reset() - - # creates a random target for reaching task, should be adapted for your specific task - random_target = self.create_target() - - # sets the initial reaching target for the current episode, - # should be always called in the beginning of each episode, - # you might need to call it even during the episode run to change the reaching target for the IK-P controller - self.set_target(random_target) - - # initial position for the gripper state, accumulates the tool_action velocity received in update_action - self.tool_target = 0.0 # should be in range [0.0,90.0] - - # movement control of each joint can be disabled by setting zero for that joint index - active_joints = [1] * 7 - - # the lines below should stay as it is, Unity simulator expects these joint values in radians - joint_positions = self.dart_chain.getPositions().tolist() - joint_velocities = self.dart_chain.getVelocities().tolist() - - # the mapping below for the target should stay as it is, unity expects orientations in degrees - target_positions = self.dart_target.getPositions().tolist() - target_X, target_Y, target_Z = 
-target_positions[4], target_positions[5], target_positions[3] - target_RX, target_RY, target_RZ = np.rad2deg([-target_positions[1], target_positions[2], target_positions[0]]) - target_positions_mapped = [target_X, target_Y, target_Z, target_RX, target_RY, target_RZ] - - # depending on your task, positioning the object might be necessary, - # start from the following sample code, unity expects orientations in degrees - object_X, object_Y, object_Z = np.random.uniform(-1.0, 1.0), 0.05, np.random.uniform(-1.0, 1.0) - object_RX, object_RY, object_RZ = 0.0, 0.0, 0.0 - object_positions_mapped = [object_X, object_Y, object_Z, object_RX, object_RY, object_RZ] - - self.reset_state = active_joints + joint_positions + joint_velocities\ - + target_positions_mapped + object_positions_mapped - - if self.with_gripper: - self.reset_state = self.reset_state + [self.tool_target] - - self.reset_counter += 1 - self.reset_flag = False - - return state - - def action_by_p_control(self, coeff_kp_lin, coeff_kp_rot): - """ - computes the task-space velocity commands proportional to the reaching target error - - :param coeff_kp_lin: proportional coefficient for the translational error - :param coeff_kp_rot: proportional coefficient for the rotational error - - :return: The action in task space - """ - - action_lin = coeff_kp_lin * self.get_pos_error() - action = action_lin - - if self.ORIENTATION_CONTROL: - action_rot = coeff_kp_rot * self.get_rot_error() - action = np.concatenate(([action_rot, action])) - - if self.with_gripper: - tool_vel = 0.0 # zero velocity means no gripper movement - should be adapted for the task - action = np.append(action, [tool_vel]) - - return action diff --git a/agent/main.py b/agent/main.py old mode 100644 new mode 100755 index 2ff37c0..315bda6 --- a/agent/main.py +++ b/agent/main.py @@ -1,84 +1,157 @@ +import os + from config import Config -from iiwa_sample_joint_vel_env import IiwaJointVelEnv -from iiwa_sample_env import IiwaSampleEnv + +import numpy as np 
+from pathlib import Path +import pandas as pd + +# Import envs # from simulator_vec_env import SimulatorVecEnv +from envs.iiwa_sample_joint_vel_env import IiwaJointVelEnv +from envs.iiwa_sample_env import IiwaSampleEnv + +# Monitor envs +from stable_baselines3.common.vec_env import VecMonitor + +# Models # from stable_baselines3 import PPO -import numpy as np +from utils.helpers import set_seeds -def get_env(config_dict, env_dict): - env_key = config_dict['env_key'] +def get_env(config_dict, dart_env_dict, reward_dict, log_dir): + """ + Set-up the env according to the input dictionary settings + """ + env_key = config_dict['env_key'] def create_env(id=0): - # 'dart' should be always included in the env_key when there is a need to use a dart based environment + + ################################################################################################################################# + # Important: 'dart' substring should always be included in the 'env_key' for dart-based envs. E.g. 
'iiwa_sample_dart_unity_env' # + # If 'dart' is not included, IK behaviour can not be used # + ################################################################################################################################# + + # joints control without dart if env_key == 'iiwa_joint_vel': - env = IiwaJointVelEnv(max_ts=250, id=id, config=config_dict) + env = IiwaJointVelEnv(max_ts=dart_env_dict['max_time_step'], id=id, config=config_dict) + + # Reaching the red target sample env + # task-space with dart or joint space control + # model-based control with P-controller available elif env_key == 'iiwa_sample_dart_unity_env': - env = IiwaSampleEnv(max_ts=env_dict['max_time_step'], - orientation_control=env_dict['orientation_control'], - use_ik=env_dict['use_inverse_kinematics'], - ik_by_sns=env_dict['linear_motion_conservation'], - enable_render=env_dict['enable_dart_viewer'], - state_type=config_dict['state'], - with_gripper=config_dict['gripper'], + env = IiwaSampleEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], + task_monitor=dart_env_dict['task_monitor'], with_objects=dart_env_dict['with_objects'], + target_mode=dart_env_dict['target_mode'], target_path=dart_env_dict['target_path'], + goal_type="target", joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], robotic_tool=config_dict["robotic_tool"], env_id=id) + + # Set env seed # + env.seed((id * 150) + (id + 
11)) + return env num_envs = config_dict['num_envs'] env = [create_env for i in range(num_envs)] - env = SimulatorVecEnv(env, config_dict) - return env + env = SimulatorVecEnv(env, config_dict, manual_actions_dict=None, reward_dict=reward_dict) # Set vectorized env + env = VecMonitor(env, log_dir, info_keywords=("success",)) # Monitor envs + return env if __name__ == "__main__": - config_dict = Config.get_config_dict() - env_dict = Config.get_dart_env_dict() - env = get_env(config_dict, env_dict) - - # check policy training and inference for joint velocity environment, use with - - model = PPO("MlpPolicy", env, verbose=1) - model.learn(total_timesteps=100000) - model.save("ppo_trained") - - del model # remove to demonstrate saving and loading - - model = PPO.load("ppo_trained") - - obs = env.reset() - for x in range(1000): - action, _states = model.predict(obs, True) - obs, rewards, dones, info = env.step(action) - - - # check model-based controllers, update environment key in config.py, comment lines 40-51 and uncomment below code - # to test dart-enabled environments - - # control_kp = 1.0 / env.observation_space.high[0] - - # if config_dict['seed'] is not None: - # env.seed(config_dict['seed']) - - # obs = env.reset() - # episode_rewards = [] - # for _ in range(100): - # cum_reward = 0 - # while True: - # if env_dict['use_inverse_kinematics']: - # action = np.reshape(env.env_method('action_by_p_control', control_kp, 2.0 * control_kp), - # (config_dict['num_envs'], env.action_space.shape[0])) - # else: - # action = np.reshape(env.env_method('random_action'), - # (config_dict['num_envs'], env.action_space.shape[0])) - # obs, rewards, dones, info = env.step(action) - # cum_reward += rewards - # if env_dict['enable_dart_viewer']: - # env.render() - # if dones.any(): - # episode_rewards.append(cum_reward) - # break - # mean_reward = np.mean(episode_rewards) - - # print(mean_reward) + main_config = Config() + + # Parse configs # + config_dict = 
main_config.get_config_dict() + dart_env_dict = main_config.get_dart_env_dict() + reward_dict = main_config.get_reward_dict() + + # Create new folder if not exists for logging # + Path(config_dict["log_dir"]).mkdir(parents=True, exist_ok=True) + + # Build env # + env = get_env(config_dict, dart_env_dict, reward_dict, config_dict["log_dir"]) + + # Train the agent # + if(config_dict['simulation_mode'] == 'train'): + + # Set global seeds and get a PPO seed # + ppo_seed = set_seeds(config_dict["seed"]) + + # Define the model and its hyperparameters # + model = PPO(policy="MlpPolicy", env=env, seed=ppo_seed, tensorboard_log=config_dict["log_dir"], verbose=1) + + # Play some episodes # + # If you retrain the model, you may need to set reset_num_timesteps=False # + model.learn(total_timesteps=dart_env_dict["total_timesteps"], reset_num_timesteps=True, tb_log_name=config_dict["tb_log_name"], log_interval=2) + + print("Training ended. Saving a checkpoint at: " + config_dict["log_dir"]) + + # Save the last model # + model.save(os.path.join(config_dict["log_dir"], "ppo_trained")) + + del model # remove + + elif(config_dict['simulation_mode'] == 'evaluate'): + print("===================================================") + print("RL-based evaluation") + print("===================================================") + + # Load trained agent # + model = PPO.load(os.path.join(config_dict["log_dir"], "ppo_trained")) + model.policy.set_training_mode(False) + + obs = env.reset() + for x in range(1000): # Run some steps for each env + action, _states = model.predict(obs, deterministic=True) # Important: set deterministic to True to use the best learned policy (no exploration) + + obs, rewards, dones, info = env.step(action) + + # Render # + if dart_env_dict['enable_dart_viewer'] and config_dict['env_key'] != 'iiwa_joint_vel': + env.render() + + elif(config_dict['simulation_mode'] == 'evaluate_model_based' and config_dict['env_key'] != 'iiwa_joint_vel'): + # check model-based controllers 
(e.g. P-controller) # + print("===================================================") + print("Model-based evaluation") + print("===================================================") + + control_kp = 1.0 / env.observation_space.high[0] + + obs = env.reset() + episode_rewards = [] + for _ in range(5): # Play some episodes + cum_reward = 0 + + while True: # Play until we have a successful episode + if dart_env_dict['use_inverse_kinematics']: # Generate an action for the current observation using a P-controller + action = np.reshape(env.env_method('action_by_p_control', control_kp, 2.0 * control_kp), + (config_dict['num_envs'], env.action_space.shape[0])) + else: # Random action + action = np.reshape(env.env_method('random_action'), + (config_dict['num_envs'], env.action_space.shape[0])) + + obs, rewards, dones, info = env.step(action) # Play this action + cum_reward += rewards + + # Render # + if dart_env_dict['enable_dart_viewer']: + env.render() + + if dones.any(): + episode_rewards.append(cum_reward) + break + + mean_reward = np.mean(episode_rewards) + print("Mean reward: " + str(mean_reward)) + else: + print("You have set an invalid simulation_mode or some other settings in the config.py are wrong - aborting") diff --git a/agent/main_advanced.py b/agent/main_advanced.py new file mode 100755 index 0000000..bbc7ab7 --- /dev/null +++ b/agent/main_advanced.py @@ -0,0 +1,123 @@ +import os +from pathlib import Path +import numpy as np + +from config_advanced import ConfigAdvanced + +# Model +from stable_baselines3 import PPO + +# Policy networks +from utils.policy_networks import PolicyNetworkVanillaReLU + +from utils.helpers import set_seeds + +# Set-up envs +from utils_advanced.helpers import get_env + +from utils_advanced.boxes_generator import RandomBoxesGenerator +from utils_advanced.monitoring_agent import SaveOnBestTrainingRewardCallback + +# Evaluate methods +from utils_advanced.evaluate import evaluation_mode + +def train_mode(ppo_seed, config_dict, 
dart_env_dict, gym_env_dict, hyper_dict, goal_dict, reward_dict, manual_actions_dict=None, randomization_dict=None, randomBoxesGenerator=None): + """ + train the RL agent. Play some episodes, record metrics, and save checkpoints + + :param ppo_seed: ppo seed + :param config_dict: configuration dictionary + :param dart_env_dict: dart dictionary + :param gym_env_dict: env dictionary + :param hyper_dict: hyperparameters dict - models related + :param goal_dict: goal dict - target, or green box + :param reard_dict: reward dict - reward terms values and ranges + :param manual_actions_dict: manual action dict + :param randomization_dict: randomization dict + :param randomBoxesGenerator: boxes generator to spawn boxes for planar grasping + """ + + # Check if other runs already exist in the log dir # + run_num = len([file for file in os.listdir(config_dict["log_dir"]) if file.startswith("run_")]) # How many folders already exist + config_dict["log_dir"] += "run_" + str(run_num) + "/" + + # Create new folder for logs # + Path(config_dict["log_dir"]).mkdir(parents=True, exist_ok=True) + + # Build env # + env = get_env(config_dict, dart_env_dict, gym_env_dict, config_dict["log_dir"], reward_dict, + goal_dict, manual_actions_dict, randomization_dict, randomBoxesGenerator) + + print("Using for training the env: " + config_dict["env_key"]) + + # Callback - save checkpoints and log best reward # + callback_save_on_best = SaveOnBestTrainingRewardCallback(config_dict["check_freq"], save_model_freq=config_dict["save_model_freq"], + log_dir=config_dict["log_dir"], total_timesteps=dart_env_dict["total_timesteps"], + num_envs=config_dict["num_envs"], best_mean=config_dict["best_mean_reward"]) + + # Select policy network # + if hyper_dict["policy_network"] is None: + policy_kwargs = None + print("Using the default SB3 policy") + elif hyper_dict["policy_network"] == "PolicyNetworkVanillaReLU": + policy_kwargs = PolicyNetworkVanillaReLU() + print("Using custom policy with kwargs:\n") + 
print(policy_kwargs) + else: + policy_kwargs = None + print("This policy network is not supported. Using the default SB3 policy") + + # Define the RL model and its hyperparameters # + model = PPO(policy=hyper_dict["policy"], env=env, seed=ppo_seed, learning_rate=hyper_dict["learning_rate"], ent_coef=hyper_dict["ent_coef"], + vf_coef=hyper_dict["vf_coef"], max_grad_norm=hyper_dict["max_grad_norm"], gae_lambda=hyper_dict["gae_lambda"], n_epochs=hyper_dict["n_epochs"], + n_steps=hyper_dict["n_steps"], batch_size=hyper_dict["batch_size"], gamma=hyper_dict["gamma"], clip_range=hyper_dict["clip_range"], + tensorboard_log=config_dict["log_dir"], policy_kwargs=policy_kwargs, verbose=1) + + # Play some episodes # + model.learn(total_timesteps=dart_env_dict["total_timesteps"], callback=callback_save_on_best, reset_num_timesteps=True, tb_log_name=config_dict["tb_log_name"], log_interval=2) + + print("Training ended") + # Save the last model # + #model.save(os.path.join(config_dict["log_dir"], "ppo_trained")) + + del model # remove + +if __name__ == "__main__": + + main_config = ConfigAdvanced() + + # Parse configs # + config_dict = main_config.get_config_dict() + dart_env_dict = main_config.get_dart_env_dict() + gym_env_dict = main_config.get_env_dict() + reward_dict = main_config.get_reward_dict() + hyper_dict = main_config.get_hyper_dict() + goal_dict = main_config.get_goal_dict() + manual_actions_dict = main_config.get_manual_actions_dict() + randomization_dict = main_config.get_randomization_dict() + + # Set global seeds and get a PPO seed # + ppo_seed = set_seeds(config_dict["seed"]) + + # Set-up the random boxes generator - for planar envs # + if(goal_dict["goal_type"] == "box" and config_dict["env_key"].find("planar") != -1): + randomBoxesGenerator = RandomBoxesGenerator(box_mode=goal_dict["box_mode"], box_samples=goal_dict["box_samples"], box_split=goal_dict["box_split"], box_save_val=goal_dict["box_save_val"], box_load_val=goal_dict["box_load_val"], + 
box_radius_val=goal_dict["box_radius_val"], box_min_distance_base=goal_dict["box_min_distance_base"], box_max_distance_base=goal_dict["box_max_distance_base"], box_folder=goal_dict["box_folder"], + box_x_min=goal_dict["box_x_min"], box_x_max=goal_dict["box_x_max"], box_x_active=goal_dict["box_x_active"], box_z_min=goal_dict["box_z_min"], box_z_max=goal_dict["box_z_max"], + box_z_active=goal_dict["box_z_active"], box_ry_min=goal_dict["box_ry_min"], box_ry_max=goal_dict["box_ry_max"], box_ry_active=goal_dict["box_ry_active"], box_debug=goal_dict["box_debug"] + ) + else: + randomBoxesGenerator = None + + # Train the RL-agent # + if(config_dict['simulation_mode'] == 'train'): + train_mode(ppo_seed, config_dict, dart_env_dict, gym_env_dict, hyper_dict, goal_dict, reward_dict, + manual_actions_dict, randomization_dict, randomBoxesGenerator) + + # Evaluate agents - model-based or rl-based # + elif(config_dict['simulation_mode'] == 'evaluate' or config_dict["simulation_mode"] == 'evaluate_model_based'): + evaluation_mode(config_dict, dart_env_dict, gym_env_dict, hyper_dict, goal_dict, reward_dict, + manual_actions_dict, randomization_dict, randomBoxesGenerator + ) + else: + print("You have set an invalid simulation_mode or some other settings in the config.py are wrong") diff --git a/agent/models/ruckig_planar_model.py b/agent/models/ruckig_planar_model.py new file mode 100644 index 0000000..e16bbed --- /dev/null +++ b/agent/models/ruckig_planar_model.py @@ -0,0 +1,181 @@ +""" +A time-optimal trajectory generation model - RUCKIG + +Notes: + - The DoF are optimized to reach at the same time the goal + - Support for 2 or 3 DoF control - it is used for planar envs - adapt to your task + - The accelerations and velocities are approximated by 'first-order' approximation - e.g. (dx1 - dx2) / control_cycle + - Docs: https://docs.ruckig.com/index.html + - ruckig allows for both task and joint space control. 
It is used for task-space control and the agent with IK then maps the velocities to the joint space +Important: + -All the calculations are in the DART coordinate system +""" + +import numpy as np + +from ruckig import InputParameter, OutputParameter, Result, Ruckig + +class RuckigPlanarModel(): + + def __init__(self, env, control_cycle, hyper_dict): + self.envs = env # Gym env: 'iiwa_ruckig_planar_grasping_dart_unity_env' + self.control_cycle = control_cycle # In sec. Set the same value in the Unity simulator + self.dof = np.asarray(hyper_dict["dof"]) + self.target_velocity = hyper_dict["target_vel"] # Velocity in the target pose + self.target_acceleration = hyper_dict["target_acc"] + self.max_vel = hyper_dict["max_vel"] + self.max_acc = hyper_dict["max_acc"] + self.max_jerk = hyper_dict["max_jerk"] + + ########### + # Set DoF # + ########### + self.dof = np.count_nonzero(self.dof) + if(self.dof != 3 and self.dof != 2): + raise Exception("Only 2 or 3 DoF are supported for Ruckig Planar Model") + + self.target_velocity = self.target_velocity[3-self.dof:] + self.target_acceleration = self.target_velocity[3-self.dof:] + self.max_vel = self.max_vel[3-self.dof:] + self.max_acc = self.max_acc[3-self.dof:] + self.max_jerk = self.max_jerk[3-self.dof:] + + ############################### + # DoF we control # + # Dart 0 -> rotation Zd = Yu # + # Dart 1 -> position Xd = Zu # + # Dart 2 -> position Yd = -Xu # + ############################### + + ############################################################################## + # Set ruckig objects for each env. 
Supports many envs but the processing # + # is performed in a sequential manner - no GPU support or parallelization # + ############################################################################## + self.ruckig_models = {"otg": [], "res": [], "inp": [], "out": [], + "prev_pose": [], "prev_vels": [], "prev_accs": [], "time_step": [] + } + + # See ruckig documentation for better understanding # + for i in range(self.envs.num_envs): + self.ruckig_models["otg"].append(Ruckig(self.dof, self.control_cycle)) + self.ruckig_models["res"].append(Result.Working) # Flag + self.ruckig_models["out"].append(OutputParameter(self.dof)) # Velocities result of optimization + + self.ruckig_models["inp"].append(InputParameter(self.dof)) # Current pose + self.ruckig_models["inp"][i].target_velocity = self.target_velocity + self.ruckig_models["inp"][i].target_acceleration = self.target_acceleration + self.ruckig_models["inp"][i].max_velocity = self.max_vel + self.ruckig_models["inp"][i].max_acceleration = self.max_acc + self.ruckig_models["inp"][i].max_jerk = self.max_jerk + + # For velocity and acceleration 'first-order' approximation # + self.ruckig_models["prev_pose"].append(np.zeros(self.dof)) + self.ruckig_models["prev_vels"].append(np.zeros(self.dof)) + self.ruckig_models["prev_accs"].append(np.zeros(self.dof)) + self.ruckig_models["time_step"].append(1) # Reset - at the beginning of each episode + + def predict(self, obs, deterministic=True): + """ + given the observations from 'iiwa_ruckig_planar_grasping_dart_unity_env' type envs, generate time-optimal task-space velocities using the ruckig algorithm + Note: the agents will receive ruckig generated velocities, and then using IK - update_action() method - joint velocities will be sent to the unity simulator + + :param obs: environments observation shape(num_envs, 5 or 6) - depending on the active DoF (rotation control is active) + - Format: [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] + - Should reset, rotation 
of ee in dart, x position of the ee in dart, etc + - box pose is used only once (at reset_ruckig_model()) as the box does not move during the planar episode + - Not normalized in [-1, 1] as in most gym agents + :param deterministic: unused + + :return: action - shape(num_envs, 2 or 3). Important: the actions are not normalized in [-1, 1] as in the case of the PPO + """ + action = np.zeros((self.envs.num_envs, self.dof)) # For each env + + # For each env generate the next action given the gym env observations - dart coords # + for i in range(self.envs.num_envs): + + # Reset the target ruckig pose - a new agent episode has started # + # First "inp" is also set - refer to the function for more # + if(obs[i][0] == 1): + self.reset_ruckig_model(obs[i], i) + + if self.ruckig_models["res"][i] == Result.Working: + + # Current pose of the ee # + curr_pose = np.asarray(obs[i][1:self.dof+1]) + + curr_vels = self.ruckig_models["prev_vels"][i] + curr_accs = self.ruckig_models["prev_accs"][i] + + # Velocity approx # + if(self.ruckig_models["time_step"][i] >= 2): + curr_vels = (curr_pose - self.ruckig_models["prev_pose"][i]) / self.control_cycle + + # Acceleration approx # + if(self.ruckig_models["time_step"][i] >= 3): + curr_accs = (curr_vels - self.ruckig_models["prev_vels"][i]) / self.control_cycle + + ################################## + # Update - linear approximation # + # Vel and acc are both available # + ################################## + self.ruckig_models["inp"][i].current_velocity = curr_vels + self.ruckig_models["inp"][i].current_acceleration = curr_accs + + # Save current state # + self.ruckig_models["prev_pose"][i] = curr_pose + self.ruckig_models["prev_vels"][i] = curr_vels + self.ruckig_models["prev_accs"][i] = curr_accs + + # Get the next result given the current "inp". 
If "inp" is not expected, re-calculate a new trajectory # + self.ruckig_models["res"][i] = self.ruckig_models["otg"][i].update(self.ruckig_models["inp"][i], self.ruckig_models["out"][i]) + + # Get the new generated velocity # + ruckig_vel = self.ruckig_models["out"][i].new_velocity + + ##################################################### + # Ip == out # + # Move to the next step of the generated trajectory # + ##################################################### + self.ruckig_models["out"][i].pass_to_input(self.ruckig_models["inp"][i]) + + action[i] = ruckig_vel # Save to action array for this env + + self.ruckig_models["time_step"][i] += 1 + + return action, None + + def reset_ruckig_model(self, obs, index_model): + """ + reset target task-space pose for ruckig + + :affects: "inp" - current_position, target_position, target_velocity, target_acceleration + "res" - Working + "prev_pos", "prev_vels", "prev_accs", "time_step" + + :param obs: environment (single) observation shape(, 5 or 6) - depending on the active DoF + - Format: [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] + - Should reset, rotation of ee in dart, x position of the ee in dart, etc + - box pose is used to set the target planar pose (above the box) + + :param index_mode: which ruckig object to reset + """ + ############################### + # 3, or 2 DoF are controlled # + # Dart 0 -> rotation Zd = Yu # + # Dart 1 -> position Xd = Zu # + # Dart 2 -> position Yd = -Xu ########4444444444################################## + # obs for 2 DoF -> [reset, rz_ee_d, x_ee_d, y_ee_d, x_box_d, y_box_d] # + # obs for 3 DoF- > [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] # + ################################################4444444444######################## + self.ruckig_models["inp"][index_model].current_position = obs[1:self.dof+1] + + self.ruckig_models["inp"][index_model].target_position = obs[self.dof + 1:] + self.ruckig_models["inp"][index_model].target_velocity = 
self.target_velocity + self.ruckig_models["inp"][index_model].target_acceleration = self.target_acceleration + + self.ruckig_models["res"][index_model] = Result.Working + + self.ruckig_models["prev_pose"][index_model] = np.zeros(self.dof) + self.ruckig_models["prev_vels"][index_model] = np.zeros(self.dof) + self.ruckig_models["prev_accs"][index_model] = np.zeros(self.dof) + self.ruckig_models["time_step"][index_model] = 1 diff --git a/agent/simulator_vec_env.py b/agent/simulator_vec_env.py old mode 100644 new mode 100755 index 6fb67e0..482655e --- a/agent/simulator_vec_env.py +++ b/agent/simulator_vec_env.py @@ -9,17 +9,22 @@ import json import time -import numpy as np -import roslibpy -#import torch -from stable_baselines3.common.vec_env import DummyVecEnv import subprocess from subprocess import PIPE +import roslibpy import zmq import grpc -from misc import service_pb2_grpc -from misc.service_pb2 import StepRequest +import numpy as np +from stable_baselines3.common.vec_env import DummyVecEnv + +from utils import service_pb2_grpc +from utils.service_pb2 import StepRequest + +try: + from utils_advanced.manual_actions import configure_manual_settings_and_get_manual_function +except: + pass class CommandModel: @@ -29,11 +34,10 @@ def __init__(self, id, command, env_key, value): self.command = command self.value = value - class SimulatorVecEnv(DummyVecEnv): _client = None - def __init__(self, env_fns, config, spaces=None ): + def __init__(self, env_fns, config, manual_actions_dict, reward_dict, spaces=None ): """ envs: list of environments to create """ @@ -47,17 +51,26 @@ def __init__(self, env_fns, config, spaces=None ): self.current_step = 0 self.config = config + self.reward_dict = reward_dict self.communication_type = config['communication_type'] self.port_number = config['port_number'] - print(config["port_number"]) + print("Port number: " + config["port_number"]) self.ip_address = config['ip_address'] + print("Ip address: " + config["ip_address"]) self.start = 
0 self.nenvs = len(env_fns) self.train_envs = [env_fn(id=ID) for env_fn, ID in zip(env_fns, [x for x in range(self.nenvs)])] # self.validation_envs = [env_fn(id=ID) for env_fn, ID in zip(env_fns, [x for x in range(self.nenvs)])] self.envs = self.train_envs + print("Number of envs: " + str(len(self.envs))) #self.envs = [env_fn(id=ID) for env_fn, ID in zip(env_fns, [x for x in range(self.nenvs)])] + # Initial position flag for the manipulator/robot after reseting. 1 means different than the default vertical position # + if (config["initial_positions"] is None or np.count_nonzero(config["initial_positions"]) != 0) and config["random_initial_joint_positions"] == False: + self.flag_zero_initial_positions = 0 + else: + self.flag_zero_initial_positions = 1 + if self.communication_type == 'ROS': # Connect to ROS server if SimulatorVecEnv._client is None: @@ -75,6 +88,32 @@ def __init__(self, env_fns, config, spaces=None ): else: print("Please specify either ROS or ZMQ communication mode for this environment") + ################################################################### + # Manual/hard-coded actions to command at the end of the episode # + # Disabled by default. 
Refer to utils_advanced/ # + ################################################################### + self.robotic_tool = config["robotic_tool"] + if(manual_actions_dict is not None): + self.manual = manual_actions_dict["manual"] + self.manual_behaviour = manual_actions_dict["manual_behaviour"] # Behaviour to excecute, 'planar_grasping' (down/close/up) or 'close_gripper' (close) + self.manual_rewards = manual_actions_dict["manual_rewards"] # True/False -> whether to add rewards/penalties during manual actions + + # Set-up manual actions and return a function which will be called after the end of the agent episode # + self.manual_func = configure_manual_settings_and_get_manual_function(self, manual_actions_dict) + else: + self.manual = False + + if(self.robotic_tool == "None"): + print("The robot has no tool attached to the end-effector") + elif(self.robotic_tool == "2_gripper"): + print("The robot has a 2-finger gripper attached to the end-effector") + elif(self.robotic_tool == "3_gripper"): + print("The robot has a 3-finger gripper attached to the end-effector") + elif(self.robotic_tool == "calibration_pin"): + print("The robot has a calibration pin attached to the end-effector") + else: + print("The robot has no tool attached to the end-effector") + def switch_to_training(self): self.envs = self.train_envs @@ -82,55 +121,87 @@ def switch_to_validation(self): self.envs = self.validation_envs def step(self, actions, dart_convert=True): + """ + :param actions: list[list[float]]] list of lists. Each sublist entails actions that will be transfered to each corresponding env. + These actions originate either from a model-based controller or from the output of a Neural Network of a RL model + + :param dart_convert: bool, if set to True it means the actions are in task-space and must be converted with IK to the joint space. + The UNITY simulator always expects velocities in joint space. 
+ """ + self.current_step += 1 if dart_convert and 'dart' in self.config['env_key']: actions_converted = [] for env, action in zip(self.envs, actions): - act = env.update_action(action) + act = env.update_action(action) # Convert agent action to UNITY format - joint space and tool action actions_converted.append(act) actions = actions_converted - #print("current step:" + str(self.current_step)) - # create request containing all environments with the actions to be executed - request = self._create_request("ACTION", self.envs, actions) - # execute the simulation for all environments and get observations - observations = self._send_request(request) - observations_converted = [] - terminated_environments = [] - rews = [] - dones = [] - infos = [] - for env, observation in zip(self.envs, observations): - obs, rew, done, info = env.update(ast.literal_eval(observation)) - observations_converted.append(obs) - rews.append(rew) - dones.append(done) - infos.append(info) - if done: - terminated_environments.append(env) - # TODO: Make sure the last observation for terminated environment is the correct one: - # https://github.com/hill-a/stable-baselines/issues/400 talks about reset observation being wrong, - # use terminated observation instead - # TODO: make sure this does not cause problems, when the when images are transfered it might be slow - # to get all environment observations again - # reset all the terminated environments - [env.reset() for env in terminated_environments] + #################################################################################################### + # Send actions to UNITY (JSON message) and update the envs with the new observation # + # after executing these actions (update the dart kinematic (robotic) chain, agents time-step, etc) # + #################################################################################################### + terminated_environments, rews, infos, observations_converted, dones = 
self._send_actions_and_update(actions) + + # Render: Not advised to set 'enable_dart_viewer': True, during RL-training. Use it only for debugging # + if(self.config["simulation_mode"] == 'train'): + self.render() + + ########################################################################################################## + # TODO: Make sure the last observation for terminated environment is the correct one: # + # https://github.com/hill-a/stable-baselines/issues/400 talks about reset observation being wrong, # + # use terminated observation instead # + # TODO: make sure this does not cause problems, when the when images are transfered it might be slow # + # to get all environment observations again # + ########################################################################################################## + + # Reset all the terminated environments # if len(terminated_environments) > 0: - request = self._create_request("RESET", terminated_environments) - # currently, the simulator returns array of all the environments, not just the terminated ones - observations = self._send_request(request) + + #################################################################################### + # Manual actions at the end of the agent episode. # + # Disabled by default. 
Refer to utils_advanced/ # + # Note: the envs should terminate at the same timestep due to UNITY sychronization # + # for collided envs -> send zero velocities for the remaining steps # + #################################################################################### + if(self.manual == True): + self.manual_func(self, rews, infos, self.manual_rewards) + + # Successful episode: give terminal reward # for env in terminated_environments: - obs, _, _, _ = self.envs[env.id].update(ast.literal_eval(observations[env.id])) - observations_converted[env.id] = obs + if env.get_terminal_reward(): + rews[env.id] += self.reward_dict["reward_terminal"] + infos[env.id]["success"] = True + + ########################################### + # Reset gym envs and UNITY simulator envs # + ########################################### + [env.reset() for env in terminated_environments] + observations_converted = self._send_reset_and_update(terminated_environments, time_step_update=True) + + # Render: Not advised to set 'enable_dart_viewer': True, during RL-training. Use it only for debugging # + if(self.config["simulation_mode"] == 'train'): + self.render() + + ######################################################################################## + # The manipulator resets to non-zero initial joint positions. 
# + # Correct the UNITY observation and update the dart chain due to UNITY synchronization # + # Important: Make sure the agent episodes terminate at the same time-step in this case # + ######################################################################################## + if(self.flag_zero_initial_positions == 1): + observations_converted = self._send_zero_vel_and_update(self.envs, True) + return np.stack(observations_converted), np.stack(rews), np.stack(dones), infos def step_wait(self): # only because VecFrameStack uses step_async to provide the actions, then step_wait to execute a step return self.step(self.actions) - def _create_request(self, command, environments, actions=None): + """ + Create request to send to the UNITY simulator + """ content = '' if command == "ACTION": for act, env in zip(actions, environments): @@ -163,7 +234,6 @@ def _send_request(self, content): reply = self.stub.step(StepRequest(data=content)) return self._parse_result(reply.data) - def _parse_result(self, result): if self.communication_type == 'ROS': return ast.literal_eval(result['value']) @@ -175,14 +245,131 @@ def _parse_result(self, result): def reset(self, should_reset=True): if should_reset: [env.reset() for env in self.envs] - request = self._create_request("RESET", self.envs) + + # Reset UNITY environments and update the agent envs (dart chain) # + self._send_reset_and_update(self.envs, time_step_update=False) + + # Correct dart chain # + observations_converted = self._send_zero_vel_and_update(self.envs, True) + + return np.array(observations_converted) + + ########### + # Helpers # + ########### + def _send_reset_and_update(self, envs, time_step_update=True): + """ + send a reset JSON command and update the envs (time steps, observations, dart chains) + + :param envs: envs + :param time_step_update: whether to update the timestep of the agents + + :return: obs_converted + """ + + # UNITY # + request = self._create_request("RESET", envs) observations = 
self._send_request(request) + + # Agents # + observations_converted, _, _, _ = self._update_envs(observations, time_step_update=time_step_update) + + return observations_converted + + def _send_actions_and_update(self, actions): + """ + send actions to the UNITY envs and update the agents envs with the returned observations (time steps, observations, dart chains) + Note: send zero velocities to collided envs + + :param actions: agent actions (UNITY format) for each env to be send to UNITY simulator + + :return: terminated_environments, rews, infos, obs_converted, dones + """ + + rews = [0] * len(self.envs) + if(self.config['env_key'] == 'iiwa_joint_vel'): + action_dim = self.config['num_joints'] + 1 # For gripper + elif(self.config['robotic_tool'].find("gripper") == -1): + action_dim = 7 + else: + action_dim = 8 + + # For collided envs send zero velocities # + for env in self.envs: + if env.collided_env == 1: + actions[env.id] = np.zeros(action_dim) + + #print("current step:" + str(self.current_step)) + # create request containing all environments with the actions to be executed + + ##################################################################################################### + # Execute a UNITY simulation step for all environments and parse the returned observations (result) # + ##################################################################################################### + request = self._create_request("ACTION", self.envs, actions) + observations = self._send_request(request) + + ########################################################################################## + # Update the envs using the obs returned from UNITY (dart chain of the manipulator, etc) # + ########################################################################################## observations_converted = [] - for env, observation in zip(self.envs, observations): - obs, _, _, _ = env.update(ast.literal_eval(observation)) - observations_converted.append(obs) + 
terminated_environments = [] # Assume done in the same timestep else move it outside the for + dones = [] + infos = [] - return np.array(observations_converted) + observations_converted, rews, dones, infos = self._update_envs(observations, time_step_update=True) + for env, done in zip(self.envs, dones): # Scan for terminated envs + if done is True: + terminated_environments.append(env) + + return terminated_environments, rews, infos, observations_converted, dones + + def _send_zero_vel_and_update(self, envs, time_step_update=True): + """ + send zero velocities to all UNITY envs and update the agent envs + + Note: this function is needed to be called when the manipulator resets to different initial joints + positions than the vertical default position. It corrects the dart chain of the envs due to UNITY synchronization + + :param envs: envs + :param time_step_update: whether to update the timestep of the agents + + :return: obs_converted + """ + # Send zero velocities to the UNITY envs # + if(self.config['env_key'] == 'iiwa_joint_vel'): + action_dim = self.config['num_joints'] + 1 + elif(self.config['robotic_tool'].find("gripper") == -1): + action_dim = 7 + else: + action_dim = 8 + + actions = np.zeros((len(envs), action_dim)) + request = self._create_request("ACTION", envs, actions) + observations = self._send_request(request) + + # Update the agents envs # + observations_converted, _, _, _ = self._update_envs(observations, time_step_update=time_step_update) + + return observations_converted + + def _update_envs(self, observations, time_step_update): + """ + update the agent envs (dart chain, time_step etc.) 
given the new UNITY observations + + :param observations: UNITY format + :param time_step_update: whether to update the timestep of the agents + + :return: terminated_environments, rews, infos, obs_converted, dones + """ + env_stack = [] + for obs, env in zip(observations, self.envs): + env_stack.append(env.update(ast.literal_eval(obs), time_step_update)) + + return [list(param) for param in zip(*env_stack)] + + ############### + # End helpers # + ############### def reset_task(self): pass @@ -193,8 +380,17 @@ def close(self): def __len__(self): return self.nenvs + def render(self, mode=None): + """ + Override default vectorized render behaviour. SB3 renders all envs into a single window. This behaviour is + incompatible with our task_monitor.py implementation. Instead render each env into separate windows + """ + if(self.config["env_key"] != 'iiwa_joint_vel'): + for env in self.envs: + if(env.dart_sim.enable_viewer): # Render is active + env.render() # Calling destructor # def __del__(self): # print("Destructor called") - # self.env_process.terminate() \ No newline at end of file + # self.env_process.terminate() diff --git a/agent/utils/dart_guide.py b/agent/utils/dart_guide.py new file mode 100644 index 0000000..46c5158 --- /dev/null +++ b/agent/utils/dart_guide.py @@ -0,0 +1,108 @@ +""" +Help code for: + 1. how to retrieve transforamtion matrices of chains of rigidbodies in target (dart) + + 2. 
how to generate reachable targets for the end-effector pose + +DART uses the Eigen library - Geometry module documentation: https://eigen.tuxfamily.org/dox/group__Geometry__Module.html + +body_node.getTransform().translation() --> Cartesian position (x,y,z) of the center of that body_node +body_node.getTransform().rotation() --> Orientation of the body_node in a 3*3 rotation matrix +body_node.getTransform().quaternion() --> Orientation of the body_node in a unit quaternion form (w,x,y,z) +dart.math.logMap() --> calculates an angle-axis representation of a rotation matrix + +background details: https://en.wikipedia.org/wiki/Axis%E2%80%93angle_representation +further code details: https://github.com/dartsim/dart/blob/main/dart/math/Geometry.cpp +""" + +import dartpy as dart +import numpy as np + +def get_pos_error(self): + ee = self.dart_sim.chain.getBodyNode('iiwa_link_ee') # The end-effector rigid-body node + target = self.dart_sim.target.getBodyNode(0) # The target rigid-body node + position_error = target.getTransform().translation() - ee.getTransform().translation() + + return position_error + +def get_pos_distance(self): + distance = np.linalg.norm(self.get_pos_error()) + + return distance + +def get_rot_error(self): + ee = self.dart_sim.chain.getBodyNode('iiwa_link_ee') # The end-effector rigid-body node + target = self.dart_sim.target.getBodyNode(0) # The target rigid-body node + quaternion_error = target.getTransform().quaternion().multiply(ee.getTransform().quaternion().inverse()) + orientation_error = dart.math.logMap(quaternion_error.rotation()) # angle-axis x, y, z + + return orientation_error + +def get_rot_distance(self): + distance = np.linalg.norm(self.get_rot_error()) + + return distance + +def get_ee_pos(self): + """ + return the position of the ee in dart cords + + :return: x, y, z position of the ee in dart coords system + """ + x, y, z = self.dart_sim.chain.getBodyNode('iiwa_link_ee').getTransform().translation() + + return x, y, z + +def 
get_ee_orient(self): + """ + return the orientation of the ee in angle-axis + + :return: rx, ry, rz orientation of the ee in angle-axis in dart coords system + """ + rot_mat = self.dart_sim.chain.getBodyNode('iiwa_link_ee').getTransform().rotation() + rx, ry, rz = dart.math.logMap(rot_mat) + + return rx, ry, rz + +def _random_target_gen_joint_level(self): + """ + generate a rechable target in task-space for the manipulator + + :return: rx, ry, rz, x, y, z pose the ee in dart coords system (angle-axis) + """ + + # backing up joint positions of the arm in radians + positions = self.dart_sim.chain.getPositions() + + # If your gym observation state is different than the default, # + # please adapt the self.observation_indices['joint_pos'] # + joint_indices_start = self.observation_indices['joint_pos'] + + while True: + # generating a random valid observation including joint positions + random_state = self.observation_space.sample() + + # selecting the randomly generated joint positions from the agent state # + # Note: the agent state containts additional information other than the joints positions of the robot # + initial_positions = random_state[joint_indices_start:joint_indices_start + self.n_links] + + # computing the forward kinematics to retrieve end-effector pose + self.dart_sim.chain.setPositions(initial_positions) + ee = self.dart_sim.chain.getBodyNode('iiwa_link_ee') + rx, ry, rz = dart.math.logMap(ee.getTransform().rotation()) # SO(3) to so(3): Angle * Axis of Rotation + x, y, z = ee.getTransform().translation() + + # conditions for the generated targets - adapt it for your task + if z > 0.1: + break + + # reverting the joint positions back to the original ones + self.dart_sim.chain.setPositions(positions) + + # resetting the orientation vector if it is not controlled + if not self.dart_sim.orientation_control: + rx, ry, rz = 0.0, 0.0, 0.0 + + # initial_positions vector is a valid IK solution to the [rx, ry, rz, x, y, z] + + return rx, ry, rz, x, y, z diff 
--git a/agent/utils/helpers.py b/agent/utils/helpers.py new file mode 100644 index 0000000..97436d9 --- /dev/null +++ b/agent/utils/helpers.py @@ -0,0 +1,10 @@ +import torch +import random +import numpy as np + +def set_seeds(seed: int) -> int: + torch.manual_seed(seed + 135) + np.random.seed(seed + 235) + random.seed(seed + 335) + + return seed + 435 diff --git a/agent/utils/policy_networks.py b/agent/utils/policy_networks.py new file mode 100644 index 0000000..2dc9184 --- /dev/null +++ b/agent/utils/policy_networks.py @@ -0,0 +1,24 @@ +import torch + +def PolicyNetworkVanillaReLU(): + """ + returns a default policy network from SB3 but with ReLU activation functions + + :return: policy_kwargs required for RL models like PPO + """ + policy_kwargs = dict(activation_fn=torch.nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ) + + return policy_kwargs + +################## +# Other examples # +################## +# Note: when using images as state representation, the default PPO architecture of 64 neurons per layer may be too small to solve the task + +# net_arch=[128, 128, dict(pi=[64,64], vf=[64,64])], + +# policy_kwargs = dict(activation_fn=torch.nn.ReLU, net_arch=[dict(pi=[64, 64], vf=[64, 64])], +# features_extractor_class=your_pytorch_network_class_here, +# features_extractor_kwargs=give_your_kwargs_when_your_pytorch_network_class_is_instantiated) diff --git a/agent/misc/service_pb2.py b/agent/utils/service_pb2.py similarity index 100% rename from agent/misc/service_pb2.py rename to agent/utils/service_pb2.py diff --git a/agent/misc/service_pb2_grpc.py b/agent/utils/service_pb2_grpc.py similarity index 98% rename from agent/misc/service_pb2_grpc.py rename to agent/utils/service_pb2_grpc.py index 4b90ca6..ee3fd6f 100644 --- a/agent/misc/service_pb2_grpc.py +++ b/agent/utils/service_pb2_grpc.py @@ -2,7 +2,7 @@ """Client and server classes corresponding to protobuf-defined services.""" import grpc -from misc import service_pb2 as service__pb2 +from utils 
import service_pb2 as service__pb2 class CommunicationServiceStub(object): diff --git a/agent/utils/simulator_configuration.py b/agent/utils/simulator_configuration.py new file mode 100644 index 0000000..70f89d3 --- /dev/null +++ b/agent/utils/simulator_configuration.py @@ -0,0 +1,230 @@ +import traceback +import xml.etree.ElementTree as ET +import copy + +def update_simulator_configuration(config, xml_file): + """ + Update the UNITY simulator .xml file based on the settings that the user has been provided in a Config() type of class + + Note: to change more options either adapt the UNITY simulator manually or extend this function, and the Config() class + + :param config: configuration object + :param xml_file: path of the .xml file + + :return: bool. Returns False when an error was encountered and the .xml file could not be updated properly + """ + + tree = ET.parse(xml_file) + oldTree = copy.deepcopy(tree) + + root = tree.getroot() + + # Port # + field = root.find('CommunicationType') + oldFieldText = field.text + newFieldText = config.config_dict["communication_type"] + if(newFieldText != "GRPC" and newFieldText != "ROS" and newFieldText != "ZMQ"): + print("Given CommunicationType field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Port # + field = root.find('PortNumber') + oldFieldText = field.text + field.text = config.config_dict["port_number"] + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Control cycles # + field = root.find('TimestepDurationInSeconds') + oldFieldText = field.text + field.text = str(config.dart_dict["control_cycle"]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('PhyscsSimulationIncrementInSeconds') + oldFieldText = field.text + field.text = str(config.dart_dict["unity_cycle"]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Images # + field = root.find('EnableObservationImage') + 
oldFieldText = field.text + newFieldText = str(config.config_dict["use_images"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given EnableObservationImage field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ObservationImageWidth') + oldFieldText = field.text + field.text = str(config.config_dict["image_size"]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ObservationImageHeight') + oldFieldText = field.text + field.text = str(config.config_dict["image_size"]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Camera position # + field = root.find('ObservationCameras')[0][0] + x = field[0] + oldFieldText = x.text + x.text = str(config.config_dict["camera_position"][0]) + if(oldFieldText != x.text): + x.set('updated', 'yes') + + y = field[1] + oldFieldText = y.text + y.text = str(config.config_dict["camera_position"][1]) + if(oldFieldText != y.text): + y.set('updated', 'yes') + + z = field[2] + oldFieldText = z.text + z.text = str(config.config_dict["camera_position"][2]) + if(oldFieldText != z.text): + z.set('updated', 'yes') + + # Camera rotation # + field = root.find('ObservationCameras')[0][1] + x = field[0] + oldFieldText = x.text + x.text = str(config.config_dict["camera_rotation"][0]) + if(oldFieldText != x.text): + x.set('updated', 'yes') + + y = field[1] + oldFieldText = y.text + y.text = str(config.config_dict["camera_rotation"][1]) + if(oldFieldText != y.text): + y.set('updated', 'yes') + + z = field[2] + oldFieldText = z.text + z.text = str(config.config_dict["camera_rotation"][2]) + if(oldFieldText != z.text): + z.set('updated', 'yes') + + # End effector # + field = root.find('EnableEndEffector') + oldFieldText = field.text + newFieldText = str(config.config_dict["enable_end_effector"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given 
EnableEndEffector field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('EndEffectorModel') + oldFieldText = field.text + if config.config_dict["robotic_tool"] == "3_gripper": + model = "ROBOTIQ_3F" + elif config.config_dict["robotic_tool"] == "2_gripper": + model = "ROBOTIQ_2F85" + elif config.config_dict["robotic_tool"] == "calibration_pin": + model = "CALIBRATION_PIN" + else: + model = "ROBOTIQ_3F" + + field.text = model + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Box # + try: # Might not be availavle depending on the simulator version + field = root.find('ItemSizeX') + oldFieldText = field.text + field.text = str(config.goal_dict["box_dim"][0]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ItemSizeY') + oldFieldText = field.text + field.text = str(config.goal_dict["box_dim"][1]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ItemSizeZ') + oldFieldText = field.text + field.text = str(config.goal_dict["box_dim"][2]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + except: + pass + + # Randomization # + field = root.find('RandomizeLatency') + oldFieldText = field.text + newFieldText = str(config.randomization_dict["randomize_latency"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given RandomizeLatency field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('RandomizeTorque') + oldFieldText = field.text + newFieldText = str(config.randomization_dict["randomize_torque"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given RandomizeTorque field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('RandomizeAppearance') + oldFieldText = 
field.text + newFieldText = str(config.randomization_dict["randomize_appearance"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given RandomizeAppearance field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + try: # Might not be availavle depending on the simulator version + field = root.find('EnableShadows') + oldFieldText = field.text + newFieldText = str(config.randomization_dict["enable_shadows"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given EnableShadows field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ShadowType') + oldFieldText = field.text + newFieldText = config.randomization_dict["shadow_type"] + if(newFieldText != "Soft" and newFieldText != "Hard"): + print("Given ShadowType field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + except: + pass + + try: + # Write the updated .xml file + tree.write(xml_file) + + except Exception as e: + print("Warning: the .xml file has not been updated properly. 
An Exception was occurred:") + print(traceback.format_exc()) + + oldTree.write(xml_file) # Revert to the old .xml file + + return False + + return True diff --git a/agent/utils/task_monitor.cpython-37m-x86_64-linux-gnu.so b/agent/utils/task_monitor.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000..5dc889b Binary files /dev/null and b/agent/utils/task_monitor.cpython-37m-x86_64-linux-gnu.so differ diff --git a/agent/utils_advanced/boxes_generator.py b/agent/utils_advanced/boxes_generator.py new file mode 100644 index 0000000..e6d9e23 --- /dev/null +++ b/agent/utils_advanced/boxes_generator.py @@ -0,0 +1,277 @@ +""" +A box generator class to spawn random boxes in the UNITY simulator +It allows to define the range where the boxes will be spawned in the workspace of the robot +""" + +import os + +import scipy +import scipy.spatial + +import numpy as np +import matplotlib.pyplot as plt + +from pathlib import Path + +class RandomBoxesGenerator: + def __init__(self, box_mode="train", box_samples=400, box_split=0.1, box_save_val=False, box_load_val=False, + box_radius_val=0.01, box_min_distance_base=0.475, box_max_distance_base=0.67, box_folder="./agent/logs/dataset/", + box_x_min=-0.67, box_x_max=0.67, box_x_active=True, box_z_min=0.42, box_z_max=0.67, box_z_active=True, + box_ry_min=-np.inf, box_ry_max=np.inf, box_ry_active=False, box_debug=[0.48159, 0.05, 0.4406864, 0.0, 0.0, 0.0]): + + self.box_mode = box_mode # Train, test, debug + self.box_samples = box_samples + self.index_train = 0 # Current index + self.index_val = 0 # Starting index of val boxes + self.val_size = 0 + self.train_size = 0 + self.box_split = box_split # Val split -> 0.1, 0.2, etc + self.box_load_val = box_load_val # Load validation boxes from a saved dataset + self.box_folder = box_folder # Folder dataset path to load (val), and/or save the generated dataset (train, val) + self.box_radius_val = box_radius_val # Exclude spawned train boxes that are close to the val boxes within this 
radius (threshold) in meters. e.g. 0.01 + self.box_min_distance_base = box_min_distance_base + self.box_max_distance_base = box_max_distance_base + self.box_save_val = box_save_val # Save generated dataset + self.box_debug = box_debug + + ############## + # Set ranges # + ############## + self.box_x_min = box_x_min + self.box_x_max = box_x_max + self.box_x_active = box_x_active + self.box_z_min = box_z_min + self.box_z_max = box_z_max + self.box_z_active = box_z_active + self.box_ry_min = box_ry_min + self.box_ry_max = box_ry_max + self.box_ry_active = box_ry_active + + ###################### + # Set-up the Dataset # + ###################### + + # Train split # + self.objects_X, self.objects_Y, self.objects_Z, self.objects_RY = [], [], [], [] + + # Val split # + self.objects_X_val, self.objects_Y_val, self.objects_Z_val, self.objects_RY_val = [], [], [], [] + + # Create the directory to save the dataset # + Path(self.box_folder).mkdir(parents=True, exist_ok=True) + + # Set-up the train and val splits # + if (self.box_mode == "debug"): + self.val_size = 1 + self.train_size = 1 + else: + self.val_size = int(self.box_samples * self.box_split) + self.train_size = self.box_samples - self.val_size + + # Useful struct for computing fast euclidean distances. 
Refer to self._get_samples() # + self.val_set = np.column_stack((self.objects_X_val, self.objects_Z_val)) + + # Generate always a random train set # + for i in range(self.box_samples): + X, Y, Z, _, RY, _ = self._get_samples(self.objects_X_val, self.objects_Z_val) + + self.objects_X.append(X) + self.objects_Y.append(Y) + self.objects_Z.append(Z) + self.objects_RY.append(RY) + + self.objects_X_train = self.objects_X[:self.train_size] + self.objects_Y_train = self.objects_Y[:self.train_size] + self.objects_Z_train = self.objects_Z[:self.train_size] + self.objects_RY_train = self.objects_RY[:self.train_size] + + # Load validation boxes # + if (self.box_load_val == True): + self._load_val() + else: # Do not load - use random (newly generated) + self.objects_X_val = self.objects_X[self.train_size:] + self.objects_Y_val = self.objects_Y[self.train_size:] + self.objects_Z_val = self.objects_Z[self.train_size:] + self.objects_RY_val = self.objects_RY[self.train_size:] + + # Save the box dataset # + if (self.box_save_val == True): + self._save_dataset() + + def __call__(self): + """ + return the next box from the dataset + + :return: x, y, z, 0, ry, 0 box coords - in unity coords system - orientation in degrees + """ + + if (self.box_mode == "train"): + box = [ + self.objects_X_train[self.index_train], self.objects_Y_train[self.index_train], + self.objects_Z_train[self.index_train], 0.0, self.objects_RY_train[self.index_train], 0.0 + ] + + self.index_train += 1 + if (self.index_train == self.train_size): # Important: start from the beginning + self.index_train = 0 + + elif (self.box_mode == "val"): + box = [ + self.objects_X_val[self.index_val], self.objects_Y_val[self.index_val], + self.objects_Z_val[self.index_val], 0.0, self.objects_RY_val[self.index_val], 0.0 + ] + + self.index_val += 1 + if (self.index_val == self.val_size): + self.index_val = 0 + + elif (self.box_mode == "debug"): + box = self.box_debug + + else: + raise Exception("Not valid box mode") + + return box 
+ + def _load_val(self): + """ + load the validation split from a saved dataset file + """ + + val_set_file = np.loadtxt(os.path.join(self.box_folder, "val_data.txt"), delimiter=",") + + for box in val_set_file: + self.objects_X_val.append(box[0]) + self.objects_Y_val.append(box[1]) + self.objects_Z_val.append(box[2]) + self.objects_RY_val.append(box[3]) + + self.objects_X_val = np.asarray(self.objects_X_val) + self.objects_Y_val = np.asarray(self.objects_Y_val) + self.objects_Z_val = np.asarray(self.objects_Z_val) + self.objects_RY_val = np.asarray(self.objects_Z_val) + + self.val_size = len(self.objects_X_val) + + def _get_samples(self, objects_X_val, objects_Z_val): + """ + generate one random box with coordinates that complies with the user-defined constraints + + if 'box_radius_val' is not 0.0: keep spawning until the generated box is far apart from all the val boxes by the defined threshold + + :param objects_X_val: x validation boxes coords + + :return: x, y, z, 0, ry, 0 box coords - in unity coords system - orientation in degrees + """ + while True: + if(self.box_x_active): + X = np.random.uniform(self.box_x_min, self.box_x_max, size=1)[0] + else: + X = 0 + + Y = 0.05 # Hard-coded box height. 
Adapt to your task + + if(self.box_z_active): + Z = np.random.uniform(self.box_z_min, self.box_z_max, size=1)[0] + else: + Z = 0 + + if(self.box_ry_active): + RY = np.random.uniform(self.box_ry_min, self.box_ry_max, size=1)[0] + else: + RY = 0 + + distance_to_base = np.sqrt(X**2 + Z**2) + if (distance_to_base > self.box_min_distance_base and distance_to_base < self.box_max_distance_base): + + # Train boxes must be far apart from the validation boxes # + if(self.box_radius_val >= 0.001): # Threshold is active + point_set = np.asarray([[X, Z]]) # Current generated box + + # Calculate the distances to all other validation boxes from the generated box # + dist_radius = scipy.spatial.distance.cdist(self.val_set, point_set, 'euclidean') + + # Check violation # + violation = False + for val_dist in dist_radius: + if (val_dist <= self.box_radius_val): + violation = True + + if violation: # Box too close to a validation box - spawn another box + continue + + return X, Y, Z, 0.0, RY, 0.0 + + def _save_dataset(self): + """ + save the generated dataset: + - .txt formant: x,y,z,ry + - saves also some .png plots + """ + + # Train split # + x_np_train = np.asarray(self.objects_X_train) + x_np_train = x_np_train.reshape(x_np_train.shape[0], 1) + z_np_train = np.asarray(self.objects_Z_train) + z_np_train = z_np_train.reshape(z_np_train.shape[0], 1) + ry_np_train = np.asarray(self.objects_RY_train) + ry_np_train = ry_np_train.reshape(ry_np_train.shape[0], 1) + y_np_train = np.ones_like(x_np_train) * 0.05 + train_data = np.concatenate((x_np_train, y_np_train, z_np_train, ry_np_train), axis=1) + + # Val split # + x_np_val = np.asarray(self.objects_X_val) + x_np_val = x_np_val.reshape(x_np_val.shape[0], 1) + z_np_val = np.asarray(self.objects_Z_val) + z_np_val = z_np_val.reshape(z_np_val.shape[0], 1) + ry_np_val = np.asarray(self.objects_RY_val) + ry_np_val = ry_np_val.reshape(ry_np_val.shape[0], 1) + y_np_val = np.ones_like(x_np_val) * 0.05 + val_data = np.concatenate((x_np_val, 
y_np_val, z_np_val, ry_np_val), axis=1) + + # Save to .txt files # + np.savetxt(os.path.join(self.box_folder, "train_data.txt"), train_data, delimiter=",") + np.savetxt(os.path.join(self.box_folder, "val_data.txt"), val_data, delimiter=",") + + ######### + # Plots # + ######### + + # x - z coords # + fig = plt.figure() + ax1 = fig.add_subplot(111) + + ax1.scatter(x_np_train, z_np_train, s=10, c='#726bff', marker="s", label='train') + ax1.scatter(x_np_val, z_np_val, s=10, c='#ff6e66', marker="o", label='val') + plt.legend(loc='upper left') + plt.xlabel("x") + plt.ylabel("z") + plt.title("dataset") + plt.savefig(os.path.join(self.box_folder, 'box_dataset_x_z_axis.png')) + + # ry - y coords # + fig = plt.figure() + ax1 = fig.add_subplot(111) + + # Adapt box height to your task # + ax1.scatter(ry_np_train, 0.05 * np.ones_like(ry_np_train), s=10, c='b', marker="s", label='train') + ax1.scatter(ry_np_val, 0.05 * np.ones_like(ry_np_val), s=10, c='r', marker="o", label='val') + plt.legend(loc='upper left') + plt.xlabel("ry (deg)") + plt.ylabel("y") + plt.title("dataset") + plt.savefig(os.path.join(self.box_folder, 'box_dataset_ry_y_axis.png')) + + # x - z - ry coords # + fig = plt.figure() + ax1 = fig.add_subplot(111, projection='3d') + + ax1.scatter(x_np_train, z_np_train, ry_np_train, s=10, c='b', marker="s", label='train') + ax1.scatter(x_np_val, z_np_val, ry_np_val, s=10, c='r', marker="o", label='val') + plt.legend(loc='upper left') + ax1.set_xlabel("x") + ax1.set_ylabel("z") + ax1.set_zlabel("ry (deg)") + + plt.title("dataset") + plt.savefig(os.path.join(self.box_folder, 'box_dataset.png')) diff --git a/agent/utils_advanced/evaluate.py b/agent/utils_advanced/evaluate.py new file mode 100644 index 0000000..a848aaa --- /dev/null +++ b/agent/utils_advanced/evaluate.py @@ -0,0 +1,389 @@ +import os +from pathlib import Path + +import numpy as np +import pandas as pd + +# Import envs # +from simulator_vec_env import SimulatorVecEnv +from envs.iiwa_sample_joint_vel_env 
import IiwaJointVelEnv +from envs.iiwa_sample_env import IiwaSampleEnv + +# Planar grasping +from envs_advanced.iiwa_numerical_planar_grasping_env import IiwaNumericalPlanarGraspingEnv +from envs_advanced.iiwa_end_to_end_planar_grasping_env import IiwaEndToEndPlanarGraspingEnv +from envs_advanced.iiwa_ruckig_planar_grasping_env import IiwaRuckigPlanarGraspingEnv + +# Monitor +from stable_baselines3.common.vec_env import VecMonitor +from torch.utils.tensorboard import SummaryWriter + +# Models # +from stable_baselines3 import PPO +from models.ruckig_planar_model import RuckigPlanarModel + +# Set-up envs +from utils_advanced.helpers import get_env + +def evaluation_mode(config_dict, dart_env_dict, gym_env_dict, hyper_dict, goal_dict, reward_dict, manual_actions_dict=None, randomization_dict=None, randomBoxesGenerator=None): + """ + evaluate a single checkpoint or a whole directory that may include many runs and many checkpoints per run. Save results in .csv file + + each environment will call a dedicated evaluation function - adapt and extend to your task + + :param config_dict: configuration dictionary + :param dart_env_dict: dart dictionary + :param gym_env_dict: env dictionary + :param hyper_dict: hyperparameters dict - models related + :param goal_dict: goal dict - target, or green box + :param reard_dict: reward dict - reward terms values and ranges + :param manual_actions_dict: manual action dict + :param randomization_dict: randomization dict + :param randomBoxesGenerator: boxes generator to spawn boxes for planar grasping + """ + + env_key = config_dict['env_key'] + + # Build env # + env = get_env(config_dict, dart_env_dict, gym_env_dict, config_dict["log_dir"], reward_dict, goal_dict, manual_actions_dict, randomization_dict, randomBoxesGenerator) + + print("Using for evaluation the env: " + config_dict["env_key"]) + + if env_key.find("planar") != -1: # Planar envs + _evaluate_planar_grasping(env, config_dict, dart_env_dict, hyper_dict, randomBoxesGenerator) 
+ elif env_key == "iiwa_sample_dart_unity_env" or env_key == "iiwa_joint_vel": + _evaluate_base_env(env, config_dict, dart_env_dict) + else: + raise Exception("This type of env has no available evaluation method. Create one in the evaluate.py first - aborting") + +def _evaluate_base_env(env, config_dict, dart_env_dict): + """ + evaluate iiwa_sample_joint_vel_env, or iiwa_joint_vel envs + + :param env: gym env + :param config_dict: configuration dict + :param dart_env_dict: dart dictionary + """ + + if(config_dict["simulation_mode"] == "evaluate"): + run_name = config_dict["model_evaluation_type"].split("/") + run_name = run_name[-2] + "_" + run_name[-1] # e.g. run_0/model_3125_0 + + print("===================================================") + print("(RL) Model evaluation: " + str(run_name)) + print("===================================================") + + # Load trained agent # + model = PPO.load(os.path.join(config_dict["model_evaluation_type"])) + model.policy.set_training_mode(False) + + obs = env.reset() + for x in range(1000): # Run some steps for each env + action, _states = model.predict(obs, deterministic=True) # Important: set deterministic to True to use the best learned policy (no exploration) + + obs, rewards, dones, info = env.step(action) + + if dart_env_dict['enable_dart_viewer'] and config_dict['env_key'] != 'iiwa_joint_vel': + env.render() + + elif(config_dict['simulation_mode'] == 'evaluate_model_based' and config_dict['env_key'] != 'iiwa_joint_vel'): + # check model-based controllers (e.g. 
P-controller) # + print("===================================================") + print("Model-based evaluation") + print("===================================================") + + control_kp = 1.0 / env.observation_space.high[0] + + obs = env.reset() + episode_rewards = [] + for _ in range(5): # Play some episodes + cum_reward = 0 + + while True: # Play until we have a successful episode + + if dart_env_dict['use_inverse_kinematics']: # Generate an action for the current observation using a P-controller + action = np.reshape(env.env_method('action_by_p_control', control_kp, 2.0 * control_kp), + (config_dict['num_envs'], env.action_space.shape[0])) + else: # Random action + action = np.reshape(env.env_method('random_action'), + (config_dict['num_envs'], env.action_space.shape[0])) + + obs, rewards, dones, info = env.step(action) # Play this action + cum_reward += rewards + + # Render # + if dart_env_dict['enable_dart_viewer']: + env.render() + + if dones.any(): + episode_rewards.append(cum_reward) + break + + mean_reward = np.mean(episode_rewards) + + print("Mean reward: " + str(mean_reward)) + else: + print("You have set an invalid simulation_mode or some other settings in the config.py are wrong - aborting") + +def _evaluate_planar_grasping(env, config_dict, dart_env_dict, hyper_dict, randomBoxesGenerator): + """ + evaluate a saved model or many runs and many checkpoints per run. Print some stats and save pandas dfs (.csv) in an evaluation folder + + :param env: gym env + :param config_dict: configuration dictionary + :param dart_env_dict: dart dictionary + :param hyper_dict: hyperparameters dict. Models related: RUCKIG, and for PPO model, a checkpoint will be loaded (with saved hyperparams) + :param randomBoxesGenerator: spawn boxes for evaluating planar grasping agents + """ + + ######################################################################## + # Df to save evaluation stats # + # Columns: # + # run: 0, 1, 2, .. 
-> folder with checkpoints # + # timestep: 0, ..., total_timestep -> many checkpoints # + # Note: evaluate the agent in the same boxes for each saved checkpoint # + # e.g. run_0/model_3125_0, run_0/model_6250_1, etc # + ######################################################################## + + # Save all runs and all checkpoints per run # + df = pd.DataFrame(columns=['evaluation_name', 'run', 'timestep', 'reward', 'success_ratio']) + + # Keep a separate file for the best model # + df_best = pd.DataFrame(columns=['evaluation_name', + 'best_succ_ratio_model', 'best_succ_ratio', 'best_succ_ratio_reward', + 'best_reward_model', 'best_reward', 'best_reward_succ_ratio']) + + # Set dataset to test mode # + randomBoxesGenerator.mode = "test" + + ############################################ + # Keep best stats from all runs and models # + ############################################ + best_succ_ratio = 0 + best_succ_ratio_model = None + best_succ_ratio_reward = -np.inf # For best success ratio model save its corresponding reward value + + best_reward = -np.inf + best_reward_model = None + best_reward_succ_ratio = 0 # For best reward model save its corresponding success ratio value + + log_dir = config_dict["log_dir"] + + # Evaluate a single model # + if (config_dict["model_evaluation_type"] != "all" or config_dict["env_key"] == 'iiwa_ruckig_planar_grasping_dart_unity_env'): + + # Create a dir to save the evaluation pandas dfs # + Path(log_dir + "evaluation_single_dfs/").mkdir(parents=True, exist_ok=True) + + # Load model # + if(config_dict["model"] == "PPO"): + model = PPO.load(config_dict["model_evaluation_type"]) + model.policy.set_training_mode(False) + + run_name = config_dict["model_evaluation_type"].split("/") + run_name = run_name[-2] + "_" + run_name[-1] # e.g. 
run_0/model_3125_0 + + elif(config_dict["model"] == "RUCKIG"): + model = RuckigPlanarModel(env, control_cycle=dart_env_dict["control_cycle"], hyper_dict=hyper_dict) + run_name = "ruckig_model" + + print("===================================================") + print("Model evaluation: " + str(run_name)) + print("===================================================") + + # Evaluate single model # + mean_r, succ_ratio = _evaluate_planar_grapsing_single_model(env=env, model=model, log_dir=log_dir, run_name=run_name, model_id=0, + config_dict=config_dict, dart_env_dict=dart_env_dict, episodes_test=randomBoxesGenerator.val_size) + + # One model -> equivalent best stats # + best_reward = mean_r + best_reward_model = run_name + best_reward_succ_ratio = succ_ratio + + best_succ_ratio = succ_ratio + best_succ_ratio_model = run_name + best_succ_ratio_reward = mean_r + ###################################### + + # Save dfs # + df.loc[0] = [config_dict["evaluation_name"], 0, 0, mean_r, succ_ratio] + df.to_csv(log_dir + "evaluation_single_dfs/" + "df_evaluation_" + run_name + ".csv", index=False) + + df_best.loc[0] = [config_dict["evaluation_name"], + best_succ_ratio_model, best_succ_ratio, best_succ_ratio_reward, + best_reward_model, best_reward, best_reward_succ_ratio + ] + + df_best.to_csv(log_dir + "evaluation_single_dfs/" + "df_evaluation_best_" + run_name + ".csv", index=False) + + else: # Evaluate many runs and their saved checkpoints + + # Create a dir to save the evaluation pandas dfs # + Path(log_dir + "evaluation_all_dfs/").mkdir(parents=True, exist_ok=True) + + # Scan logs folder and find how many runs exist # + # logs/ -> run_0/, run_1/ # + log_dirs = [filename for filename in os.listdir(log_dir[:-1]) if filename.startswith("run_")] + log_dirs = sorted(log_dirs, key=lambda x: int(x.split("_")[1])) + + model_i = 0 # For run_0, run_1 + j = 0 # For pandas rows -> one df for all runs + + for log_dir_name in log_dirs: # run_i + print("=================================") + 
print("Model evaluation: " + str(model_i + 1)) + print("=================================") + + # Scan the checkpoints of this runs and sort them by the time step # + models_names = [ + filename for filename in os.listdir(log_dir + log_dir_name + "/") + if filename.startswith("model") and filename.endswith(".zip") + ] + + models_names = sorted(models_names, key=lambda x: int(x.split("_")[1])) + + # First time step 0 - baseline for all runs - set the initial reward value from the config_advanced.py - adapt if the reward denifition or task changes # + df.loc[j] = [config_dict["evaluation_name"], model_i + 1, 0, config_dict["reward_baseline"], 0.0] + j += 1 + + for i, m_name in enumerate(models_names): # Load and evaluate the current checkpoint of the run_i + print("Checkpoint number: " + str(i + 1)) + + # Load saved model # + model = PPO.load(log_dir + log_dir_name + "/" + m_name[:-4]) # Remove .zip + model.policy.set_training_mode(False) + + # Evaluate model # + mean_r, succ_ratio = _evaluate_planar_grapsing_single_model(env=env, model=model, log_dir=log_dir, run_name=log_dir_name, + model_id=m_name.split("_")[1], config_dict=config_dict, + dart_env_dict=dart_env_dict, episodes_test=randomBoxesGenerator.val_size) + # Update best stats # + if (mean_r > best_reward): # reward-based + best_reward = mean_r + best_reward_model = log_dir + log_dir_name + "/" + m_name # which model had the best reward + best_reward_succ_ratio = succ_ratio + + if (succ_ratio > best_succ_ratio): # success ratio-based + best_succ_ratio = succ_ratio + best_succ_ratio_model = log_dir + log_dir_name + "/" + m_name + best_succ_ratio_reward = mean_r + + # Save results for the current checkpoint # + df.loc[j] = [config_dict["evaluation_name"], model_i + 1, m_name.split("_")[1], mean_r, succ_ratio] + j += 1 + + # All checkpoints were evaluated for the run_i. 
Go to the next run + model_i += 1 + + # Save pandas df of all checkpoints and runs # + df.to_csv(log_dir + "evaluation_all_dfs/" + "df_evaluation_all" + ".csv", index=False) + + # Save pandas df of the best stats # + df_best.loc[0] = [ + config_dict["evaluation_name"], + best_succ_ratio_model, best_succ_ratio, best_succ_ratio_reward, + best_reward_model, best_reward, best_reward_succ_ratio + ] + df_best.to_csv(log_dir + "evaluation_all_dfs/" + "df_evaluation_best" + ".csv", index=False) + + # Print the best stats in console # + print("best_reward: " + str(best_reward)) + print("best_reward_model: " + str(best_reward_model)) + print("best_reward_succ_ratio: " + str(best_reward_succ_ratio)) + + print("best_succ_ratio: " + str(best_succ_ratio)) + print("best_succ_ratio_model: " + str(best_succ_ratio_model)) + print("best_succ_ratio_reward: " + str(best_succ_ratio_reward)) + + # Revert dataset to train mode # + randomBoxesGenerator.mode = "train" + +def _evaluate_planar_grapsing_single_model(env, model, log_dir, run_name, model_id, config_dict, dart_env_dict, episodes_test): + """ + evaluate a saved model - grasping planar environment. It writes metrics in a tensorboard summary file + + :param env: gym env + :param model: saved model to evaluate + :param log_dir: log directory to save the tensorboard summary file + :param run_name: name of the run e.g. run_0 + :param model_id: checkpoint id e.g. model_200 -> 200 + :param config_dict: configuration dict + :param dart_env_dict: dart dict + :param episodes_test: how many evaluation targets (boxes) to test + + :return: mean_r, succ_ratio -> mean reward and success ratio for all the episodes (episodes_test) + """ + + # Save results to tensorboard summary file # + writer = SummaryWriter(str(log_dir) + "/evaluation_tb_" + run_name) + + mean_reward = 0 + success_count = 0 # #successfull episodes + total_episodes = 0 # total episodes + + num_envs = config_dict["num_envs"] + + # Save total reward per episode per each env. 
These detailed stats are not printed by default (see below) # + total_rewards = np.zeros((num_envs, 5000)) # Assume max 5000 episodes played + total_success = np.zeros((num_envs, 5000)) # Assume max 5000 episodes played + num_episode = 0 # All envs terminate at the same time step + + ############################################################################################################## + # Calculate how many steps to play for the required "episodes_test" given the number of the envs, # + # and the 'max_time_step defined' per each episode # + # Imporant: make sure your values result in a perfect division, else some target boxes will not be evaluated # + ############################################################################################################## + time_steps = int(episodes_test * dart_env_dict["max_time_step"] / num_envs) + + obs = env.reset() + for _ in range(time_steps): + + action, _states = model.predict(obs, deterministic=True) + obs, rewards, dones, info = env.step(action) + total_rewards[:, num_episode] += rewards + + # Render # + if dart_env_dict['enable_dart_viewer']: + env.render() + + for i in range(num_envs): # Check if done + if dones[i]: + if(i == 0): # Assume envs terminate at the same time step + num_episode += 1 + + mean_reward += total_rewards[i][num_episode - 1] # Append total reward of this env in the cumulative sum, 'mean_rewards' + + if (info[i]["success"] == True): # Successful episode + success_count += 1 + total_success[i][num_episode - 1] = 1 + else: + total_success[i][num_episode - 1] = 0 + + total_episodes += 1 + + # Fix metrics # + succ_ratio = (success_count * 100) / total_episodes + mean_r = mean_reward / float(total_episodes) + + # Log metrics in tb summary file # + writer.add_scalar('success_ratio', succ_ratio, model_id) + writer.add_scalar('total_test_episodes', total_episodes, model_id) + writer.add_scalar('mean_reward', mean_r, model_id) + writer.flush() + + # Print some stats and metrics in the console # + 
print("End of model evaluation with name: " + run_name + " and id: " + str(model_id)) + print("Number of targets that were tested: " + str(total_episodes)) + print("Success ratio: " + str(succ_ratio)) + print("Mean reward: " + str(mean_r)) + + # Uncomment these lines for more detailed results (per episode) # + #print("Total reward for 50 episodes for every env") + #print(total_rewards[:, 0:50]) + #print("Success flag for 50 episodes for every env") + #print(total_success[:, 0:50]) + + print("") + writer.close() + + return [mean_r, succ_ratio] diff --git a/agent/utils_advanced/helpers.py b/agent/utils_advanced/helpers.py new file mode 100644 index 0000000..e169cff --- /dev/null +++ b/agent/utils_advanced/helpers.py @@ -0,0 +1,150 @@ +import numpy as np + +# Import envs # + +# Base envs +from envs.iiwa_sample_joint_vel_env import IiwaJointVelEnv +from envs.iiwa_sample_env import IiwaSampleEnv + +# Planar grasping # +from envs_advanced.iiwa_numerical_planar_grasping_env import IiwaNumericalPlanarGraspingEnv +from envs_advanced.iiwa_end_to_end_planar_grasping_env import IiwaEndToEndPlanarGraspingEnv +from envs_advanced.iiwa_ruckig_planar_grasping_env import IiwaRuckigPlanarGraspingEnv + +# Monitor envs +from stable_baselines3.common.vec_env import VecMonitor +from simulator_vec_env import SimulatorVecEnv + +def get_env(config_dict, dart_env_dict, gym_env_dict, log_dir, reward_dict=None, goal_dict=None, manual_actions_dict=None, randomization_dict=None, randomBoxesGenerator=None): + """ + Set-up the env according to the input dictionary settings + """ + + # Some basic checks # + if(config_dict["simulation_mode"] == "train" and config_dict["env_key"] == "iiwa_ruckig_planar_grasping_dart_unity_env"): + raise Exception("Can not train RUCKIG model - aborting") + + if(config_dict["env_key"] == "iiwa_ruckig_planar_grasping_dart_unity_env" and config_dict["model"] != "RUCKIG"): + raise Exception("For ruckig env use only the RUCKIG model - aborting") + + 
if(config_dict["env_key"] != "iiwa_ruckig_planar_grasping_dart_unity_env" and config_dict["model"] == "RUCKIG"): + raise Exception("For non ruckig env do not use the RUCKIG model - aborting") + + if(config_dict["env_key"] == "iiwa_end_to_end_planar_grasping_dart_unity_env" and config_dict["use_images"] == False): + raise Exception("For end_to_end env enable the images - aborting") + + if(config_dict["env_key"].find("planar") != -1 and ((reward_dict is None) or (goal_dict is None) or (randomization_dict is None) or (randomBoxesGenerator is None))): + raise Exception("Missing dictionaries for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (manual_actions_dict is None or manual_actions_dict["manual"] == False)): + raise Exception("Please enable manual actions for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (manual_actions_dict["manual_behaviour"] != "planar_grasping")): + raise Exception("Please enable manual planar grasing actions for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (config_dict["enable_end_effector"] == False or config_dict["robotic_tool"].find("gripper") == -1)): + raise Exception("Please enable the gripper for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (goal_dict["goal_type"] != "box")): + raise Exception("Please enable boxes as targets for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (goal_dict["box_ry_active"] == True and np.isclose(reward_dict["reward_pose_weight"], 0.0))): + raise Exception("Add more weight to the reward pose to activate 3 DoF control for planar envs when the rotation is active in spanwed boxes - aborting") + + if((manual_actions_dict["manual"] == True) and (manual_actions_dict["manual_behaviour"] == "planar_grasping" or manual_actions_dict["manual_behaviour"] == "close_gripper") and (config_dict["enable_end_effector"] == False or 
config_dict["robotic_tool"].find("gripper") == -1)): + raise Exception("Please enable the gripper for planar_grasping and close_gripper manual actions - aborting") + + if((manual_actions_dict["manual"] == True) and (manual_actions_dict["manual_behaviour"] == "planar_grasping" or manual_actions_dict["manual_behaviour"] == "close_gripper") and (config_dict["env_key"] == "iiwa_joint_vel")): + raise Exception("Can not use dart-based manual actions for iiwa_joint_vel env - aborting") + # End checks # + + env_key = config_dict['env_key'] + def create_env(id=0): + + ################################################################################################################################# + # Important: 'dart' substring should always be included in the 'env_key' for dart-based envs. E.g. 'iiwa_sample_dart_unity_env' # + # If 'dart' is not included, IK behaviour can not be used # + ################################################################################################################################# + + # joints control without dart + if env_key == 'iiwa_joint_vel': + env = IiwaJointVelEnv(max_ts=dart_env_dict['max_time_step'], id=id, config=config_dict) + + # Reaching the red target sample env + # task-space with dart or joint joint space control + # model-based control with P-controller available + elif env_key == 'iiwa_sample_dart_unity_env': + env = IiwaSampleEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], + task_monitor=dart_env_dict['task_monitor'], with_objects=dart_env_dict['with_objects'], + target_mode=dart_env_dict['target_mode'], target_path=dart_env_dict['target_path'], + goal_type=goal_dict['goal_type'], joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], 
max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], robotic_tool=config_dict["robotic_tool"], + env_id=id) + + # Planar RL grasping using the true numeric observations from the UNITY simulator # + elif env_key == 'iiwa_numerical_planar_grasping_dart_unity_env': + env = IiwaNumericalPlanarGraspingEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], + task_monitor=dart_env_dict['task_monitor'], with_objects=dart_env_dict['with_objects'], + target_mode=dart_env_dict['target_mode'], goal_type=goal_dict['goal_type'], + randomBoxesGenerator=randomBoxesGenerator, joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], noise_enable_rl_obs=randomization_dict['noise_enable_rl_obs'], + noise_rl_obs_ratio=randomization_dict['noise_rl_obs_ratio'], reward_dict=reward_dict, + agent_kp=gym_env_dict['agent_kp'], agent_kpr=gym_env_dict['agent_kpr'], + robotic_tool=config_dict["robotic_tool"], + env_id=id) + + # Planar RL grasping using image observations as state representation - end-to-end learning # + elif env_key == 'iiwa_end_to_end_planar_grasping_dart_unity_env': + env = 
IiwaEndToEndPlanarGraspingEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], task_monitor=dart_env_dict['task_monitor'], + target_mode=dart_env_dict['target_mode'], goal_type=goal_dict['goal_type'], + randomBoxesGenerator=randomBoxesGenerator, joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], noise_enable_rl_obs=randomization_dict['noise_enable_rl_obs'], + noise_rl_obs_ratio=randomization_dict['noise_rl_obs_ratio'], reward_dict=reward_dict, + agent_kp=gym_env_dict['agent_kp'], agent_kpr=gym_env_dict['agent_kpr'], + image_size=config_dict['image_size'], robotic_tool=config_dict["robotic_tool"], + gripper_type=config_dict["gripper_type"], + env_id=id) + + # Planar grasping using time-optimal trajectory generation method - RUCKIG # + elif env_key == 'iiwa_ruckig_planar_grasping_dart_unity_env': + env = IiwaRuckigPlanarGraspingEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], + task_monitor=dart_env_dict['task_monitor'], with_objects=dart_env_dict['with_objects'], + target_mode=dart_env_dict['target_mode'], goal_type=goal_dict['goal_type'], + randomBoxesGenerator=randomBoxesGenerator, 
joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], noise_enable_rl_obs=randomization_dict['noise_enable_rl_obs'], + noise_rl_obs_ratio=randomization_dict['noise_rl_obs_ratio'], reward_dict=reward_dict, + agent_kp=gym_env_dict['agent_kp'], agent_kpr=gym_env_dict['agent_kpr'], + threshold_p_model_based=gym_env_dict["threshold_p_model_based"], robotic_tool=config_dict["robotic_tool"], + env_id=id) + + # Set env seed # + env.seed((id * 150) + (id + 11)) + + return env + + num_envs = config_dict['num_envs'] + env = [create_env for i in range(num_envs)] + env = SimulatorVecEnv(env, config_dict, manual_actions_dict, reward_dict) # Set vectorized env + env = VecMonitor(env, log_dir, info_keywords=("success",)) # Monitor envs + + return env diff --git a/agent/utils_advanced/manual_actions.py b/agent/utils_advanced/manual_actions.py new file mode 100644 index 0000000..9ba1f7f --- /dev/null +++ b/agent/utils_advanced/manual_actions.py @@ -0,0 +1,381 @@ +""" +Manual actions functionality used in simulator_vec_env.py - see step() method + +Note: you can also implement your own manual function - in that case create a new function below and call it in the simulator_vec_env.py +""" + +import ast +import numpy as np + +def configure_manual_settings_and_get_manual_function(vec_env, manual_actions_dict): + """ + Depending of the input manual_behaviour (see config_advanced.py) setting, set the class variables of a SimulatorVecEnv class related to the manual actions, + and return a corresponding manual function to excecute after the end of the episode - step() method during reseting in simulator_vec_env.py + + :param 
vec_env: SimulatorVecEnv object + :param manual_actions_dict: manual actions dictionary settings + + :return: manual_func: returns a function to excecute at the end of the episode - step() method during reseting - SimulatorVecEnv class + format: manual_func(vec_env, rews, infos, enable_rewards=True) + where: + vec_env -> SimulatorVecEnv object (contains the envs objects, etc) + rews -> rewards list of all envs + infos -> infos list of dictionaries of all envs + enable_rewards -> whether to give reward/penalties during manual actions + """ + + manual_func = _send_no_manual # Do nothing by default + + # Planar envs: Down -> close -> up # + if(vec_env.manual_behaviour == "planar_grasping"): + # Clip gripper action to this limit # + if(vec_env.robotic_tool.find("3_gripper") != -1): + vec_env.gripper_clip = 90 + elif(vec_env.robotic_tool.find("2_gripper") != -1): + vec_env.gripper_clip = 250 + else: + vec_env.gripper_clip = 90 + + vec_env.manual_rewards = manual_actions_dict["manual_rewards"] # For enable_rewards + vec_env.manual_kp = manual_actions_dict["manual_kp"] # Gain for P-controller, position error + vec_env.manual_kpr = manual_actions_dict["manual_kpr"] + vec_env.manual_tol = manual_actions_dict["manual_tol"] # Tolerance of P-controller + vec_env.manual_down_height = manual_actions_dict["manual_down_height"] + vec_env.manual_up_height = manual_actions_dict["manual_up_height"] + vec_env.manual_close_steps = manual_actions_dict["manual_close_steps"] # How many steps to close the gripper (with 'manual_close_vel' per each step) + vec_env.manual_close_vel = manual_actions_dict["manual_close_vel"] # Velocity value to close the gripper per each step + vec_env.manual_steps_same_state = manual_actions_dict["manual_steps_same_state"] # How many steps the ee is allowed to not moved during the manual actions down/up + vec_env.manual_tol_same_state = manual_actions_dict["manual_tol_same_state"] # If the gripper has not moved more than this distance threshold value within 
'steps_same_state' -> it means collision + + # Controll all DoF with a P-controller during the manual actions # + vec_env.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 1, + "kp_x": 1, + "kp_y": 1, + "kp_z": 1 + } + + + ##################################################################################################### + # prev_dist: needed for 'manual_steps_same_state'. Save the previous distance of the ee to the goal # + # and compare it to the current distance. See _go_down_up_manual function below # + #################################################################################################################################### + # pid_step: counter to reset the p-controllers target pose for each env. During the down/up manual actions, # + # keep in the other DoF the same pose as the pose that the agent had at the end of the RL episode # + # e.g. rotation of the gripper - it means the P-controller does not correct the rotation when approaching the target box # + #################################################################################################################################### + vec_env.prev_dist = [] + vec_env.pid_step = [] + for env_idx in range(len(vec_env.envs)): + vec_env.pid_step.append(0) + vec_env.prev_dist.append(0) + + # Set grasping manual function to call at the end of the episode # + manual_func = _send_manual_actions_planar_grasping + + # Close the gripper only # + elif(vec_env.manual_behaviour == "close_gripper"): + # Clip gripper action to this limit # + if(vec_env.robotic_tool.find("3_gripper") != -1): + vec_env.gripper_clip = 90 + elif(vec_env.robotic_tool.find("2_gripper") != -1): + vec_env.gripper_clip = 250 + else: + vec_env.gripper_clip = 90 + + vec_env.manual_close_steps = manual_actions_dict["manual_close_steps"] + vec_env.manual_close_vel = manual_actions_dict["manual_close_vel"] + + # Set close manual function to call at the end of the episode # + manual_func = _send_manual_actions_close_gripper + + # 
Invalid option do nothing # + else: + manual_func = _send_no_manual + + return manual_func + +################################## +# Manual actions implementations # +################################## + +###################### +# 0. Nothing happens # +###################### +def _send_no_manual(vec_env, rews=None, infos=None, enable_rewards=False): + pass + +###################### +# 1. Planar grasping # +###################### +def _send_manual_actions_planar_grasping(vec_env, rews, infos, enable_rewards=True): + """ + Planar grasping manual actions: + 1. down + 2. close + 3. up + + :param rews: rewards of envs + :param infos: infos dicts of envs + :param enable_rewards: whether to give rewards/penalties during manual actions + """ + envs_active = [] + for env in vec_env.envs: # Send manual actions commands only to valid envs + if env.collided_env != 1: + envs_active.append(env) + + # 1.Down # + envs_active = _go_down_up_manual(vec_env, envs_active, rews, vec_env.manual_down_height, gripper=0.0, enable_rewards=enable_rewards) + + # 2.Close # + envs_active = _close_manual(vec_env, envs_active, rews, steps=vec_env.manual_close_steps, vel=vec_env.manual_close_vel, enable_rewards=enable_rewards) + + # 3.Up # + gripper_pos = np.clip(vec_env.manual_close_steps * vec_env.manual_close_vel, 0, vec_env.gripper_clip) # Gripper is closed - during up movement + envs_active = _go_down_up_manual(vec_env, envs_active, rews, vec_env.manual_up_height, gripper=gripper_pos, enable_rewards=enable_rewards) + +######################## +# 2. 
Close the gripper # +######################## +def _send_manual_actions_close_gripper(vec_env, rews, infos, enable_rewards=True): + """ + Close the gripper at the end of the episode + + :param rews: rewards of envs + :param infos: infos dicts of envs + :param enable_rewards: whether to give rewards/penalties during manual actions + """ + envs_active = [] + for env in vec_env.envs: # Send manual actions commands only to valid envs + if env.collided_env != 1: + envs_active.append(env) + + # 1. Close # + envs_active = _close_manual(vec_env, envs_active, rews, steps=vec_env.manual_close_steps, vel=vec_env.manual_close_vel, enable_rewards=enable_rewards) + +########### +# Helpers # +########### +def _update_envs_manual_and_check_collisions(vec_env, envs_active, rews, observations, enable_rewards=True): + """ + update the active envs (dart chain, etc) during the manual actions and give rewards/collision penalties if enabled + + :param observations: by unity + :param time_step_update: whether to update the timestep of the gym agents + - it is always set to False, as the RL-episode has been finished - else adapt to your task + + :return: terminated_environments, rews, infos, obs_converted, dones - see simulator_vec_env.py step() method for more details + """ + envs_active_new = [] + + for env in envs_active: + observation = observations[env.id] + ob_coll = ast.literal_eval(observation)["Observation"][33] + + ########################################################################## + # During the manual actions, the RL agents do not generate any velocites # + # This line of code is related to the task monitor visualization # + ########################################################################## + env.action_state = np.zeros(env.action_space_dimension) + + # Update only valid envs # + if (float(ob_coll) != 1.0 and env.joints_limits_violation() == False): + env.update(ast.literal_eval(observation), time_step_update=False) + envs_active_new.append(env) + + # Collision - 
add penalty # + else: + if (enable_rewards): + rews[env.id] += vec_env.reward_dict["reward_collision"] + env.collided_env = 1 + env.collision_flag = True # Give penality only + + # Render: Not advised to set 'enable_dart_viewer': True, during RL-training. Use it only for debugging # + vec_env.render() + + return envs_active_new + +def _go_down_up_manual(vec_env, envs_active, rews, height_target, gripper=0.0, enable_rewards=True): + """ + go down or up by using a P-controller + + :param envs_active: active envs + :param rews: rewards of envs + :param height_target: height to move the ee + :param gripper: gipper position to keep during up/down (e.g. close/open) + :param enable_rewards: whether to give penalties during manual actions + + :return: envs_active - which envs did not collided during manual actions + """ + + for env in envs_active: + env.dart_sim.ik_by_sns = True # Use sns during manual actions - adapt if needed + + # Set viewer and the change target during the up movement # + if(gripper != 0.0): + + ############################################################################################################################ + # Note: in the dart viewer the ee at the goal is more up as we assume that we have a gripper (urdf) but the gripper is not # + # yet visualized in the viewer. 
The vec_env.dart_sim.get_pos_distance() returns 0 correctly at the goal # + ############################################################################################################################ + target_object_X, _, target_object_Z = env.init_object_pose_unity[0], env.init_object_pose_unity[1], env.init_object_pose_unity[2] + tool_length = 0.0 # Adapt if the height of the box changes + target_object_RX, target_object_RY, target_object_RZ = env.get_box_rotation_in_target_dart_coords_angle_axis() + + target = [target_object_RX, target_object_RY, target_object_RZ, target_object_Z, -target_object_X, height_target + tool_length] + env.set_target(target) # Set the dart target + + # Unity expects velocities in joint space for each env # + # Send zero velocities to collided envs # + actions = gripper * np.ones((len(vec_env.envs), 8)) + + ################################################################### + # Save the last pose of the ee before the manual actions and keep # + # the DoF that we do not control with the manual actions fixed # + # e.g. 
do not rotate the gripper during the up/down movement # + ################################################################### + target_pos_quat = [None] * len(vec_env.envs) # Target orientation in quat + target_pos_x = np.zeros(len(vec_env.envs)) + target_pos_z = np.zeros(len(vec_env.envs)) + + # Set the target height # + target_pos_y = height_target + + # Wait all envs to finish # + while True: + actions[:, :7] = np.zeros((len(vec_env.envs), 7)) + flag_active = False # At least one envs has not finished with its manual actions when set to True + + # Active envs (non-collided) during this loop # + envs_active_new = [] + for env in envs_active: + + # Position of the ee in Unity # + ee_x, ee_y, ee_z, _, _, _ = env.get_ee_pose_unity() + + # Current orientation of ee in quaternion in Dart # + curr_quat = env.get_rot_ee_quat() + + # Set target pose - 0 timestep # + if (vec_env.pid_step[env.id] == 0): + target_pos_quat[env.id] = curr_quat # Keep fixed orientation + target_pos_x[env.id] = ee_x # Only move down/up keep. 
Keep fixed x, z position (dart) + target_pos_z[env.id] = ee_z + + # Current positional errors in dart # + x_diff = target_pos_z[env.id] - ee_z + y_diff = -target_pos_x[env.id] + ee_x + z_diff = target_pos_y - ee_y + + # Current orientation error in dart # + rx_diff, ry_diff, rz_diff = env.get_rot_error_from_quaternions(target_pos_quat[env.id], curr_quat) + + ############################################################################################# + # Distance from the goal pose # + # If we moved in some directions other than up/down make corrections using the P-controller # + ############################################################################################# + dist = np.linalg.norm(np.array([x_diff, y_diff, z_diff, rx_diff, ry_diff, rz_diff])) + + # Goal reached - zero vel should be send to the Unity simulator # + if (dist < vec_env.manual_tol): + envs_active_new.append(env) + continue + + ########################################################################################### + # Check if we have moved at all within "vec_env.manual_steps_same_state" time # + # e.g. 
case: ee fingers bounce at the top surface of the box when we go down -> collision # + ########################################################################################### + if (vec_env.pid_step[env.id] != 0 and vec_env.pid_step[env.id] % vec_env.manual_steps_same_state == 0): + + # The ee has not moved a lot from the previous time -> collision # + if (abs(vec_env.prev_dist[env.id] - dist) <= vec_env.manual_tol_same_state): + if (enable_rewards): + rews[env.id] += vec_env.reward_dict["reward_collision"] + else: + pass + + env.collided_env = 1 + env.collision_flag = True # Give penality only once + continue + + ###################################################### + # No collision yet and the goal has not been reached # + # Save current distance to the goal from the ee # + ###################################################### + vec_env.prev_dist[env.id] = dist + + ################################################################################## + # Calculate joint velocities. Call the custom P-controller that first calculates # + # task-space velocities and then returns, using IK, joint-space velocities # + ################################################################################## + vel = env.action_by_p_controller_custom(rx_diff, ry_diff, rz_diff, x_diff, y_diff, z_diff, vec_env.manual_kpr, vec_env.manual_kp, config_p_controller=vec_env.config_p_controller) + + actions[env.id][0:7] = vel # Set action for this env + + # Update P-controller step # + vec_env.pid_step[env.id] += 1 + flag_active = True # At lease one env is active - continue with manual actions + envs_active_new.append(env) + + # No more actions to take: all envs have reached their goal, and/or no more active envs # + if (flag_active == False): + break + else: + ############################ + # Send JSON action command # + ############################ + request = vec_env._create_request("ACTION", vec_env.envs, actions) + observations = vec_env._send_request(request) + + 
##################################################### + # Update envs (dart chain) and check for collisions # + ##################################################### + envs_active = _update_envs_manual_and_check_collisions(vec_env, envs_active_new, rews, observations, enable_rewards) + + ############################################################ + # Manual action down/up finished. Set envs_active with # + # the envs that did not collided during this manual action # + ############################################################ + envs_active = envs_active_new + + # Deactivate sns and reset P-controller # + for env in vec_env.envs: + vec_env.pid_step[env.id] = 0 + vec_env.prev_dist[env.id] = 0 + env.dart_sim.ik_by_sns = False + + return envs_active + +def _close_manual(vec_env, envs_active, rews, steps=4, vel=15, enable_rewards=True): + """ + close the gripper + + :param envs_active: active envs + :param rews: rewards of envs + :param steps: how many gripper commands to send + :param vel: velocity per command + :param enable_rewards: whether to give reward/penalties during manual actions + + :return: envs_active - which envs did not collided during manual actions + """ + + actions_close = np.zeros((len(vec_env.envs), 8)) # Do not move the robot: joints - zeros, and gripper position + + for j in range(steps): # Send gripper position for this steps + + for env in envs_active: # For each active env + actions_close[env.id][7] = np.clip((actions_close[env.id][7] + vel), 0.0, vec_env.gripper_clip) + + ############################ + # Send JSON action command # + ############################ + request = vec_env._create_request("ACTION", vec_env.envs, actions_close) + observations = vec_env._send_request(request) + + ##################################################### + # Update envs (dart chain) and check for collisions # + ##################################################### + _update_envs_manual_and_check_collisions(vec_env, envs_active, rews, observations, 
enable_rewards=enable_rewards) + + return envs_active diff --git a/agent/utils_advanced/monitoring_agent.py b/agent/utils_advanced/monitoring_agent.py new file mode 100644 index 0000000..bdc6f6d --- /dev/null +++ b/agent/utils_advanced/monitoring_agent.py @@ -0,0 +1,98 @@ +""" +Monitoring functionality for SB3 gym agents - save checkpoints and additional metrics +""" + +import os +import sys +from pathlib import Path +from tqdm import tqdm + +import numpy as np + +from stable_baselines3.common.results_plotter import load_results +from stable_baselines3.common.callbacks import BaseCallback + +from torch.utils.tensorboard import SummaryWriter + +class SaveOnBestTrainingRewardCallback(BaseCallback): + def __init__(self, check_freq: int, save_model_freq: int, log_dir: str, total_timesteps: int, num_envs: int, best_mean="inf", verbose=1): + + super(SaveOnBestTrainingRewardCallback, self).__init__(verbose) + + self.check_freq = check_freq # Check if there is any improvement from the previous best model - frequency + self.save_model_freq = save_model_freq # Save model checkpoints at this frequency + self.model_id = 0 + self.log_dir = log_dir + self.num_envs = num_envs + self.save_path = os.path.join(log_dir, 'best_model') # One best model per run + self.save_path_model = os.path.join(log_dir, 'model') # Save many model checkpoints + self.best_mean_success = 0.0 + + # Create path to save models if not exists # + # Tensorboard summary writer and custom metrics # + Path(self.log_dir).mkdir(parents=True, exist_ok=True) + + custom_logs_path = log_dir + "custom_log_tb/" + Path(custom_logs_path).mkdir(parents=True, exist_ok=True) + + writer = SummaryWriter(custom_logs_path) + self.writer = writer + + if (best_mean == "inf"): # Training from the beginning + self.best_mean_reward = -np.inf + else: # Best model already exists and the user has defined a 'best_mean_reward' value + self.best_mean_reward = float(best_mean) + + + self.progress_bar = tqdm(total=total_timesteps, 
file=sys.stdout) + self.progress_bar.set_postfix({"Mean Reward": 0.0, "Success ratio": 0.0}) + + def _on_step(self) -> bool: + ''' + Update the best_mean_reward - using the 50 last episodes (adapt if needed)- and save metrics and checkpoints + ''' + if self.n_calls % self.save_model_freq == 0: # Save model checkpoint + print("Saving model number: " + str(self.model_id)) + self.model.save(self.save_path_model + "_" + str(self.num_timesteps) + "_" + str(self.model_id)) + self.model_id += 1 + + if self.n_calls % self.check_freq == 0: # Update best_mean_reward and metrics + + results = load_results(self.log_dir) + + if len(results.index) > 0: + mean_reward = np.mean(results["r"].tail(50)) + mean_success = np.mean(results["success"].tail(50)) + + # Update summary writer # + self.writer.add_scalar('Reward/ep_rew_best', self.best_mean_reward, self.num_timesteps) + self.writer.add_scalar('Reward/ep_reward_mean', mean_reward, self.num_timesteps) + self.writer.add_scalar('Reward/ep_success_mean', mean_success, self.num_timesteps) + self.writer.flush() + + self.progress_bar.update(self.n_calls * self.num_envs - self.progress_bar.n) + self.progress_bar.set_postfix({"Mean Reward": mean_reward, "Success ratio": mean_success}) + + if mean_success > self.best_mean_success: + self.best_mean_success = mean_success + + # New best model: save it - based on the reward # + if mean_reward > self.best_mean_reward: + print(f"Saving new best model to {self.save_path}.zip") + self.model.save(self.save_path) + self.best_mean_reward = mean_reward + + return True + + def _on_training_end(self) -> None: + """ + Print some stats at the end of the agenttraining + """ + + print("Training ended with best mean reward: " + str(self.best_mean_reward)) + self.writer.add_scalar('best_reward', self.best_mean_reward) + self.writer.flush() + + self.progress_bar.close() + self.writer.close() + diff --git a/docker/requirements.txt b/docker/requirements.txt deleted file mode 100644 index d9b6994..0000000 --- 
a/docker/requirements.txt +++ /dev/null @@ -1,81 +0,0 @@ -absl-py==0.7.1 -asn1crypto==0.24.0 -astor==0.8.0 -atari-py==0.2.6 -attrs==20.3.0 -autobahn==20.12.2 -Automat==20.2.0 -Box2D==2.3.10 -ccd-tools==0.8.17 -cffi==1.14.4 -cloudpickle==1.6.0 -cmake==3.18.4.post1 -constantly==15.1.0 -cryptography==3.3.1 -cycler==0.10.0 -Cython==0.29.21 -decorator==4.4.2 -future==0.18.2 -gast==0.2.2 -geccoinv==0.8.6 -glfw==2.0.0 -google-pasta==0.1.7 -grpcio==1.21.1 -gym==0.17.3 -h5py==2.9.0 -hyperlink==20.0.1 -idna==2.6 -imageio==2.9.0 -incremental==17.5.0 -joblib==1.0.0 -Keras-Applications==1.0.8 -Keras-Preprocessing==1.1.0 -keyring==10.6.0 -keyrings.alt==3.0 -kiwisolver==1.3.1 -lockfile==0.12.2 -Markdown==3.1.1 -matplotlib==3.3.3 -networkx==2.5 -numpy==1.16.4 -opencv-python==4.4.0.46 -pandas==1.1.5 -Pillow==8.0.1 -protobuf==3.19.4 -pyaml==20.4.0 -pycparser==2.20 -pycrypto==2.6.1 -pyglet==1.5.0 -#pygobject==3.26.1 -PyHamcrest==2.0.2 -pyparsing==2.4.7 -#python-apt==1.6.4 -python-dateutil==2.8.1 -pytz==2020.4 -PyWavelets==1.1.1 -pyxdg==0.25 -PyYAML==5.3.1 -pyzmq==20.0.0 -roslibpy==1.1.0 -scikit-image==0.17.2 -scikit-learn==0.24.1 -scikit-optimize==0.8.1 -scipy==1.5.4 -SecretStorage==2.3.1 -sip-models==0.1.2 -six==1.13.0 -#stable-baselines==2.10.1 -tensorboard==1.14.0 -tensorflow-estimator==1.14.0 -tensorflow-gpu==1.14.0 -termcolor==1.1.0 -threadpoolctl==2.1.0 -tifffile==2020.9.3 -torch==1.10.2 -Twisted==20.3.0 -txaio==20.4.1 -Werkzeug==0.15.4 -wandb==0.12.11 -wrapt==1.11.2 -zmq==0.0.0 -zope.interface==5.2.0