diff --git a/resources/dart_additional_files/python/dartpy/gui/osg/Viewer.cpp b/Dart_Additional_Files/python/dartpy/gui/osg/Viewer.cpp similarity index 100% rename from resources/dart_additional_files/python/dartpy/gui/osg/Viewer.cpp rename to Dart_Additional_Files/python/dartpy/gui/osg/Viewer.cpp diff --git a/resources/dart_additional_files/python/dartpy/math/Geometry.cpp b/Dart_Additional_Files/python/dartpy/math/Geometry.cpp similarity index 100% rename from resources/dart_additional_files/python/dartpy/math/Geometry.cpp rename to Dart_Additional_Files/python/dartpy/math/Geometry.cpp diff --git a/docker/Commands b/Docker/Commands similarity index 63% rename from docker/Commands rename to Docker/Commands index 9111cb6..20d3124 100644 --- a/docker/Commands +++ b/Docker/Commands @@ -1,29 +1,31 @@ -# Build a docker image with all dependencies for Linux running the command below from the repo root folder. -# Dependency: Docker, Nvidia-Docker, Nvidia cuda-enabled GPU. If you do not have Nvidia GPU you might use also the Dockerfile_Windows for building image on Linux. -docker build . -t vtprl_image -f docker/Dockerfile +# Build a docker image with all dependencies for Linux running the command below from the repo root folder +# Dependency: Docker, Nvidia-Docker, Nvidia cuda-enabled GPU. if you do not have Nvidia GPU you might use also the Dockerfile_Windows for building image on Linux) +docker build . -t cmlr_image -f Docker/Dockerfile # Build a docker image with all dependencies for Windows running the command below from the repo root folder on Windows. -# Dependencies: Docker for Windows with Linux containers enabled. Only difference from Dockerfile is the root image it starts from as Docker Windows does not support GPU acceleration from inside Docker. -docker build . -t vtprl_image -f docker/Dockerfile_Windows +# Dependencies: Docker for Windows with Linux containers enabled. 
Only difference from Dockerfile is the root image it starts from as Docker Windows does not support GPU acceleration from inside Docker +docker build . -t cmlr_image -f Docker/Dockerfile_Windows -# These commands are useful to run before starting the container in case you want to start the simulator from a shell inside the container (not possible for Windows, works on linux). +# Starting the simulator on a fake display of X server on the AI4DI server from the shell: +DISPLAY=:3 ./ManipulatorEnvironment_Linux.x86_64 -export DISPLAY=":0" +# These commands are useful to run before starting the container in case you want to start the simulator from a shell inside the container (not possible for Windows, works on AI4DI servers, you can set it on your local machines or on VM from LRZ CC after installing X server) - it allows local connections to your X server +export DISPLAY=":3" xhost + local: -# After building create a container with the built image running the command below from the repo root folder. Same for Windows, but remove the --gpus parameter as it is not supported on Windows. +# After building create a container with the built image running the command below from the repo root folder. Same for Windows, but remove the --gpus parameter as it is not supported on Windows docker run --rm -it \ - --name vtprl_container \ + --name cmlr_container \ --gpus all \ -e DISPLAY \ -v $(pwd):/home/[repo_name]:rw \ -v $(pwd)/external/stable-baselines3:/home/repos/stable-baselines3:ro \ --privileged \ --net="host" \ - vtprl_image:latest \ + cmlr_image:latest \ bash # On local machines you can start the simulator outside of docker as well, cd to the repository root folder inside the docker container and run python3 agant/main.py to connect to the simulator and start training. -# If there are errors see "Troubleshooting" section. 
+# If there are errors see "Troubleshooting" section diff --git a/docker/Dockerfile b/Docker/Dockerfile similarity index 71% rename from docker/Dockerfile rename to Docker/Dockerfile index 810ab90..fed8607 100644 --- a/docker/Dockerfile +++ b/Docker/Dockerfile @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1 -FROM nvidia/cudagl:10.0-devel-ubuntu18.04 +FROM nvidia/cudagl:11.0-devel-ubuntu18.04 ENV DEBIAN_FRONTEND=noninteractive @@ -9,7 +9,7 @@ ADD https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 RUN dpkg -i cuda-keyring_1.0-1_all.deb RUN rm /etc/apt/sources.list.d/cuda.list RUN rm /etc/apt/sources.list.d/nvidia-ml.list -RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 python3.7 python3.7-dev python3-pip && \ +RUN apt-get update && apt-get install -y htop nano graphviz ffmpeg libsm6 libxext6 python3.7 python3.7-dev python3-pip && \ rm /usr/bin/python3 && ln -s /usr/bin/python3.7 /usr/bin/python3 && \ python3 -m pip install --upgrade pip @@ -23,20 +23,29 @@ RUN apt-get install -y build-essential cmake pkg-config git && \ cd pybind11 && mkdir build && cd build && cmake .. && make -j4 && make install && \ cd /home/repos && git clone https://github.com/dartsim/dart.git && cd dart && git checkout tags/v6.11.1 +# Install viewer dependencies +RUN apt-get install -y libxcb-icccm4 && apt-get install -y libxcb-image0 && \ + apt-get install -y libxcb-keysyms1 && apt-get install -y libxcb-render-util0 && \ + apt-get install -y libxcb-xkb-dev && apt-get install -y libxkbcommon-x11-0 && \ + apt-get install -y libxcb-xinerama0 + +RUN python3 -m pip install --upgrade pip + # Install DartPy from source with new bindings -COPY resources/dart_additional_files /home/repos/dart +COPY Dart_Additional_Files /home/repos/dart RUN cd /home/repos/dart && mkdir build && cd build && \ cmake .. 
-DCMAKE_INSTALL_PREFIX=/usr/ -DCMAKE_BUILD_TYPE=Release -DDART_BUILD_DARTPY=ON && \ make -j4 dartpy && make install-dartpy # Install Requirements -ADD docker/requirements.txt /home/requirements.txt -RUN python3 -m pip install -r /home/requirements.txt +ADD Docker/requirements.txt /home/requirements.txt +RUN python3 -m pip install -r /home/requirements.txt --log /home/requirements_logs.txt # Install Stable-Baselines-3 COPY external/stable-baselines3 /home/repos/stable-baselines3 #RUN cd /home/repos/stable-baselines3 && python3 -m pip install -e . -RUN cd /usr/local/lib/python3.7/dist-packages/ && python3 /home/repos/stable-baselines3/setup.py develop + +RUN cd /usr/local/lib/python3.7/dist-packages/ && python3 /home/repos/stable-baselines3/setup.py develop ENV PYTHONPATH='/home/repos/stable-baselines3' ENV WANDB_API_KEY=INSERT_KEY_HERE diff --git a/docker/Dockerfile_Windows b/Docker/Dockerfile_Windows similarity index 76% rename from docker/Dockerfile_Windows rename to Docker/Dockerfile_Windows index e0ad8e8..b046f0e 100644 --- a/docker/Dockerfile_Windows +++ b/Docker/Dockerfile_Windows @@ -16,14 +16,20 @@ RUN apt-get install -y build-essential cmake pkg-config git && \ cd /home/repos && git clone https://github.com/dartsim/dart.git && cd dart && git checkout tags/v6.11.1 # Install DartPy from source with new bindings -COPY resources/dart_additional_files /home/repos/dart +COPY Dart_Additional_Files /home/repos/dart RUN cd /home/repos/dart && mkdir build && cd build && \ cmake .. 
-DCMAKE_INSTALL_PREFIX=/usr/ -DCMAKE_BUILD_TYPE=Release -DDART_BUILD_DARTPY=ON && \ make -j4 dartpy && make install-dartpy +# Install viewer dependencies +RUN apt-get install -y libxcb-icccm4 && apt-get install -y libxcb-image0 && \ + apt-get install -y libxcb-keysyms1 && apt-get install -y libxcb-render-util0 && \ + apt-get install -y libxcb-xkb-dev && apt-get install -y libxkbcommon-x11-0 && \ + apt-get install -y libxcb-xinerama0 + # Install Requirements -ADD docker/requirements.txt /home/requirements.txt -RUN python3 -m pip install -r /home/requirements.txt +ADD Docker/requirements.txt /home/requirements.txt +RUN python3 -m pip install -r /home/requirements.txt --log /home/requirements_logs.txt # Install Stable-Baselines-3 COPY external/stable-baselines3 /home/repos/stable-baselines3 @@ -33,3 +39,4 @@ ENV PYTHONPATH='/home/repos/stable-baselines3' ENV WANDB_API_KEY=INSERT_KEY_HERE WORKDIR /home + diff --git a/docker/ROS-Dockerfile b/Docker/ROS-Dockerfile similarity index 100% rename from docker/ROS-Dockerfile rename to Docker/ROS-Dockerfile diff --git a/Docker/requirements.txt b/Docker/requirements.txt new file mode 100644 index 0000000..bccf604 --- /dev/null +++ b/Docker/requirements.txt @@ -0,0 +1,31 @@ +--extra-index-url https://download.pytorch.org/whl/cu116 +tqdm==4.65.0 +zmq==0.0.0 +protobuf==3.20.1 +roslibpy==1.5.0 + +Pillow==8.0.1 +numpy==1.21.6 +matplotlib==3.5.3 +opencv-python-headless==4.7.0.72 +scipy==1.7.3 +seaborn==0.12.2 +pandas==1.1.5 + +tensorboard==1.15.0 +tensorflow-estimator==1.15.1 +tensorflow-gpu==1.15.0 + +torch==1.12.0+cu116 +torchvision==0.13.0+cu116 +pytorch-lightning==0.8.1 + +optuna==3.1.1 +wandb==0.12.11 + +Box2D==2.3.10 +gym==0.17.3 + +PySide2==5.15.2.1 + +ruckig==0.9.2 diff --git a/agent/config.py b/agent/config.py index 360498a..1cf246a 100644 --- a/agent/config.py +++ b/agent/config.py @@ -1,16 +1,38 @@ -class Config: +""" +Configuration dict for dart and gym agents + +Run this script to update the UNITY configuration.xml (or 
update the fields manually). See the main method below for instructions +Symbol (*): If you have changed an option in the Config class where there is a "(*)" symbol in the comments, please run this script or + update the corresponding UNITY field in the .xml file to apply the changes - you may need to restart the simulator +""" +import os +import numpy as np +from utils.simulator_configuration import update_simulator_configuration + +class Config: def __init__(self): - pass + self.dart_dict = self.get_dart_env_dict() # Dart-related + self.config_dict = self.get_config_dict() # General and environment settings (e.g. number of envs, velocity limits, joints) + self.reward_dict = self.get_reward_dict() # Reward dict values + self.goal_dict = self.get_goal_dict() # UNITY options: Box dimensions + self.randomization_dict = self.get_randomization_dict() # UNITY options: latency, appearance, shadows, etc @staticmethod def get_dart_env_dict(): dart_env_dict = { # episode length in no. of simulation steps - 'max_time_step': 200, + 'max_time_step': 400, + + # to train RL-based models: PPO + 'total_timesteps': 10000, + + # agents and UNITY control cycles + 'control_cycle': 0.02, # (*) agents-related + 'unity_cycle': 0.02, # (*) UNITY-related # should control end-effector orientation or not - 'orientation_control': False, + 'orientation_control': True, # when True: actions in task space, when False: actions in joint space 'use_inverse_kinematics': True, @@ -19,9 +41,26 @@ def get_dart_env_dict(): # note: might conflict with training agents in task space velocities, in such cases set it to False 'linear_motion_conservation': False, - # when True: the reaching task can be also rendered in the DART viewer - # note: only for visualization, not appropriate for training a model on the pixels + # when True: the task can be also rendered in the DART viewer + # Important: Use it when evaluating an agent (e.g. checkpoint) . 
Only for debugging when training an RL agent ('simulation_mode': 'train') - set to False in this case + # Advanced: with 'weights & biases', you can log videos during training 'enable_dart_viewer': False, + + # enable task monitor to visualize states, velocities, agent actions, reward of the robot. + # 'enable_dart_viewer' should be set to True + 'task_monitor': False, + + # whether to load additional objects in the DART simulation and viewer - ground, background, etc. + 'with_objects': False, + + # how to spawn the red targets in the dart simulation + # Options: 'random', 'random_joint_level', 'import', 'fixed', 'None' + # import -> import targets from a .csv file (see 'target_path' below) + # None -> default behaviour. Can be adapted: See iiwa_sample_dart_unity_env, create_target() method + 'target_mode': "random_joint_level", + + # when target_mode is 'import': load targets from a .csv file + 'target_path': "/misc/generated_random_targets/cart_pose_7dof.csv" } return dart_env_dict @@ -29,45 +68,142 @@ def get_dart_env_dict(): @staticmethod def get_config_dict(): config_dict = { - - ###################################### GENERAL PARAMETERS ################################################## - # whether the RL algorithm uses customized or stable-baseline specific parameters 'custom_hps': True, - # use 'iiwa_joint_vel' for iiwa_environment without dart (only joint velocity control), or - # 'iiwa_sample_dart_unity_env' for control in task space with dart - 'env_key': 'iiwa_sample_dart_unity_env', + # agent model + 'model': 'PPO', - # environment-specific parameter, e.g, number of links, only relevant for iiwa_joint_vel env + # options: 'train', 'evaluate', 'evaluate_model_based" -> only for 'iiwa_sample_dart_unity_env' environment (see below). 
Refer to the main.py for more details + # 'evaluate': load an RL saved checkpoint + # 'evaluate_model_based': use a p-controller to solve the task for debugging + 'simulation_mode': 'evaluate_model_based', + + # available environments + 'env_key': 'iiwa_sample_dart_unity_env', # for control in task space with dart + #'env_key': 'iiwa_joint_vel', # without dart (only joint velocity control) -> enable gripper (see below). Sample env for testing images state representation + + # environment-specific parameter, e.g, number of links, only relevant for 'iiwa_joint_vel' env # currently, num_joints should be always 7 when the DART based environment is used 'num_joints': 7, - # whether to use images as state representation, an example code is in iiwa_joint_vel env - 'use_images': False, + # camera and images settings: gym and UNITY # + 'use_images': False, # (*) Use images as state representation. Example code at 'iiwa_joint_vel', _retrieve_image() and 'iiwa_sample_dart_unity_env', update() functions + 'image_size': 128, # (*) Gym observation space dimension. See 'iiwa_joint_vel', __init__() + 'camera_position': [0.0, 3.0, 0.0], # (*) UNITY + 'camera_rotation': [90.0, 0.0, -90.0], # (*) + + # end effector + 'enable_end_effector': True, # (*) Set to False if no tool is attached. Important: in that case, set 'robotic_tool' to None + 'robotic_tool': "3_gripper", # (*) Options: '3_gripper', '2_gripper', 'calibration_pin', 'None' (string). Also, 'end_effector' should be set to True. 
+ # -> For 'iiwa_sample_joint_vel_env' select a gripper # GRPC, ROS or ZMQ - 'communication_type': 'GRPC', + 'communication_type': 'GRPC', # (*) # the ip address of the server, for windows and docker use 'host.docker.internal', # for linux use 'localhost' for connections on local machine 'ip_address': 'localhost', # 'host.docker.internal', # 'localhost' # port number for communication with the simulator - 'port_number': '9092', + 'port_number': '9092', # (*) # the seed used for generating pseudo-random sequences - 'seed': None, + 'seed': 1235, # the state of the RL agent in case of numeric values for manipulators 'a' for angles only # or 'av' for angles and velocities 'state': 'a', - # whether to enable gripper for manipulation tasks such as grasping or not - 'gripper': False, - - # number of environments to run in parallel - 'num_envs': 16, + # number of environments to run in parallel, 8, 16, ... + # you may need to restart unity + 'num_envs': 1, + + ######################################################################### + # Logging settings during training # + ######################################################################### + 'log_dir': "./agent/logs/", # Folder to save the model checkpoints and tensorboard logs + 'tb_log_name': "ppo_log_tb", # Name of the tb folder + ######################################################################## + + ########################################################################################################## + # Manipulator related # + # Important: these functionallities are not supported for the standalone env 'iiwa_sample_joint_vel_env' # + # - see reset() and __init__ to adapt if needed # + ########################################################################################################## + + ################################################################################################################## + # Important: When the manipulator is spawned to a different position than the vertical 
position, # + # the parallel envs should terminate at the same timestep due to Unity synchronization behaviour # + # Imporant: when the observation space is an image, can not sample random_initial_joint_positions, set to False # + ################################################################################################################## + 'random_initial_joint_positions': False, # If set to True, it overrides the values set in 'initial_positions'. + 'initial_positions': None, # Example options: [0, 0, 0, 0, 0, 0, 0] same as None, [0, 0, 0, -np.pi/2, 0, np.pi/2, 0] + + ################################################################################################################################## + # Default velocity and joint limits for 'iiwa_sample_dart_unity_env' # + # Note: 'joints_safety_limit' -> set to higher value depending on your task and velocity ranges # + # the UNITY behaviour may be unstable when having a 0.0 safety limit with high velocities # + # e.g. nan tensor joint error -> the robot is in an invalid configuration - reset the manipulator # + ################################################################################################################################## + 'joints_safety_limit': 0.0, # [deg] + 'max_joint_vel': 20.0, # [deg/s] - Joint space + 'max_ee_cart_vel': 10.0, # [m/s] - Task space -- Not optimized values for sim2real transfer + 'max_ee_cart_acc': 3.0, # [m/s^2] + 'max_ee_rot_vel': 4.0, # [rad/s] + 'max_ee_rot_acc': 1.2 # [rad/s^2] } return config_dict + + @staticmethod + def get_reward_dict(): + reward_dict = { + 'reward_terminal': 0.0 # Given at the end of the episode if it was successful - see simulator_vec_env.py, step() method during reset + } + + return reward_dict + + def get_goal_dict(self): + """ + (*) Box dimensions - UNITY + """ + goal_dict = { + 'box_dim': [0.05, 0.1, 0.05] # (*) x, y, z + } + + return goal_dict + + def get_randomization_dict(self): + """ + (*) Randomization settings - UNITY + """ + + 
randomization_dict = { + 'randomize_latency': False, # (*) + 'randomize_torque': False, # (*) + 'randomize_appearance': False, # (*) + 'enable_shadows': True, # (*) Note: For sim2real transfer (images) try also False + 'shadow_type': "Soft", # (*) Options: Soft/Hard + } + + return randomization_dict + +if __name__ == "__main__": + """ + If you have updated the Config class, you can run this script to update the configuration.xml of the + UNITY simulator instead of updating the fields manually + + Important: In this case the simulator folder should be located inside the vtprl/ folder + with name: simulator/Linux/ManipulatorEnvironment/ManipulatorEnvironment.x86_64, or update the path + of the 'xml_file' variable below + + Note: You many need to restart the UNITY simulator after updating the .xml file + """ + config = Config() + + #os.path.dirname(os.path.realpath(__file__)) + + xml_file = "./simulator/Linux/ManipulatorEnvironment/configuration.xml" # Change the path if needed + + # Update the .xml based on the new changes in the Config class # + update_simulator_configuration(config, xml_file) diff --git a/agent/config_advanced.py b/agent/config_advanced.py new file mode 100644 index 0000000..02d828c --- /dev/null +++ b/agent/config_advanced.py @@ -0,0 +1,504 @@ +""" +Advanced Configuration dict for dart and gym agents + +Run this script to update the UNITY configuration.xml (or update the fields manually). 
See the main method below for instructions + +Symbol (*): If you have changed an option in the Config class where there is a "(*)" symbol in the comments, please run this script or + update the corresponding UNITY field in the .xml file to apply the changes - you may need to restart the simulator + +Important: First fully understand the logic behind config.py and 'iiwa_sample_dart_unity_env', and 'iiwa_joint_vel' envs +""" +import os +import numpy as np +from utils.simulator_configuration import update_simulator_configuration + +class ConfigAdvanced: + + def __init__(self): + self.dart_dict = self.get_dart_env_dict() # Dart-related + self.config_dict = self.get_config_dict() # General settings (e.g. number of envs, velocity limits, joints) + self.env_dict = self.get_env_dict() # Environment specific settings + self.reward_dict = self.get_reward_dict() # Reward dict values + self.hyper_dict = self.get_hyper_dict() # Model settings (e.g. PPO, RUCKIG Model-based) + self.goal_dict = self.get_goal_dict() # Green box and red rectangle target settings + self.manual_actions_dict = self.get_manual_actions_dict() # Hard-coded actions settings (applied at the end of the agent episode) + self.randomization_dict = self.get_randomization_dict() # UNITY and agent randomization settings + + @staticmethod + def get_dart_env_dict(): + dart_env_dict = { + # episode length in no. 
of simulation steps + 'max_time_step': 400, + + # to train RL-based models: PPO + 'total_timesteps': 1500000, + + # agents and UNITY control cycles + 'control_cycle': 0.05, # (*) agents-related + 'unity_cycle': 0.025, # (*) UNITY-related + + # should control end-effector orientation or not + 'orientation_control': True, + + # when True: actions in task space, when False: actions in joint space + 'use_inverse_kinematics': True, + + # when True: SNS algorithm is used for inverse kinematics to conserve optimal linear motion in task space + # note: might conflict with training agents in task space velocities, in such cases set it to False + 'linear_motion_conservation': False, + + # when True: the task can be also rendered in the DART viewer + # Important: Use it when evaluating an agent (e.g. checkpoint). Only for debugging when training an RL agent ('simulation_mode': 'train') - set to False in this case + # Advanced: with 'weights & biases', you can log videos during training + 'enable_dart_viewer': False, + + # enable task monitor to visualize states, velocities, agent actions, reward of the robot. + # 'enable_dart_viewer' should be set to True + 'task_monitor': True, + + # whether to load additional objects in the DART simulation and viewer - ground, background, etc. + 'with_objects': False, + + # how to spawn the red targets in the dart simulation + # Options: 'random', 'random_joint_level', 'import', 'fixed', 'None' + # import -> import targets from a .csv file (see 'target_path' below) + # None -> default behaviour. 
Can be adapted: See iiwa_sample_dart_unity_env.py, create_target() method + 'target_mode': "None", + + # when target_mode is 'import': load targets from a .csv file + 'target_path': "/misc/generated_random_targets/cart_pose_7dof.csv" + } + + return dart_env_dict + + @staticmethod + def get_config_dict(): + config_dict = { + # whether the RL algorithm uses customized or stable-baseline specific parameters + 'custom_hps': True, + + # agent model + # options: 'PPO', 'RUCKIG' + # RUCKIG is a model-based trajectory-generator. Set env_key to 'iiwa_ruckig_planar_grasping_dart_unity_env' + 'model': 'PPO', + + # options: 'train', 'evaluate', 'evaluate_model_based" -> only for 'iiwa_sample_dart_unity_env' environment (see below). Refer to the main.py and evaluate.py for more details + # 'evaluate', for grasping: if the number of test boxes can not be divided by the 'num_envs' (see below), some boxes will not be evaluated + 'simulation_mode': 'train', + + # available environments + #'env_key': 'iiwa_sample_dart_unity_env', # for control in task space with dart + #'env_key': 'iiwa_joint_vel', # without dart (only joint velocity control) -> enable gripper (see below). Sample env for testing images state representation + 'env_key': 'iiwa_numerical_planar_grasping_dart_unity_env', # RL-based planar grasping. Enable manual actions (see below) + #'env_key': 'iiwa_end_to_end_planar_grasping_dart_unity_env', # image-based planar grasping. Enable manual actions, enable images in the Unity simulator, and set 'use_images' to True + #'env_key': 'iiwa_ruckig_planar_grasping_dart_unity_env', # time-optimal planar grasping. 
Enable manual actions, set 'model' to RUCKIG, and 'simulation_mode' to evaluate + + # environment-specific parameter, e.g, number of links, only relevant for iiwa_joint_vel env + # currently, num_joints should be always 7 when the DART based environment is used + 'num_joints': 7, + + # camera and images settings: gym and UNITY # + 'use_images': False, # (*) Use images as state representation. Example code at 'iiwa_joint_vel', _retrieve_image() and 'iiwa_sample_dart_unity_env', update() function + 'image_size': 128, # (*) Gym observation space dimension. See 'iiwa_joint_vel', __init__() + 'camera_position': [0.0, 1.7, 1.6], # (*) UNITY + 'camera_rotation': [125.0, 0.0, 180.0], # (*) + + # end effector + 'enable_end_effector': True, # (*) Set to False if no tool is attached. Important: in that case, set 'robotic_tool' to None + 'robotic_tool': "3_gripper", # (*) Options: '3_gripper', '2_gripper', 'calibration_pin', 'None' (string). Also, 'end_effector' should be set to True. + # -> For 'iiwa_sample_joint_vel_env' select a gripper + + # GRPC, ROS or ZMQ + 'communication_type': 'GRPC', # (*) + + # the ip address of the server, for windows and docker use 'host.docker.internal', + # for linux use 'localhost' for connections on local machine + 'ip_address': 'localhost', # 'host.docker.internal', # 'localhost' + + # port number for communication with the simulator + 'port_number': '9092', # (*) + + # the seed used for generating pseudo-random sequences + 'seed': 1235, + + # the state of the RL agent in case of numeric values for manipulators 'a' for angles only + # or 'av' for angles and velocities + # Usage in: 'iiwa_sample_env', and 'iiwa_sample_joint_vel_env' + 'state': 'a', + + # number of environments to run in parallel, 8, 16, ... 
+ # you may need to restart unity + # Note: for RUCKIG model set to 1 - no Neural Network - for parallel GPU support + 'num_envs': 4, + + ######################################################################### + # Logging settings during training: see monitoring_agent.py # + ######################################################################### + 'log_dir': "./agent/logs/", # Folder to save the model checkpoints and tensorboard logs + 'save_model_freq': 3125, # Save model checkpoints at this frequency + 'check_freq': 3125, # Frequency to update the 'best_mean_reward' and 'best_model.zip' + 'tb_log_name': "ppo_log_tb", # Name of the tb folder to save custom metrics/stats. e.g. success rate + 'best_mean_reward': "inf", # Continue training from a checkpoint -> set 'best_mean_reward' to the 'best_mean_reward' value of the loaded model + ######################################################################### + + ################################################################################################################################### + # Evaluation # + # Important: For base envs -> support to evaluate only one model per time - do not use 'model_evaluation_type': all' # + # For planar grasping envs -> use 8 envs for 80 boxes, 10 envs for 50 boxes, etc. # + ################################################################################################################################### + + # Options for 'model_evaluation_type': 'all', or './agent/logs/run_0/best_model', or './agent/logs/run_1/model_25000_0', etc + # - 'all' -> Scan the agent/logs/ folder and evaulate all the checkpoints that exists in each agen/logs/run_i/ folder + # - './agent/logs/run_0/best_model' -> Single model evalaluation. Do not append .zip + # - Note: -> Use 'all' for model-based evaluation - RUCKIG + 'model_evaluation_type': 'all', + 'evaluation_name': 'planar_grasping_eval', # Pandas column to name your evaluation experiment + 'reward_baseline': -95, # (Adapt if needed). 
Reward at time step 0. "untrained" agent. For ploting. + ################################################################################################## + + ########################################################################################################## + # Manipulator related # + # Important: these functionallities are not supported for the standalone env 'iiwa_sample_joint_vel_env' # + # - see reset() and __init__ to adapt if needed # + ########################################################################################################## + + ############################################################################################################################ + # Important: When the manipulator is spawned to a different position than the vertical position, # + # the parallel envs should terminate at the same timestep due to Unity synchronization behaviour # + # Imporant: when the observation space is an image, can not sample random_initial_joint_positions, set to False # + # if you change the initial position - grasping envs should be adapted - e.g. 
height goal during manual actions # + ############################################################################################################################ + 'random_initial_joint_positions': False, # If set to True, it overrides the values set in 'initial_positions' + 'initial_positions': [0, 0, 0, -np.pi/2, 0, np.pi/2, 0], # Other options: [0, 0, 0, 0, 0, 0, 0] or None (same as the zero list) + + # Planar Limits - optimized values for sim2real transfer # + # Note: these values can be increased further # + 'joints_safety_limit': 10, + 'max_joint_vel': 20, + 'max_ee_cart_vel': 0.035, + 'max_ee_cart_acc': 0.35, + 'max_ee_rot_vel': 0.15, + 'max_ee_rot_acc': 15.0 # Remove limits to match ruckig + + ################################################################################################################################## + # Default velocity and joint limits for 'iiwa_sample_dart_unity_env' # + # Note: 'joints_safety_limit' -> set to higher value depending on your task and velocity ranges # + # the UNITY behaviour may be unstable when having a 0.0 safety limit with high velocities # + # e.g. 
nan tensor joint error -> the robot is in an invalid configuration - reset the manipulator # + ################################################################################################################################## + #'joints_safety_limit': 0.0, + #'max_joint_vel': 20, # Joint space + #'max_ee_cart_vel': 10.0, # Task space -- Not optimized values for sim2real transfer + #'max_ee_cart_acc': 3.0, + #'max_ee_rot_vel': 4.0, + #'max_ee_rot_acc': 1.2 + } + + return config_dict + + def get_env_dict(self): + """ + Environment specific settings + """ + + env_dict = None + + if(self.config_dict["env_key"].find("planar") != -1): # Planar environment + env_dict = { + ######################################################################## + # Agents p-controller settings # + # Planar RL-agents are using a p-controller to keep fixed some DoF # + # so that the robot moves in a planar manner during the episode # + ######################################################################## + 'agent_kp': 0.5, # P-controller gain position + 'agent_kpr': 1.5, # P-controller gain rotation + 'threshold_p_model_based': 0.01 # Threshold for model-based agents - RUCKIG + ######################################################################## + } + + return env_dict + + def get_goal_dict(self): + """ + Goal Settings: used mainly by the "advanced" envs (planar grasping) # + (*) + """ + goal_dict = { + 'goal_type': 'box', # 'target' -> red rectangle or 'box' -> green box + 'box_dim': [0.05, 0.1, 0.05] # (*) UNITY option x, y, z - if you change the height of the box, then adapt in boxes_generator.py the _get_samples() method + } + + if(goal_dict["goal_type"] == "box"): + goal_dict_box = { + 'box_type': 'small', # 'big' (10x10x10cm), or 'small' (5x10x5). 
Set 'box_dim' to the correct values + 'box_mode': "train", # 'train', 'val', 'debug' + 'box_max_distance_base': 0.67, # Max distance of the box to the base of the robot + 'box_min_distance_base': 0.475, # Min distance of the box to the base of the robot + 'box_folder': "./agent/logs/dataset/", # Folder to save the generated dataset + 'box_samples': 200000, # Train and val + 'box_split': 0.00025, # Val split ratio + 'box_save_val': True, # Save the dataset + 'box_load_val': False, # Load the validation boxes from a saved dataset. Train boxes are always random (seed) + 'box_radius_val': 0.0, # Exclude spawned train boxes that are close to the val boxes within this radius (threshold) in meters. e.g. 0.01 + 'box_x_active': True, # If set to False: x coord of the box will be always 0.0 meters + 'box_x_min': -0.67, # Minimum range for x box coordinate + 'box_x_max': 0.67, # Maximum range for x box coordinate + 'box_z_active': True, + 'box_z_min': 0.42, + 'box_z_max': 0.67, + 'box_ry_active': True, # If set to False -> Boxes will have a fixed rotation (0.0 deg) + 'box_ry_min': -90, + 'box_ry_max': 0, + 'box_debug': [-0.1, 0.05, 0.55, 0.0, -90.0, 0.0] # 'box_mode': 'debug' -> spawn a fixed box for all envs + } + + if(goal_dict_box['box_type'] == 'small'): + goal_dict['box_dim'] = [0.05, 0.1, 0.05] + elif(goal_dict_box['box_type'] == 'big'): + goal_dict['box_dim'] = [0.1, 0.1, 0.1] + + goal_dict.update(goal_dict_box) + + return goal_dict + + def get_hyper_dict(self): + """ + Model settings: PPO or RUCKIG + """ + + if(self.config_dict['model'] == 'PPO'): + hyper_dict = { + + #################################### MOST COMMON ################################################################################################# + 'learning_rate': 0.0005, # Default 0.0003 + 'clip_range': 0.25, # Default 0.2 + 'policy': "MlpPolicy", # Try 'CnnPolicy' with 'iiwa_end_to_end_planar_grasping_dart_unity_env' + 'policy_network': None, # None (means default), or 'PolicyNetworkVanillaReLU' -> 
default SB3 network but with ReLU act. func. instead of Tanh + ################################################################################################################################################# + + 'ent_coef': 0.02, # Default 0.0 + 'gamma': 0.96, # Default 0.99 + 'gae_lambda': 0.94, # Default 0.95 + 'n_steps': 512, # Default 2048 + 'n_epochs': 10, # Default 10 + 'vf_coef': 0.5, # Default 0.5 + 'max_grad_norm': 0.5, # Default 0.5 + 'batch_size': 32 # Default 64 + } + + # For 'iiwa_ruckig_planar_grasping_dart_unity_env'. Set 'simulation_mode': 'evaluate', and 'model': 'RUCKIG' + # Set 'num_envs': 1 -> the code can not be parallelized in the GPU (no neural network) + elif(self.config_dict['model'] == 'RUCKIG'): + hyper_dict = { + 'dof': [1, 1, 1], # Active dof. [1,1,1] or [0,1,1]. Important: for 011: set 'reward_pose_weight': 0.0 (see below). Activate only 2DoF control + 'target_vel': [0, 0, 0], # Velocity of the ee in the target pose (rz, x, y - dart coords) + 'target_acc': [0, 0, 0], # Acceleration of the ee in the target pose (rz, x, y - dart coords) + 'max_vel': [0.15, 0.035, 0.035], # Limits task-space + 'max_acc': [1000, 1000, 1000], # Disabled -> can also be set to some smaller values + 'max_jerk': [1000, 1000, 1000], + } + + else: + return None + + return hyper_dict + + @staticmethod + def get_reward_dict(): + """ + Novel reward that uses the displacements of the ee to the box (previous distance of the ee to the box minus currrent distance of ee to the box) + + see 'iiwa_numerical_planar_grasping_dart_unity_env' + + Note: adapt if needed to your task + + """ + + reward_dict = { + "reward_collision": -100, # One time penalty. When: collision with the box, floor, itself or joints position limits have been violated + "reward_terminal": 0.0, # If the box is in the air after the episode -> give this reward. 
Also, used in the default envs for a successful episode + "reward_height_goal": 0.25, # Goal height in meters -> then give 'reward_terminal' for planar envs only + 'reward_pose_weight': 1.0, # Important: Set 'reward_pose_weight': 0.0 to activate 2DoF control + 'reward_pose_norm_const': 0.04, # Normalization constant + 'reward_x_weight': 1.0, # Importance of this term + 'reward_x_norm_const': 0.01, + 'reward_z_weight': 1.0, + 'reward_z_norm_const': 0.01 + } + + return reward_dict + + def get_manual_actions_dict(self): + """ + Manual actions settings. + Whether to perform manual actions at the end of the RL-episode + (*) + """ + + manual_actions_dict = { + 'manual': True, # Activate manual actions + 'manual_rewards': True, # Give collision penalties during the manual actions + 'manual_behaviour': "planar_grasping", # Option 1: "planar_grasping" -> only for planar envs. Go 'down', 'close', and then 'up' at the end of the episode + # Option 2: "close_gripper" -> close the gripper at the end of the episode. Works for non-planar agents too + # The envs should terminate at the same time step always, see simulator_vec_env.py for explanations - self.step() method + } + + # Extend the 'manual_actions_dict' dict depending on the user-defined 'manual_behaviour' option # + behaviour_dict = None + if(manual_actions_dict["manual_behaviour"] == "planar_grasping"): + behaviour_dict = self.get_manual_actions_planar_dict() + + elif(manual_actions_dict["manual_behaviour"] == "close_gripper"): + behaviour_dict = self.get_manual_actions_close_gripper_dict() + + # Extend manual_actions_dict # + if(behaviour_dict is not None): + manual_actions_dict.update(behaviour_dict) + + return manual_actions_dict + + def get_manual_actions_planar_dict(self): + """ + Manual actions dictionary settings for planar grasping + + Actions: + 1. down + 2. close + 3. 
up + (*) + """ + manual_actions_planar_dict = None + + ###################################################################### + # Box 10x10x10 -> (0.055, 4, 15) / (down, close, close_vel) # + # Box 5x10x5 -> (0.055, 6, 15) / (down, close, close_vel) # + # (*) Change the configuration.xml of the Unity simulator: # + # (gripper type and box dimensions ) # + ###################################################################### + manual_actions_planar_dict = { + 'manual_kp': 1.0, # P-controller gain when going down/up for position error + 'manual_kpr': 3.0, # P-controller gain when going down/up for orientation error + 'manual_tol': 0.015, # Theshold of the P-controller + 'manual_up_height': 0.35, # Target height + 'manual_steps_same_state': 30, # How many steps the ee is allowed to not moved during the manual actions down/up + 'manual_tol_same_state': 0.01, # If the gripper has not moved more than this distance threshold value within 'steps_same_state' -> it means collision + } + + if(self.config_dict["robotic_tool"] == "None"): + manual_actions_planar_dict = None + + # 3-finger gripper # + elif(self.config_dict["robotic_tool"] == "3_gripper"): + manual_actions_planar_dict["manual_down_height"] = 0.055 # How much down to move to reach the box + manual_actions_planar_dict["manual_close_vel"] = 15 # Velocity value to close the gripper per each step + + if(self.goal_dict["box_type"] == "big"): # Big box + manual_actions_planar_dict["manual_close_steps"] = 4 # How many steps to close the gripper (with 'manual_close_vel' per each step) + + elif(self.goal_dict["box_type"] == "small"): # Small box + manual_actions_planar_dict["manual_close_steps"] = 6 + + else: + manual_actions_planar_dict = None + + # 2-finger gripper # + elif(self.config_dict['robotic_tool'] == "2_gripper"): + if(self.goal_dict["box_type"] == "small"): # Small box + manual_actions_planar_dict["manual_down_height"] = 0.055 + manual_actions_planar_dict["manual_close_steps"] = 10 + 
manual_actions_planar_dict["manual_close_vel"] = 15 + + else: # Important: Gripper 2 can not manipulate the big box + manual_actions_planar_dict = None + + else: + manual_actions_planar_dict = None + + return manual_actions_planar_dict + + def get_manual_actions_close_gripper_dict(self): + """ + Manual actions dictionary settings for closing the gripper at the end of the episode + (*) + """ + manual_actions_close_gripper_dict = None + + ###################################################################### + # Box 10x10x10 -> (4, 15) / (close, close_vel) # + # Box 5x10x5 -> (6, 15) / (close, close_vel) # + # (*) Change the configuration.xml of the Unity simulator: # + # (gripper type and box dimensions ) # + ###################################################################### + manual_actions_close_gripper_dict = { + } + + if(self.config_dict["robotic_tool"] is "None"): + manual_actions_close_gripper_dict = None + + # 3-finger gripper # + elif(self.config_dict["robotic_tool"] == "3_gripper"): + manual_actions_close_gripper_dict["manual_close_vel"] = 15 # Velocity value to close the gripper per each step + + if(self.goal_dict["box_type"] == "big"): # Big box + manual_actions_close_gripper_dict["manual_close_steps"] = 4 # How many steps to close the gripper (with 'manual_close_vel' per each step) + + elif(self.goal_dict["box_type"] == "small"): + manual_actions_close_gripper_dict["manual_close_steps"] = 6 + + else: + manual_actions_close_gripper_dict = None + + # 2-finger gripper # + elif(self.config_dict['robotic_tool'] == "2_gripper"): + if(self.goal_dict["box_type"] == "small"): # Small box + manual_actions_close_gripper_dict["manual_close_steps"] = 10 + manual_actions_close_gripper_dict["manual_close_vel"] = 15 + + else: # Important: Gripper 2 can not manipulate the big box + manual_actions_close_gripper_dict = None + + else: + manual_actions_close_gripper_dict = None + + return manual_actions_close_gripper_dict + + def get_randomization_dict(self): + """ + 1. 
Apply noise to the agent state observation see 'iiwa_numerical_planar_grasping_dart_unity_env' - self.get_state() method + + 2. (*) UNITY Randomization settings + """ + + randomization_dict = { + 'noise_enable_rl_obs': False, # Whether to apply noise in the agent state + 'noise_rl_obs_ratio': 0.05, # How much noise to apply in % percentage + + 'randomize_latency': False, # (*) + 'randomize_torque': False, # (*) + 'randomize_appearance': False, # (*) + 'enable_shadows': True, # (*) Note: For sim2real transfer (images) try also False + 'shadow_type': "Soft", # (*) Options: Soft/Hard + } + + return randomization_dict + +if __name__ == "__main__": + """ + If you have updated the Config class, you can run this script to update the configuration.xml of the + UNITY simulator instead of updating the fields manually + + Important: In this case the simulator folder should be located inside the vtprl/ folder + with name: simulator/Linux/ManipulatorEnvironment/ManipulatorEnvironment.x86_64, or update the path + of the 'xml_file' variable below + + Note: You many need to restart the UNITY simulator after updating the .xml file + """ + + config = ConfigAdvanced() + + #os.path.dirname(os.path.realpath(__file__)) + + xml_file = "./simulator/Linux/ManipulatorEnvironment/configuration.xml" # Change the path if needed + + # Update the .xml based on the new changes in the Config class # + update_simulator_configuration(config, xml_file) + diff --git a/agent/dart_envs/iiwa_dart.cpython-37m-x86_64-linux-gnu.so b/agent/dart_envs/iiwa_dart.cpython-37m-x86_64-linux-gnu.so deleted file mode 100755 index d68213b..0000000 Binary files a/agent/dart_envs/iiwa_dart.cpython-37m-x86_64-linux-gnu.so and /dev/null differ diff --git a/agent/dart_envs/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so b/agent/dart_envs/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so deleted file mode 100755 index 12a76cd..0000000 Binary files a/agent/dart_envs/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so and /dev/null 
differ diff --git a/agent/env_gym_run.py b/agent/env_gym_run.py new file mode 100755 index 0000000..fc9bbcb --- /dev/null +++ b/agent/env_gym_run.py @@ -0,0 +1,59 @@ +import numpy as np + +from envs_other.cartpole import CartPoleEnv +from envs_other.nlinks_box2d import NLinksBox2DEnv +from envs_other.pendulum import PendulumEnv + +from stable_baselines3.common.env_checker import check_env + +if __name__ == '__main__': + """ + Example method to run gym base environments + """ + + env_mode = "cartpole" # "nlinks_box2d", pendulum + + if(env_mode == "cartpole"): + env = CartPoleEnv(200) + check_env(env) + + while True: + env.render() + + rnd_act = [np.random.uniform(-1, 1)] + + s, _, d, _ = env.step(action=rnd_act) + if d: + env.reset() + + elif(env_mode == "nlinks_box2d"): + num_links = 10 + nlinks = NLinksBox2DEnv(max_ts=200, n_links=num_links) + + check_env(nlinks) + + # some silent initialization steps + # for _ in range(100): + # nlinks.render() + # zero_act = np.zeros(num_links) + # nlinks.step(action=zero_act) + + while True: + nlinks.render() + # print("anchor", nlinks.anchor.position) + # print("end_eff", nlinks.end_effector.position) + # rnd_act = np.array([0.3, -0.3]) + # rnd_act = np.array([np.random.rand() * 2 - 1, np.random.rand() * 2 - 1]) + rnd_act = [] + for i in range(num_links): + rnd_act.append(np.random.rand() * 2 - 1) + + s, _, d, _ = nlinks.step(action=rnd_act) + # print("tx: {0:2.1f}; ty: {1:2.1f}".format(s[0], s[1])) + # print("eex: {0:2.1f}; eey: {1:2.1f}".format(s[2], s[3])) + if d: + nlinks.reset() + + elif(env_mode == "pendulum"): + env = PendulumEnv(200) + check_env(env) diff --git a/agent/env_sim_run.py b/agent/env_sim_run.py old mode 100644 new mode 100755 index 2bf2254..daf7273 --- a/agent/env_sim_run.py +++ b/agent/env_sim_run.py @@ -1,30 +1,31 @@ -from config import Config -from dart_envs.iiwa_dart import IiwaDartEnv - import numpy as np +import time + +from config import Config +from envs_dart.iiwa_dart import IiwaDartEnv if __name__ 
== '__main__': """ - Example method to run the base standalone DART environment. - This is useful to quickly develop and check different model-based control policies. + Example method to run the base standalone DART environment. + This is useful to quickly develop and check different model-based control policies. """ - env_dict = Config.get_dart_env_dict() iiwa = IiwaDartEnv(max_ts=env_dict['max_time_step'], orientation_control=env_dict['orientation_control'], use_ik=env_dict['use_inverse_kinematics'], ik_by_sns=env_dict['linear_motion_conservation'], - enable_render=env_dict['enable_dart_viewer']) + enable_render=env_dict['enable_dart_viewer'], task_monitor=env_dict['task_monitor'], with_objects=env_dict['with_objects']) # iiwa.set_target(iiwa.create_target()) - pd_control = True and iiwa.USE_IK + pd_control = True and iiwa.dart_sim.use_ik control_kp = 1.0 / iiwa.observation_space.high[0] - import time last_episode_time = time.time() - iiwa.render() + iiwa.reset() + if iiwa.dart_sim.enable_viewer: + iiwa.render() time.sleep(1) while True: @@ -37,7 +38,8 @@ state, reward, done, info = iiwa.step(action=action) - iiwa.render() + if iiwa.dart_sim.enable_viewer: + iiwa.render() if done: iiwa.reset() diff --git a/agent/envs/iiwa_sample_env.py b/agent/envs/iiwa_sample_env.py new file mode 100644 index 0000000..9cd7643 --- /dev/null +++ b/agent/envs/iiwa_sample_env.py @@ -0,0 +1,482 @@ +""" +A sample Env class inheriting from the DART-Unity Env for the Kuka LBR iiwa manipulator with 7 links and a Gripper +The parent class takes care of integrating DART Engine with Unity simulation environment +Unity is used as the main simulator for physics/rendering computations. +The Unity interface receives joint velocities as commands and returns joint positions and velocities + +DART is used to calculate inverse kinematics of the iiwa chain. +DART changes the agent action space from the joint space to the cartesian space +(position-only or pose/SE(3)) of the end-effector. 
+ +action_by_pd_control method can be called to implement a Proportional-Derivative control law instead of an RL policy. + +Note: Coordinates in the Unity simulator are different from the ones in DART which used here: + The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART +""" + +import math +import numpy as np +from scipy.spatial.transform import Rotation as R + +from gym import spaces +from envs_dart.iiwa_dart_unity import IiwaDartUnityEnv + +# Import when images are used as state representation +# import cv2 +# import base64 + +class IiwaSampleEnv(IiwaDartUnityEnv): + def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, + state_type, enable_render=False, task_monitor=False, + with_objects=False, target_mode="random", target_path="/misc/generated_random_targets/cart_pose_7dof.csv", goal_type="target", + joints_safety_limit=0, max_joint_vel=20.0, max_ee_cart_vel=10.0, max_ee_cart_acc =3.0, max_ee_rot_vel=4.0, max_ee_rot_acc=1.2, + random_initial_joint_positions=False, initial_positions=[0, 0, 0, 0, 0, 0, 0], + robotic_tool="3_gripper", env_id=0): + + # range of vertical, horizontal pixels for the DART viewer + viewport = (0, 0, 500, 500) + + self.state_type = state_type + self.reset_flag = False + self.goal_type = goal_type # Target or box + + # Collision happened for this env when set to 1. In case the manipulator is spanwed in a different position than the # + # default vertical, for the remaining of the episode zero velocities are sent to UNITY # + self.collided_env = 0 + + ################################ + # Set initial joints positions # + ################################ + self.random_initial_joint_positions = random_initial_joint_positions # True, False + self.initial_positions = np.asarray(initial_positions, dtype=float) if initial_positions is not None else None + + # Initial positions flag for the manipulator after reseting. 
1 means different than the default vertical position # + # In that case, the environments should terminate at the same time step due to UNITY synchronization # + if((self.initial_positions is None or np.count_nonzero(initial_positions) == 0) and self.random_initial_joint_positions == False): + self.flag_zero_initial_positions = 0 + else: + self.flag_zero_initial_positions = 1 + + ############################################################################## + # Set Limits -> Important: Must be set before calling the super().__init__() # + ############################################################################## + + # Variables below exist in the parent class, hence the names should not be changed # + # Min distance to declare that the target is reached by the end-effector, adapt the values based on your task # + self.MIN_POS_DISTANCE = 0.05 # [m] + self.MIN_ROT_DISTANCE = 0.1 # [rad] + + self.JOINT_POS_SAFE_LIMIT = np.deg2rad(joints_safety_limit) # Manipulator joints safety limit + + # admissible range for joint positions, velocities, accelerations, # + # and torques of the iiwa kinematic chain # + self.MAX_JOINT_POS = np.deg2rad([170, 120, 170, 120, 170, 120, 175]) - self.JOINT_POS_SAFE_LIMIT # [rad]: based on the specs + self.MIN_JOINT_POS = -self.MAX_JOINT_POS + + # Joint space # + self.MAX_JOINT_VEL = np.deg2rad(np.full(7, max_joint_vel)) # np.deg2rad([85, 85, 100, 75, 130, 135, 135]) # [rad/s]: based on the specs + self.MAX_JOINT_ACC = 3.0 * self.MAX_JOINT_VEL # [rad/s^2]: just approximation due to no existing data + self.MAX_JOINT_TORQUE = np.array([320, 320, 176, 176, 110, 40, 40]) # [Nm]: based on the specs + + # admissible range for Cartesian pose translational and rotational velocities, # + # and accelerations of the end-effector # + self.MAX_EE_CART_VEL = np.full(3, max_ee_cart_vel) # np.full(3, 10.0) # [m/s] --- not optimized values for sim2real transfer + self.MAX_EE_CART_ACC = np.full(3, max_ee_cart_acc) # np.full(3, 3.0) # [m/s^2] --- not 
optimized values + self.MAX_EE_ROT_VEL = np.full(3, max_ee_rot_vel) # np.full(3, 4.0) # [rad/s] --- not optimized values + self.MAX_EE_ROT_ACC = np.full(3, max_ee_rot_acc) # np.full(3, 1.2) # [rad/s^2] --- not optimized values + + ################################################################################## + # End set limits -> Important: Must be set before calling the super().__init__() # + ################################################################################## + + super().__init__(max_ts=max_ts, orientation_control=orientation_control, use_ik=use_ik, ik_by_sns=ik_by_sns, + robotic_tool=robotic_tool, enable_render=enable_render, task_monitor=task_monitor, + with_objects=with_objects, target_mode=target_mode, target_path=target_path, viewport=viewport, + random_initial_joint_positions=random_initial_joint_positions, initial_positions=initial_positions, env_id=env_id) + + # Clip gripper action to this limit # + if(self.robotic_tool.find("3_gripper") != -1): + self.gripper_clip = 90 + elif(self.robotic_tool.find("2_gripper") != -1): + self.gripper_clip = 250 + else: + self.gripper_clip = 90 + + # Some attributes that are initialized in the parent class: + # self.reset_counter --> keeps track of the number of resets performed + # self.reset_state --> reset vector for initializing the Unity simulator at each episode + # self.tool_target --> initial position for the gripper state + + # the lines below should stay as it is + self.MAX_EE_VEL = self.MAX_EE_CART_VEL + self.MAX_EE_ACC = self.MAX_EE_CART_ACC + if orientation_control: + self.MAX_EE_VEL = np.concatenate((self.MAX_EE_ROT_VEL, self.MAX_EE_VEL)) + self.MAX_EE_ACC = np.concatenate((self.MAX_EE_ROT_ACC, self.MAX_EE_ACC)) + + # the lines below wrt action_space_dimension should stay as it is + self.action_space_dimension = self.n_links # there would be 7 actions in case of joint-level control + if use_ik: + # There are three cartesian coordinates x,y,z for inverse kinematic control + 
self.action_space_dimension = 3 + if orientation_control: + # and the three rotations around each of the axis + self.action_space_dimension += 3 + + if self.robotic_tool.find("gripper") != -1: + self.action_space_dimension += 1 # gripper velocity + + # Variables below exist in the parent class, hence the names should not be changed + tool_length = 0.2 # [m] allows for some tolerances in maximum observation + + # x,y,z of TCP: maximum reach of arm plus tool length in meters + ee_pos_high = np.array([0.95 + tool_length, 0.95 + tool_length, 1.31 + tool_length]) + ee_pos_low = -np.array([0.95 + tool_length, 0.95 + tool_length, 0.39 + tool_length]) + + high = np.empty(0) + low = np.empty(0) + if orientation_control: + # rx,ry,rz of TCP: maximum orientation in radians without considering dexterous workspace + ee_rot_high = np.full(3, np.pi) + # observation space is distance to target orientation (rx,ry,rz), [rad] + high = np.append(high, ee_rot_high) + low = np.append(low, -ee_rot_high) + + # and distance to target position (dx,dy,dz), [m] + high = np.append(high, ee_pos_high - ee_pos_low) + low = np.append(low, -(ee_pos_high - ee_pos_low)) + + # and joint positions [rad] and possibly velocities [rad/s] + if 'a' in self.state_type: + high = np.append(high, self.MAX_JOINT_POS) + low = np.append(low, self.MIN_JOINT_POS) + if 'v' in self.state_type: + high = np.append(high, self.MAX_JOINT_VEL) + low = np.append(low, -self.MAX_JOINT_VEL) + + # the lines below should stay as it is. 
# + # Important: Adapt only if you use images as state representation, or your task is more complicated # + # Good practice: If you need to adapt several methods, inherit from IiwaSampleEnv and define your own class # + self.action_space = spaces.Box(-np.ones(self.action_space_dimension), np.ones(self.action_space_dimension), + dtype=np.float32) + + self.observation_space = spaces.Box(low, high, dtype=np.float32) + + def create_target(self): + """ + defines the target to reach per episode, this should be adapted by the task + + should always return rx,ry,rz,x,y,z in order, -> dart coordinates system + i.e., first target orientation rx,ry,rz in radians, and then target position x,y,z in meters + in case of orientation_control=False --> rx,ry,rz are irrelevant and can be set to zero + + _random_target_gen_joint_level(): generate a random sample target in the reachable workspace of the iiwa + + :return: Cartesian pose of the target to reach in the task space (dart coordinates) + """ + + # Default behaviour # + if(self.target_mode == "None"): + rx, ry, rz = 0.0, np.pi, 0.0 + target = None + while True: + x, y, z = np.random.uniform(-1.0, 1.0), np.random.uniform(-1.0, 1.0), 0.2 + + if 0.4 < math.sqrt(math.pow(x, 2) + math.pow(y, 2)) < 0.8: + target = rx, ry, rz, x, y, z + break + + elif self.target_mode == "import": + target = self._recorded_next_target() + + elif self.target_mode == "random": + target = self._random_target() + + elif self.target_mode == "random_joint_level": + target = self._random_target_gen_joint_level() # Sample always a rechable target + + elif self.target_mode == "fixed": + target = self._fixed_target() + + else: + target = [0, 0, 0, 0, -5, -200] # Dummy will be defined from the user later - advise define it in the reset() function + + return target + + def get_state(self): + """ + defines the environment state, this should be adapted by the task + + get_pos_error(): returns Euclidean error from the end-effector to the target position + 
get_rot_error(): returns Quaternion error from the end-effector to the target orientation + + :return: state for the policy training + """ + state = np.empty(0) + if self.dart_sim.orientation_control: + state = np.append(state, self.dart_sim.get_rot_error()) + + state = np.append(state, self.dart_sim.get_pos_error()) + + if 'a' in self.state_type: # Append the joints position of the manipulator + state = np.append(state, self.dart_sim.chain.getPositions()) + if 'v' in self.state_type: + state = np.append(state, self.dart_sim.chain.getVelocities()) + + # the lines below should stay as it is + self.observation_state = np.array(state) + + return self.observation_state + + def get_reward(self, action): + """ + defines the environment reward, this should be adapted by the task + + :param action: is the current action decided by the RL agent + + :return: reward for the policy training + """ + # stands for reducing position error + reward = -self.dart_sim.get_pos_distance() + + # stands for reducing orientation error + if self.dart_sim.orientation_control: + reward -= 0.5 * self.dart_sim.get_rot_distance() + + # stands for avoiding abrupt changes in actions + reward -= 0.1 * np.linalg.norm(action - self.prev_action) + + # stands for shaping the reward to increase when target is reached to balance at the target + if self.get_terminal_reward(): + reward += 1.0 * (np.linalg.norm(np.ones(self.action_space_dimension)) - np.linalg.norm(action)) + + # the lines below should stay as it is + self.reward_state = reward + + return self.reward_state + + def get_terminal_reward(self): + """ + checks if the target is reached + + get_pos_distance(): returns norm of the Euclidean error from the end-effector to the target position + get_rot_distance(): returns norm of the Quaternion error from the end-effector to the target orientation + + Important: by default a 0.0 value of a terminal reward will be given to the agent. To adapt it please refer to the config.py, + in the reward_dict. 
This terminal reward is given to the agent during reset in the step() function in the simulator_vec_env.py + + :return: a boolean value representing if the target is reached within the defined threshold + """ + if self.dart_sim.get_pos_distance() < self.MIN_POS_DISTANCE: + if not self.dart_sim.orientation_control: + return True + if self.dart_sim.get_rot_distance() < self.MIN_ROT_DISTANCE: + return True + + return False + + def get_terminal(self): + """ + checks the terminal conditions for the episode - for reset + + :return: a boolean value indicating if the episode should be terminated + """ + + if self.time_step > self.max_ts: + self.reset_flag = True + + return self.reset_flag + + def update_action(self, action): + """ + converts env action to the required unity action by possibly using inverse kinematics from DART + + self.dart_sim.command_from_action() --> changes the action from velocities in the task space (position-only 'x,y,z' or complete pose 'rx,ry,rz,x,y,z') + to velocities in the joint space of the kinematic chain (j1,...,j7) + + :param action: The action vector decided by the RL agent, acceptable range: [-1,+1] + + It should be a numpy array with the following shape: [arm_action] or [arm_action, tool_action] + in case 'robotic_tool' is a gripper, tool_action is always a dim-1 scalar value representative of the normalized gripper velocity + + arm_action has different dim based on each case of control level: + in case of use_ik=False -> is dim-7 representative of normalized joint velocities + in case of use_ik=True -> there would be two cases: + orientation_control=False -> of dim-3: Normalized EE Cartesian velocity in x,y,z DART coord + orientation_control=True -> of dim-6: Normalized EE Rotational velocity in x,y,z DART coord followed by Normalized EE Cartesian velocity in x,y,z DART coord + + :return: the command to send to the Unity simulator including joint velocities and possibly the gripper position + """ + + # the lines below should stay as it 
is + self.action_state = action + action = np.clip(action, -1., 1.) + + # This updates the gripper target by accumulating the tool velocity from the action vector # + # Note: adapt if needed: e.g. accumulating the tool velocity may not work well for your task # + if self.robotic_tool.find("gripper") != -1: + tool_action = action[-1] + self.tool_target = np.clip((self.tool_target + tool_action), 0.0, self.gripper_clip) + + # This removes the gripper velocity from the action vector for inverse kinematics calculation + action = action[:-1] + + # use INVERSE KINEMATICS # + if self.dart_sim.use_ik: + task_vel = self.MAX_EE_VEL * action + joint_vel = self.dart_sim.command_from_action(task_vel, normalize_action=False) + else: + joint_vel = self.MAX_JOINT_VEL * action + + # append tool action # + unity_action = joint_vel + if self.robotic_tool.find("gripper") != -1: + unity_action = np.append(unity_action, [float(self.tool_target)]) + + return unity_action + + def update(self, observation, time_step_update=True): + """ + converts the unity observation to the required env state defined in get_state() + with also computing the reward value from the get_reward(...) and done flag, + it increments the time_step, and outputs done=True when the environment should be reset + + important: it also updates the dart kinematic chain of the robot using the new unity simulator observation. 
+ always call this function, once you have send a new command to unity to synchronize the agent environment + + :param observation: is the observation received from the Unity simulator within its [X,Y,Z] coordinate system + 'joint_values': indices [0:7], + 'joint_velocities': indices [7:14], + 'ee_position': indices [14:17], + 'ee_orientation': indices [17:20], + 'target_position': indices [20:23], + 'target_orientation': indices [23:26], + 'object_position': indices [26:29], + 'object_orientation': indices [29:32], + 'gripper_position': indices [32:33], ---(it is optional, in case a gripper is enabled) + 'collision_flag': indices [33:34], ---([32:33] in case of without gripper) + + :param time_step_update: whether to increase the time_step of the agent + + :return: The state, reward, episode termination flag (done), and an info dictionary + """ + # use commented lines below in case you'd like to save the image observations received from the Unity + # base64_bytes = observation['ImageData'][0].encode('ascii') + # image_bytes = base64.b64decode(base64_bytes) + # image = np.frombuffer(image_bytes, np.uint8) + # image = cv2.imdecode(image, cv2.IMREAD_GRAYSCALE) + # print(image.shape) + # cv2.imwrite("test.jpg", image) + + self.current_obs = observation['Observation'] + + ########################################################################################### + # collision happened or joints limits overpassed # + # Important: in case the manipulator is spanwed in a different position than the default # + # vertical, for the remaining of the episode zero velocities are sent to UNITY # + # see _send_actions_and_update() method in simulator_vec_env.py # + ########################################################################################### + if(observation['Observation'][-1] == 1.0 or self.joints_limits_violation()): + self.collided_env = 1 + # Reset when we have a collision only when we spawn the robot to the default # + # vertical position, else wait the 
episode to finish # + # Important: you may want to reset anyway depending on your task - adapt # + if(self.flag_zero_initial_positions == 0): + self.reset_flag = True + + # the method below handles synchronizing states of the DART kinematic chain with the # + # observation from Unity hence it should be always called # + self._unity_retrieve_joint_values(observation['Observation']) + + # Class attributes below exist in the parent class, hence the names should not be changed + if(time_step_update == True): + self.time_step += 1 + + state = self.get_state() + reward = self.get_reward(self.action_state) + done = bool(self.get_terminal()) + info = {"success": False} # Episode was successful. For now it is set at simulator_vec_env.py before reseting, step() method. Adapt if needed + + self.prev_action = self.action_state + + return state, reward, done, info + + def reset(self, temp=False): + """ + resets the DART-gym environment and creates the reset_state vector to be sent to Unity + + :param temp: not relevant + + :return: The initialized state + """ + # takes care of resetting the DART chain and should stay as it is + state = super().reset(random_initial_joint_positions=self.random_initial_joint_positions, initial_positions=self.initial_positions) + + self.collided_env = 0 + random_target = self.create_target() # draw a red target + + # sets the initial reaching target for the current episode, + # should be always called in the beginning of each episode, + # you might need to call it even during the episode run to change the reaching target for the IK-P controller + self.set_target(random_target) + + # initial position for the gripper state, accumulates the tool_action velocity received in update_action + self.tool_target = 0.0 # should be in range [0.0,90.0] + + # movement control of each joint can be disabled by setting zero for that joint index + active_joints = [1] * 7 + + # the lines below should stay as it is, Unity simulator expects these joint values in 
radians + joint_positions = self.dart_sim.chain.getPositions().tolist() + joint_velocities = self.dart_sim.chain.getVelocities().tolist() + + # the mapping below for the target should stay as it is, unity expects orientations in degrees + target_positions = self.dart_sim.target.getPositions().tolist() + + target_X, target_Y, target_Z = -target_positions[4], target_positions[5], target_positions[3] + target_RX, target_RY, target_RZ = np.rad2deg([-target_positions[1], target_positions[2], target_positions[0]]) + target_positions_mapped = [target_X, target_Y, target_Z, target_RX, target_RY, target_RZ] + + # spawn the object in UNITY: by default a green box is spawned + # depending on your task, positioning the object might be necessary, start from the following sample code + # important: unity expects orientations in degrees + object_X, object_Y, object_Z = np.random.uniform(-1.0, 1.0), 0.05, np.random.uniform(-1.0, 1.0) + object_RX, object_RY, object_RZ = 0.0, 0.0, 0.0 + object_positions_mapped = [object_X, object_Y, object_Z, object_RX, object_RY, object_RZ] + + self.reset_state = active_joints + joint_positions + joint_velocities\ + + target_positions_mapped + object_positions_mapped + + if self.robotic_tool.find("gripper") != -1: + self.reset_state = self.reset_state + [self.tool_target] + + self.reset_counter += 1 + self.reset_flag = False + + return state + + #################### + # Commands related # + #################### + def action_by_p_control(self, coeff_kp_lin, coeff_kp_rot): + """ + computes the task-space velocity commands proportional to the reaching target error + + :param coeff_kp_lin: proportional coefficient for the translational error + :param coeff_kp_rot: proportional coefficient for the rotational error + + :return: The action in task space + """ + + action_lin = coeff_kp_lin * self.dart_sim.get_pos_error() + action = action_lin + + if self.dart_sim.orientation_control: + action_rot = coeff_kp_rot * self.dart_sim.get_rot_error() + action = 
np.concatenate(([action_rot, action])) + + if self.robotic_tool.find("gripper") != -1: + tool_vel = 0.0 # zero velocity means no gripper movement - should be adapted for the task + action = np.append(action, [tool_vel]) + + return action diff --git a/agent/iiwa_sample_joint_vel_env.py b/agent/envs/iiwa_sample_joint_vel_env.py similarity index 53% rename from agent/iiwa_sample_joint_vel_env.py rename to agent/envs/iiwa_sample_joint_vel_env.py index 3853b33..62ed000 100644 --- a/agent/iiwa_sample_joint_vel_env.py +++ b/agent/envs/iiwa_sample_joint_vel_env.py @@ -1,23 +1,25 @@ """ -A sample Env class inheriting from basic gym.Env for the Kuka LBR iiwa manipulator with 7 links and a Gripper. -There is no inverse kinematics calculation supported here (see iiwa_sample_env for IK), only joint velocity control -is supported with it. Unity is used as the main simulator for physics/rendering computations. The Unity interface -receives joint velocities as commands and returns joint positions and velocities. The class presents a way to alternate -between numeric observations and image observations, how to parse images returned from the Unity simulator and how to -reset the environments in the simulator +A sample Env class inheriting from basic gym.Env for the Kuka LBR iiwa manipulator with 7 links and a Gripper. + +Important: There is no inverse kinematics calculation supported here (see iiwa_sample_env for IK), only joint velocity control is supported with it. + +Unity is used as the main simulator for physics/rendering computations. The Unity interface receives joint velocities as commands +and returns joint positions and velocities. 
The class presents a way to alternate between numeric observations and image observations, +how to parse images returned from the Unity simulator and how to reset the environments in the simulator """ import math import time -import numpy as np import cv2 import base64 + +import numpy as np from numpy import newaxis + from gym import spaces, core from gym.utils import seeding - class IiwaJointVelEnv(core.Env): # the max velocity allowed for a joint in rad, used for normalization of the state values related to angle velocity MAX_VEL = 1.1 # value used to normalize the speed returned from the @@ -27,17 +29,24 @@ class IiwaJointVelEnv(core.Env): # assumed max distance between target and end-effector, used for normalizing the agent state in range (-1,1) MAX_DISTANCE = 2 + # min distance to declare that the target is reached by the end-effector, adapt the values based on your task + MIN_POS_DISTANCE = 0.05 # [m] + def __init__(self, id, max_ts, config): self.max_ts = max_ts self.ts = 0 + self.use_images = config["use_images"] + self.image_size = config["image_size"] + # relevant for numeric observations: whether only angles (a) or angles and velocities (av) of the joints self.state_type = config["state"] self.simulator_observation = None self.id = id - self.reset_counter = 0; + self.reset_counter = 0 self.reset_state = None - self.collided = 0 + + self.collided_env = 0 self.seed() # number of joints to be controlled from the 7 available @@ -54,12 +63,12 @@ def __init__(self, id, max_ts, config): high = np.array(state_array_limits) low = -high + # Define a gym observation space suitable for images # if self.use_images: - # if using images make 100x100 pixels grayscale observation space - self.observation_space = spaces.Box( - low=0, high=255, - shape=(100, 100, 1), - dtype=np.uint8) + # if using images make self.image_sizexself.image_size pixels grayscale observation space + self.observation_space = spaces.Box(low=0, high=255, + shape=(self.image_size, self.image_size, 
1), + dtype=np.uint8) else: self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32) @@ -71,55 +80,81 @@ def __init__(self, id, max_ts, config): low = -high self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) - def update(self, observation): + def update(self, observation, time_step_update=True): """ - a replacement of the standart step() method used in OpenAI gym. Unlike typical gym environment where the step() - function is called at each timestep, in this case the simulator_vec_env step() function is called during - training, which in turn calls the update() function here for a single env to pass its simulator observation. - Reasons are related to the logic of how to communicate with the simulator and how to reset individual - environments running in the simulator independently. + a replacement of the standart step() method used in OpenAI gym. Unlike typical gym environment where the step() + function is called at each timestep, in this case the simulator_vec_env step() function is called during + training, which in turn calls the update() function here for a single env to pass its simulator observation. + Reasons are related to the logic of how to communicate with the simulator and how to reset individual + environments running in the simulator independently. """ - self.ts += 1 + if(time_step_update == True): + self.ts += 1 + # create agent state for the specific task from the generic simulator observation that was passed self._convert_observation(observation['Observation']) - info = {} + info = {"success": False} # Episode was successful. For now it is set at simulator_vec_env.py before the reset. 
Adapt if needed terminal = self._get_terminal() reward = self._get_reward() + if self.use_images: return self._retrieve_image(observation), reward, terminal, info else: return self.state, reward, terminal, info def _get_reward(self): - """ example definition of a reward function for a specific task""" + """ + example definition of a reward function for a specific task + """ absolute_distance = self._get_distance() + # in this case the reward is the negative distance between the target and the end-effector and a punishment # of 1 for collision. This reward is suitable for training reaching tasks - return - 1 * (absolute_distance + self.collided) + return - 1 * (absolute_distance + self.collided_env) + + def get_terminal_reward(self): + """ + checks if the target is reached + + _get_distance(): returns norm of the Euclidean error from the end-effector to the target position + + Important: by default a 0.0 value of a terminal reward will be given to the agent. To adapt it please refer to the config.py, + in the reward_dict. This terminal reward is given to the agent during the step() function in the simulator_vec_env.py + + :return: a boolean value representing if the target is reached within the defined threshold + """ + if self._get_distance() < self.MIN_POS_DISTANCE: + return True + + return False def _convert_observation(self, new_observation): """ - method used for creating task-specific agent state from the generic simulator observation returned. 
- The simulator observation has the following array of 34 values: - [a1, a2, a3, a4, a5, a6, a7, - v1, v2, v3, v4, v5, v6, v7, - ee_x, ee_y, ee_z ee_rx, ee_ry, ee_rz, - t_x, t_y, t_z, t_rx, t_ry, t_rz, - o_x, o_y, o_z, o_rx, o_ry, o_rz, - g_p, - c] - where a1..a7 are the angles of each joint of the robot in radians, v1..v7 are the velocities of each joint - in rad/sec, x, y, and z for ee, t and o are the coordinates and and rx, ry, and rz for ee, t and o are the - quaternion x, y and z components of the rotation for the end-effector, the target and the object(box) - respectively. g_p is the position (opening) of the gripper and c is a collision flag (0 if no collision and 1 - if a collision of any part of the robot with the floor happened) + method used for creating task-specific agent state from the generic simulator observation returned. + + The simulator observation has the following array of 34 values: + [a1, a2, a3, a4, a5, a6, a7, + v1, v2, v3, v4, v5, v6, v7, + ee_x, ee_y, ee_z ee_rx, ee_ry, ee_rz, + t_x, t_y, t_z, t_rx, t_ry, t_rz, + o_x, o_y, o_z, o_rx, o_ry, o_rz, + g_p, c + ] + + where + - a1..a7 are the angles of each joint of the robot in radians, + - v1..v7 are the velocities of each joint in rad/sec, + - x, y, and z for ee, t and o are the coordinates and and rx, ry, and rz for ee, t and o are the + quaternion x, y and z components of the rotation for the end-effector, the target and the object(box) respectively + - g_p is the position (opening) of the gripper and, + - c is a collision flag (0 if no collision and 1 if a collision of any part of the robot with the floor happened) """ # below is an example agent state suitable for solving the position reaching task self.simulator_observation = new_observation - self.collided = self.simulator_observation[-1] + self.collided_env = self.simulator_observation[-1] self.gripper_position = self.simulator_observation[-2] self.target_x, self.target_y, self.target_z, _, _, _ = self._get_target_pose() @@ -167,35 
+202,39 @@ def _get_distance(self): def step(self): """ - not used directly on level on single environment, see update() method instead + not used directly on level on single environment, + see update() method instead and step() of simulator_vec_env.py """ pass def reset(self): """ - reset method that can set the environment in the simulator in a specific initial state and enable/disable joints - The method defines the self.reset_state that is used from simulator_vec_env to send the reset values to the - Unity simulator. The simulator reset state has the following array of 32 values: - [j1, j2, 23, j4, j5, j6, j7, - a1, a2, a3, a4, a5, a6, a7, - v1, v2, v3, v4, v5, v6, v7, - t_x, t_y, t_z, t_rx, t_ry, t_rz, - o_x, o_y, o_z, o_rx, o_ry, o_rz, - g_p] - where j1..j7 are flags indicating whether a joint should be enabled (1) or disabled (0), - a1..a7 are the initial angles of each joint of the robot in radians (currently only 0 initial values supported - due to unity limitations) - v1..v7 are the initial velocities of each joint in rad/sec (currently only 0 initial values supported due to - unity limitations) - x, y, and z for t and o are the initial coordinates of the target and the object in meters (note that y is - vertical axis in unity) - rx, ry, and rz for t and o are the euler angles in degrees for the rotation of the object and the target - g_p is the position (opening) of the gripper (0 is open, value up to 90 supported) + reset method that can set the environment in the simulator in a specific initial state and enable/disable joints + The method defines the self.reset_state that is used from simulator_vec_env to send the reset values to the Unity simulator. 
+ + The simulator reset state has the following array of 32 values: + [j1, j2, 23, j4, j5, j6, j7, + a1, a2, a3, a4, a5, a6, a7, + v1, v2, v3, v4, v5, v6, v7, + t_x, t_y, t_z, t_rx, t_ry, t_rz, + o_x, o_y, o_z, o_rx, o_ry, o_rz, + g_p] + + where + - j1..j7 are flags indicating whether a joint should be enabled (1) or disabled (0), + - a1..a7 are the initial angles of each joint of the robot in radians (currently only 0 initial values supported + due to unity limitations) + - v1..v7 are the initial velocities of each joint in rad/sec (currently only 0 initial values supported due to + unity limitations) + - x, y, and z for t and o are the initial coordinates of the target and the object in meters (note that y is + vertical axis in unity) + - rx, ry, and rz for t and o are the euler angles in degrees for the rotation of the object and the target + - g_p is the position (opening) of the gripper (0 is open, value up to 90 supported) """ self.reset_counter += 1 self.ts = 0 - self.collided = 0; + self.collided_env = 0 self.target_x = 0 self.target_y = 0.38 # np.random.uniform(0, 1.15) @@ -207,8 +246,11 @@ def reset(self): # the joints to be enabled have value 1 and the others have value 0 self.reset_state = [1] * self.num_joints + [0] * (7 - self.num_joints) - # zeros for initial joint angles and velocities + + # zeros for initial joint angles and velocities + # Note: adapt this to reset to different initial positions if needed self.reset_state.extend([0] * 14) + # initial target and object position and orientation and gripper opening self.reset_state.extend([ self.target_x, self.target_y, self.target_z, 90, 0, 0, @@ -217,21 +259,17 @@ def reset(self): def _get_terminal(self): """ - a function to define terminal condition, it can be customized to define terminal conditions based on episode - duration, whether the task was solved, whether collision or some illegal state happened + a function to define terminal condition, it can be customized to define terminal conditions 
based on + episode duration, whether the task was solved, whether collision or some illegal state happened """ - dist = np.sqrt(np.power(self.target_x - self.ee_x, 2) + np.power(self.target_y - self.ee_y, 2) - + np.power(self.target_z - self.ee_z, 2)) - # if dist < 0.3: - # return True if self.ts > self.max_ts: return True return False def _retrieve_image(self, observation): """ - if the image observations are enabled at the simulator, it passes image data in the observation in addition - to the numeric data. This method shows how the image data can be accessed, saved locally, processed, etc. + if the image observations are enabled at the simulator, it passes image data in the observation in addition + to the numeric data. This method shows how the image data can be accessed, saved locally, processed, etc. """ # how to access the image data when it is provided by the simulator base64_bytes = observation['ImageData'][0].encode('ascii') @@ -246,10 +284,10 @@ def _retrieve_image(self, observation): # cv2.imwrite('images/received-image-bw-{}.{}'.format('cv', 'jpg'), image_bw) image = cv2.GaussianBlur(image, (3, 3), cv2.BORDER_DEFAULT) # cv2.imwrite('images/received-image-{}.{}'.format('blurred', 'jpg'), dst) - #dim = (100, 100) + #dim = (self.image_size, self.image_size) # resize image #image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA) #return image return image[:, :, newaxis] - # return np.zeros(shape=(100, 100, 1), + # return np.zeros(shape=(self.image_size, self.image_size, 1), # dtype=np.uint8) diff --git a/agent/envs_advanced/iiwa_end_to_end_planar_grasping_env.py b/agent/envs_advanced/iiwa_end_to_end_planar_grasping_env.py new file mode 100755 index 0000000..6ab2dce --- /dev/null +++ b/agent/envs_advanced/iiwa_end_to_end_planar_grasping_env.py @@ -0,0 +1,291 @@ +""" +A vision-based end-to-end RL planar grasping Env class inheriting from the IiwaNumericalPlanarGraspingEnv for the Kuka LBR iiwa manipulator with 7 links and a Gripper +The parent class 
takes care of integrating DART Engine with Unity simulation environment + +Unity is used as the main simulator for physics/rendering computations. +The Unity interface receives joint velocities as commands and returns joint positions and velocities + +DART is used to calculate inverse kinematics of the iiwa chain. +DART changes the agent action space from the joint space to the cartesian space (position-only or pose/SE(3)) of the end-effector. + +action_by_pd_control method can be called to implement a Proportional-Derivative control law instead of an RL policy. + +Note: Coordinates in the Unity simulator are different from the ones in DART which used here: +The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART +""" + +import numpy as np +import math +from gym import spaces +from envs_advanced.iiwa_numerical_planar_grasping_env import IiwaNumericalPlanarGraspingEnv +import cv2 +import base64 + +class IiwaEndToEndPlanarGraspingEnv(IiwaNumericalPlanarGraspingEnv): + def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, state_type, use_images=True, enable_render=False, task_monitor=False, target_mode="None", goal_type="box", + randomBoxesGenerator=None, joints_safety_limit=10, max_joint_vel=20, max_ee_cart_vel=0.035, max_ee_cart_acc =10, max_ee_rot_vel=0.15, max_ee_rot_acc=10, + random_initial_joint_positions=False, initial_positions=[0, 0, 0, -np.pi/2, 0, np.pi/2, np.pi/2], + noise_enable_rl_obs=False, noise_rl_obs_ratio=0.05, reward_dict=None, agent_kp=0.5, agent_kpr=1.5, + robotic_tool="3_gripper", image_size=128, env_id=0): + + if(use_images != True): + raise Exception("End-to-end vision-based env requires use_images to be set to True - abort") + + # the init of the parent class should be always called, this will in the end call reset() once + super().__init__(max_ts=max_ts, orientation_control=orientation_control, use_ik=use_ik, ik_by_sns=ik_by_sns, state_type=state_type, enable_render=enable_render, task_monitor=task_monitor, 
target_mode=target_mode, goal_type=goal_type, + randomBoxesGenerator=randomBoxesGenerator, joints_safety_limit=joints_safety_limit, max_joint_vel=max_joint_vel, max_ee_cart_vel=max_ee_cart_vel, + max_ee_cart_acc=max_ee_cart_acc, max_ee_rot_vel=max_ee_rot_vel, max_ee_rot_acc=max_ee_rot_acc, + random_initial_joint_positions=random_initial_joint_positions, initial_positions=initial_positions,noise_enable_rl_obs=False,noise_rl_obs_ratio=0.05, + reward_dict=reward_dict, agent_kp=agent_kp, agent_kpr=agent_kpr, robotic_tool=robotic_tool, env_id=env_id) + + self.use_images = use_images + self.image_size = image_size + self.current_obs_img = None + + ################################### + # RGB image # + # Unormalized observation for now # + # Adapt to your project # + ################################### + self.observation_space = spaces.Box(low=0, high=255, shape=(self.image_size, self.image_size, 3), dtype=np.uint8) + + ####################################################### + # Set-up the task_monitor # + ####################################################### + if self.task_monitor: + self._create_task_monitor() + + def get_state(self): + """ + End-to-end planar grasping agent receives a whole image as state input + + :return: state for the policy training + """ + return self.current_obs_img + + def update(self, observation, time_step_update=True): + """ + converts the unity observation to the required env state defined in get_state() + with also computing the reward value from the get_reward(...) and done flag, + it increments the time_step, and outputs done=True when the environment should be reset + + important: it also updates the dart kinematic chain of the robot using the new unity simulator observation. 
+ always call this function, once you have send a new command to unity to synchronize the agent environment + + :param observation: is the observation received from the Unity simulator within its [X,Y,Z] coordinate system + 'joint_values': indices [0:7], + 'joint_velocities': indices [7:14], + 'ee_position': indices [14:17], + 'ee_orientation': indices [17:20], + 'target_position': indices [20:23], + 'target_orientation': indices [23:26], + 'object_position': indices [26:29], + 'object_orientation': indices [29:32], + 'gripper_position': indices [32:33], + 'collision_flag': indices [33:34], + + :param time_step_update: whether to increase the time_step of the agent - during manual actions call with False + + :return: The state, reward, episode termination flag (done), and an empty info dictionary + """ + + self.current_obs = observation['Observation'] + + ############################################################################################### + # Parse the image observation from Unity # + # This is a sample code - to make it faster process images in batches in simulator_vec_env.py # + # Decoding is done faster with pytorch -> then detach().numpy() and pass to the update() # + # the decoded image instead of decoding the images inside each env individually # + ############################################################################################### + self.current_obs_img = self.parse_image_observation(observation) + + # Collision or joints limits overpassed # + # env will not be reseted - wait until the end of the episoe for planar envs # + if observation['Observation'][-1] == 1.0 or self.joints_limits_violation(): + self.collided_env = 1 + self.reset_flag = True + + # the method below handles synchronizing states of the DART kinematic chain with the # + # observation from Unity hence it should be always called # + self._unity_retrieve_joint_values(observation['Observation']) + + # Class attributes below exist in the parent class, hence the names should 
not be changed + if(time_step_update == True): + self.time_step += 1 + + state = self.get_state() + reward = self.get_reward(self.action_state) + done = bool(self.get_terminal()) + info = {"success": False} # Episode was successful. It is set at simulator_vec_env.py before reseting + + # Keep track the previous distance of the ee to the box - used in the reward function # + self.prev_dist_ee_box_z = self.get_relative_distance_ee_box_z_unity() + self.prev_dist_ee_box_x = self.get_relative_distance_ee_box_x_unity() + self.prev_dist_ee_box_ry = self.get_relative_distance_ee_box_ry_unity() + + self.prev_action = self.action_state + + return state, reward, done, info + + def parse_image_observation(self, observation): + """ + Read the unity observation and decode the RGB image + + :param observation: is the observation received from the Unity simulator + + :return: decoded RGB image + """ + + base64_bytes = observation['ImageData'][0].encode('ascii') + image_bytes = base64.b64decode(base64_bytes) + image = np.frombuffer(image_bytes, np.uint8) + image = cv2.imdecode(image, cv2.IMREAD_COLOR) + # cv2.IMREAD_GRAYSCALE - for faster processing but with lower performance (depending on the task) + + #print(image.shape) + #cv2.imwrite("test.jpg", image) + + return image + + """ + -> Advanced code: + - Sample code to parallelize the image-proccessing logic. Should be adapted, can not be run + - Add this code/logic in the simulator_vec_env.py + - Ideas of Ludwig Graef, TUM + + Advise: + - Parse the images from the response of the json message, and then call the env.update() with the decode images + - More advanced idea: try in addition to use nvidia dali. 
jpeg decoding is faster + + # Sample code starts # + + # Put this in the simulator_vec_env.py # + observations_preprocessed = preprocessImage(observations_as_literal) + for env, observation, observation_img in zip(self.envs, observations_as_literal, observations_preprocessed): + obs, rew, done, info = env.update(observation, observation_img) + observations_converted.append(obs) + + def preprocessImage(self, observation): + mean = torch.tensor([0.8, 0.9. 0.99]) + std = torch.tensor([0.8, 0.9. 0.99]) + norm_transform = T.Normalize(mean, std) + device = "cuda" + imgs = torch.stack([self.observation_to_image_tensor(obs) for obs in observation]) + imgs = norm_transform(imgs/255).to(device) + return imgs.numpy() + + def observation_to_image_tensor(self, observation) -> torch.tensor: + base64_bytes = observation['ImageData'][0].encode('ascii') + image_bytes = base64.b64decode(base64_bytes) + buffer2 = bytearray(len(image_bytes)) + buffer2[:] = image_bytes + image = torch.frombuffer(buffer2, dtype=torch.uint8) + img = torchvision.io.decode_image(image, torchvision.io.ImageReadMode.RGB) + return img.float() + # Sample code ends # + """ + + ############## + # Monitoring # + ############## + def _create_task_monitor(self, plot_joint_position=True, plot_joint_velocity=True, plot_joint_acceleration=False, plot_joint_torque=True, plot_agent_state=False, plot_agent_action=True, plot_agent_reward=True): + """ + Override the task monitor definition since we have changed the get_state() function and the actions dimensions + refer to iiwa_dart.py and task_monitor.py for more + """ + from PySide2.QtWidgets import QApplication + from utils.task_monitor import TaskMonitor + + if not QApplication.instance(): + self.monitor_app = QApplication([]) + else: + self.monitor_app = QApplication.instance() + + plot_agent_state = False + # Unused -> state observation is an image # + # Do not visualize the agent state # + self.monitor_n_states = 2 + state_chart_categories = ['X', 'Z'] + if 
self.action_space_dimension == 3: + self.monitor_n_states += 1 + state_chart_categories = ['X', 'Z', 'RY'] + # Unused ################################### + + self.monitor_window = \ + TaskMonitor(plot_joint_position=plot_joint_position, + param_joint_position={'dim': self.n_links, + 'min': np.rad2deg(self.MIN_JOINT_POS), + 'max': np.rad2deg(self.MAX_JOINT_POS), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 10.0, + 'title': "Joint Position [" + u"\N{DEGREE SIGN}" + "]"}, + plot_joint_velocity=plot_joint_velocity, + param_joint_velocity={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_VEL), + 'max': np.rad2deg(+self.MAX_JOINT_VEL), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Velocity [" + u"\N{DEGREE SIGN}" + "/s]"}, + plot_joint_acceleration=plot_joint_acceleration, + param_joint_acceleration={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_ACC), + 'max': np.rad2deg(+self.MAX_JOINT_ACC), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'title': "Joint Acceleration [" + u"\N{DEGREE SIGN}" + "/s^2]"}, + plot_joint_torque=plot_joint_torque, + param_joint_torque={'dim': self.n_links, + 'min': -self.MAX_JOINT_TORQUE, + 'max': +self.MAX_JOINT_TORQUE, + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Torque [Nm]"}, + plot_agent_state=plot_agent_state, + param_agent_state={'dim': self.monitor_n_states, + 'min': self.observation_space.low[0:self.monitor_n_states], + 'max': self.observation_space.high[0:self.monitor_n_states], + 'cat': state_chart_categories, + 'title': "Reaching Target Error"}, + plot_agent_action=plot_agent_action, + param_agent_action={'dim': self.action_space_dimension, + 'min': self.action_space.low, + 'max': self.action_space.high, + 'cat': [str(1 + value) for value in + list(range(self.action_space_dimension))], + 'title': "Normalized Command"}, + plot_agent_reward=plot_agent_reward, 
+ param_agent_reward={'dim': 1, + 'min': -3.0, + 'max': +3.0, + 'cat': ['r'], + 'title': "Step Reward"}, + ) + + self.monitor_window.show() + self.monitor_window.correct_size() + + def render(self, mode='human', monitor_real_values=False, joint_torques=None): + """ + Override the render definition since we have changed the get_state() function and the actions dimensions + Note: we do not keep the joints positions in the state of the agent + refer to iiwa_dart.py and task_monitor.py for more + """ + if not self.dart_sim.enable_viewer: + return False + + if self.task_monitor: + if self.monitor_window.plot_joint_torque: + if monitor_real_values and joint_torques is not None: + joint_torques = joint_torques + else: + self.dart_sim.chain.computeInverseDynamics() + joint_torques = self.dart_sim.chain.getForces() + + self.monitor_window.update_values(values_joint_position=np.rad2deg(self.dart_sim.chain.getPositions()), + values_joint_velocity=np.rad2deg(self.dart_sim.chain.getVelocities()), + values_joint_acceleration=np.rad2deg(self.dart_sim.chain.getAccelerations()), + values_joint_torque=joint_torques, + values_agent_action=self.action_state, + values_agent_reward=[self.reward_state]) + self.monitor_app.processEvents() + + return self.dart_sim.render(mode=mode) diff --git a/agent/envs_advanced/iiwa_numerical_planar_grasping_env.py b/agent/envs_advanced/iiwa_numerical_planar_grasping_env.py new file mode 100644 index 0000000..44f0a17 --- /dev/null +++ b/agent/envs_advanced/iiwa_numerical_planar_grasping_env.py @@ -0,0 +1,940 @@ +""" +A numerical planar grasping Env class inheriting from the IiwaSampleEnv for the Kuka LBR iiwa manipulator with 7 links and a Gripper +The parent class takes care of integrating DART Engine with Unity simulation environment + +Unity is used as the main simulator for physics/rendering computations. 
+The Unity interface receives joint velocities as commands and returns joint positions and velocities + +DART is used to calculate inverse kinematics of the iiwa chain. +DART changes the agent action space from the joint space to the cartesian space (position-only or pose/SE(3)) of the end-effector. + +Notes: - Coordinates in the Unity simulator are different from the ones in DART which used here: + The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART + - In numerical planar grasping envs, a P-controller (agent controller) keeps fixed the uncontrolled DoF during the episode. E.g. height of the ee + - At the end of the episode, manual actions help the agent to grasp the box + In that case, the P-controller (manual_actions controller) does not correct the controlled DoF by the RL agent. It keeps them fixed. E.g. rotation of the ee + see simulator_vec_env.py for more details + - gym functions use UNITY coordinates - e.g. get_state(), get_reward() +""" + +import numpy as np +from scipy.spatial.transform import Rotation as R + +import math +import dartpy as dart +from gym import spaces + +from envs.iiwa_sample_env import IiwaSampleEnv + +class IiwaNumericalPlanarGraspingEnv(IiwaSampleEnv): + def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, state_type, enable_render=False, task_monitor=False, + with_objects=False, target_mode="None", goal_type="box", randomBoxesGenerator=None, + joints_safety_limit=10, max_joint_vel=20, max_ee_cart_vel=0.035, max_ee_cart_acc =10, max_ee_rot_vel=0.15, max_ee_rot_acc=10, + random_initial_joint_positions=False, initial_positions=[0, 0, 0, -np.pi/2, 0, np.pi/2, np.pi/2], noise_enable_rl_obs=False, noise_rl_obs_ratio=0.05, + reward_dict=None, agent_kp=0.5, agent_kpr=1.5, + robotic_tool="3_gripper", env_id=0): + + # Some checks # + if(use_ik == False or orientation_control == False or 'v' in state_type): + raise Exception("Enable orientation control and inverse kinematics for planar grasping. 
Use also only joints for the state - abort") + + if(ik_by_sns == True): + raise Exception("RL agent with sns is not advised - abort") + + if(goal_type != "box"): + raise Exception("Grasping accepts a box target for now - abort") + + if(robotic_tool.find("gripper") == -1): + raise Exception("Enable gripper for grasping - abort") + + if(not np.isclose(initial_positions[3], -np.pi/2) or not np.isclose(initial_positions[5], np.pi/2) or (not np.isclose(initial_positions[6], np.pi/2) and not np.isclose(initial_positions[6], 0))): + print("Warning: initial_positions are different - make sure you have adapted the planar envs and manual actions correctly") + + # the init of the parent class should be always called, this will in the end call reset() once + super().__init__(max_ts=max_ts, orientation_control=orientation_control, use_ik=use_ik, ik_by_sns=ik_by_sns, state_type=state_type, enable_render=enable_render, task_monitor=task_monitor, + with_objects=with_objects, target_mode=target_mode, goal_type=goal_type, joints_safety_limit=joints_safety_limit, max_joint_vel=max_joint_vel, max_ee_cart_vel=max_ee_cart_vel, + max_ee_cart_acc=max_ee_cart_acc, max_ee_rot_vel=max_ee_rot_vel, max_ee_rot_acc=max_ee_rot_acc, random_initial_joint_positions=random_initial_joint_positions, initial_positions=initial_positions, + robotic_tool=robotic_tool, env_id=env_id) + + # Box dataset to spawn boxes # + self.randomBoxesGenerator = randomBoxesGenerator + + # Keep object initial pose. 
Easier calculations for rotations - box does not move during the planar episode #
+ self.init_object_pose_unity = None
+
+ ###################################################################
+ # Reward related #
+ ###################################################################
+ self.reward_collision = reward_dict["reward_collision"] # Value
+ self.reward_height_goal = reward_dict["reward_height_goal"] # Height in meters
+
+ self.reward_pose_weight = reward_dict["reward_pose_weight"] # Weight for the pose error displacement
+ self.reward_pose_norm_const = reward_dict["reward_pose_norm_const"] # Normalization constant for the pose error displacement
+
+ self.reward_x_weight = reward_dict["reward_x_weight"]
+ self.reward_x_norm_const = reward_dict["reward_x_norm_const"]
+
+ self.reward_z_weight = reward_dict["reward_z_weight"]
+ self.reward_z_norm_const = reward_dict["reward_z_norm_const"]
+
+ self.collision_flag = False # Give collision reward only once per episode
+
+ # Save previous distance ee/box for calculating the displacements - see reward definition #
+ self.prev_dist_ee_box_z = np.inf
+ self.prev_dist_ee_box_x = np.inf
+ self.prev_dist_ee_box_ry = np.inf
+ ###################################################################
+
+ ###################################################################
+ # P-controller related #
+ ###################################################################
+ self.agent_kp = agent_kp # P-controller gain for positional errors during the episode
+ self.agent_kpr = agent_kpr
+ self.target_rot_quat_dart = None # Save the target orientation in quaternions - keep moving planar during the episode
+ self.target_z_dart = None # Target height - keep moving planar during the episode
+ ###################################################################
+
+ ###################################################################
+ # Noise related #
+ ###################################################################
+ 
self.noise_enable_rl_obs = noise_enable_rl_obs # True/False + self.noise_rl_obs_ratio = noise_rl_obs_ratio # e.g. 0.05 + ################################################################### + + ############################################################################################# + # Joints are normalized in [-1, 1] at the get_state() function - for planar envs # + # The below settings are necessary to be set so that in iiwa_dart.py when # + # calling the self.observation_space.sample() function, we denormalize the joints positions # + ############################################################################################# + self.normalized_rl_obs = True + self.observation_indices = {'obs_len': 0} + + ######################################### + # Action and observation spaces related # + ######################################### + if np.isclose(np.asarray([self.reward_pose_weight]), np.asarray([0])): # No rotation control - 2DoF + ################## + # Viewer-related # + ################## + + # Exist in base-class - overide # + self.observation_indices['joint_pos'] = 2 # In which position of the agent state the joint positions start + self.observation_indices['obs_len'] = 9 + + ############### + # Gym-related # + ############### + + # X, and Z in unity - no rotation # + self.action_space_dimension = 2 + self.observation_space_dimension = 9 + + # 4 DoF that are being controlled by the P-controller - keep moving planar during the episode # + self.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 1, + "kp_x": 0, # Controlled by the RL-agent + "kp_y": 0, + "kp_z": 1 + } + else: # 3DoF control, rotation is active + ################## + # Viewer-related # + ################## + self.observation_indices['joint_pos'] = 3 + self.observation_indices['obs_len'] = 10 + + ############### + # Gym-related # + ############### + + # X, Y, and RY in unity # + self.action_space_dimension = 3 + self.observation_space_dimension = 10 + + # 3 DoF that are being 
controlled by the P-controller - keep moving planar during the episode # + self.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 0, + "kp_x": 0, + "kp_y": 0, + "kp_z": 1 + } + + ##################### + # Define gym spaces # + ##################### + self.action_space = spaces.Box(-np.ones(self.action_space_dimension), np.ones(self.action_space_dimension), dtype=np.float32) + + self.observation_space = spaces.Box(-np.ones(self.observation_space_dimension), np.ones(self.observation_space_dimension), dtype=np.float32) + + ######################################################################## + # Set-up the task_monitor. The re-implementation resides in this class # + # since the dims of action, and obs spaces have been overridden above # + ######################################################################## + if self.task_monitor: + self._create_task_monitor() + + def get_state(self): + """ + defines the environment state: + Format: [x_error, y_error, ry_error, j1, .., j7] normalized in [-1, 1] + - error from the target pose: ee to the target box (x, y, ry axis) + - joints positions + - unity coords system + + Note: if ry rotation is not controlled by the RL agent, then the state will not include the ry_error part + + :return: observation state for the policy training. 
+ """ + state = np.empty(0) + + # Relative position normalized errors of ee to the box # + dx_ee_b, dz_ee_b = self.get_error_ee_box_x_z_normalized_unity() + + # Add to state # + state = np.append(state, np.array([dx_ee_b, dz_ee_b])) + + # Rotation control is active # + if(self.action_space_dimension == 3): + # Normalized rotation error - y axis # + dry_ee_b = self.get_error_ee_box_ry_normalized_unity() + + # Add to state # + state = np.append(state, np.array([dry_ee_b])) + + # Get joints positions # + joint_positions = self.dart_sim.chain.getPositions() + joint_positions = self.normalize_joints(joint_positions) + + # Add to state # + state = np.append(state, joint_positions) + + ##################################################### + # Apply random noise to the RL-observation # + # Needed for deploying the model to the real system # + ##################################################### + if (self.noise_enable_rl_obs == True): + state = (state + self.np_random.uniform(-self.noise_rl_obs_ratio, self.noise_rl_obs_ratio, + (1, len(state)))).clip(-1, 1).tolist()[0] + + # the lines below should stay as it is - exist in parent class + self.observation_state = np.array(state) + + return self.observation_state + + def get_reward(self, action): + """ + defines the environment reward + novel reward that uses displacements -- see self.get_reward_term_displacement() + + :param action: is the current action decided by the RL agent - not used + + :return: reward for the policy training + """ + + reward = 0.0 + if(self.collided_env != 1): # Valid env + reward += self.get_reward_term_displacement() + + # Collision penalty is given only once # + elif(self.collided_env == 1 and self.collision_flag == False): + reward += self.reward_collision + self.collision_flag = True + + # the lines below should stay as it is + self.reward_state = reward + + return self.reward_state + + def get_terminal_reward(self): + """ + checks if the box is in the air after the end of the episode and manual 
actions (down/close/up) - see also simulator_vec_env.py + + :return: a boolean value representing if the box is in the air (True means success) + """ + if(self.get_object_height_unity() >= self.reward_height_goal and self.collided_env != 1): + return True + + return False + + def get_terminal(self): + """ + checks the terminal conditions for the episode + Important: planar envs should terminate at the same time step - when 'num_envs' != 1 + + :return: a boolean value indicating if the episode should be terminated - maximum timesteps reached + """ + self.reset_flag = False + if self.time_step > self.max_ts: + self.reset_flag = True + + return self.reset_flag + + def update_action(self, action): + """ + converts env action to the required unity action by using inverse kinematics from DART + - action dim: 3, or 2 (in case of no rotation control) + - the remaining DoF are controlled by the P-controller. + - the Gripper is not controlled by the RL agent only during the manual and at the end of the episode - see simulator_vec_env.py + + :param action: The action vector decided by the RL agent. Acceptable range: [-1, +1] + + :return: the command to send to the Unity simulator including joint velocities and gripper position + """ + ################# + # Unity in DART # + # Yu = Zd # + # Xu = -Yd # + # Zu = Xd # + ################# + + ############################################################################## + # Reset the P-controller - save the initial pose of the manipulator # + # for moving in a planar manner during the episode e.g. keep the same height # + ############################################################################## + if (self.time_step == 1): + self.reset_agent_p_controller() + + # the lines below should stay as it is + self.action_state = action + env_action = np.clip(action, -1., 1.) 
+ + # Rotation is controlled by the RL-agent + if(self.action_space_dimension == 3): + task_vel = np.zeros(3) + else: + task_vel = np.zeros(2) + + ################################################################### + # The RL agent controls the x, y and rotational z-axis (optional) # + ################################################################### + + ################################################################################## + # Calculate the errors for the P-controller. Axis not controlled by the RL-agent # + # Important: P-controller expects dart coordinates # + ################################################################################## + _, ee_y, _ = self.get_ee_pos_unity() + z_diff = self.target_z_dart - ee_y # Height + curr_quat = self.get_rot_ee_quat() # Current orientation of the ee in quaternions + rx_diff, ry_diff, rz_diff = self.get_rot_error_from_quaternions(self.target_rot_quat_dart, curr_quat) # Orientation error from the target pose + ################################################################################################ + + if(self.action_space_dimension == 3): # Rotation is active - 3DoF control by the RL agent + task_vel[0] = self.MAX_EE_VEL[2] * env_action[0] + task_vel[1] = self.MAX_EE_VEL[3] * env_action[1] + task_vel[2] = self.MAX_EE_VEL[4] * env_action[2] + + ###################################################################################### + # P-controller + inverse kinematics # + # - The DoF that are controlled by the RL-agent are unaffected by the P-controller # + # - see config_p_controller dictionary # + ###################################################################################### + joint_vel = self.action_by_p_controller_custom(rx_diff, ry_diff, task_vel[0], + task_vel[1], task_vel[2], z_diff, + self.agent_kpr, self.agent_kp, + self.config_p_controller) + else: # Rotation is not active - 2DoF control by the RL agent + task_vel[0] = self.MAX_EE_VEL[3] * env_action[0] + task_vel[1] = 
self.MAX_EE_VEL[4] * env_action[1] + + joint_vel = self.action_by_p_controller_custom(rx_diff, ry_diff, rz_diff, + task_vel[0], task_vel[1], z_diff, + self.agent_kpr, self.agent_kp, + self.config_p_controller) + + ########################################################################################## + # Gripper is not controlled via the RL-agent - manual actions - see simulator_vec_env.py # + ########################################################################################## + unity_action = np.append(joint_vel, [float(0.0)]) + + return unity_action + + def update(self, observation, time_step_update=True): + """ + converts the unity observation to the required env state defined in get_state() + with also computing the reward value from the get_reward(...) and done flag, + it increments the time_step, and outputs done=True when the environment should be reset + + important: it also updates the dart kinematic chain of the robot using the new unity simulator observation. + always call this function, once you have send a new command to unity to synchronize the agent environment + + :param observation: is the observation received from the Unity simulator within its [X,Y,Z] coordinate system + 'joint_values': indices [0:7], + 'joint_velocities': indices [7:14], + 'ee_position': indices [14:17], + 'ee_orientation': indices [17:20], + 'target_position': indices [20:23], + 'target_orientation': indices [23:26], + 'object_position': indices [26:29], + 'object_orientation': indices [29:32], + 'gripper_position': indices [32:33], + 'collision_flag': indices [33:34], + + :param time_step_update: whether to increase the time_step of the agent - during manual actions call with False + + :return: The state, reward, episode termination flag (done), and an empty info dictionary + """ + + self.current_obs = observation['Observation'] + + # Collision or joints limits overpassed # + # env will not be reseted - wait until the end of the episoe for planar envs # + if 
observation['Observation'][-1] == 1.0 or self.joints_limits_violation(): + self.collided_env = 1 + + # the method below handles synchronizing states of the DART kinematic chain with the # + # observation from Unity hence it should be always called # + self._unity_retrieve_joint_values(observation['Observation']) + + # Class attributes below exist in the parent class, hence the names should not be changed + if(time_step_update == True): + self.time_step += 1 + + state = self.get_state() + reward = self.get_reward(self.action_state) + done = bool(self.get_terminal()) + info = {"success": False} # Episode was successful. It is set at simulator_vec_env.py before reseting + + + # Keep track the previous distance of the ee to the box - used in the reward function # + self.prev_dist_ee_box_z = self.get_relative_distance_ee_box_z_unity() + self.prev_dist_ee_box_x = self.get_relative_distance_ee_box_x_unity() + self.prev_dist_ee_box_ry = self.get_relative_distance_ee_box_ry_unity() + + self.prev_action = self.action_state + + return state, reward, done, info + + ########### + # Monitor # + ########### + def _create_task_monitor(self, plot_joint_position=True, plot_joint_velocity=True, plot_joint_acceleration=False, plot_joint_torque=True, + plot_agent_state=True, plot_agent_action=True, plot_agent_reward=True): + """ + Override the task monitor definition since we have changed the get_state() function and the actions dimensions + refer to iiwa_dart.py and task_monitor.py for more + """ + + from PySide2.QtWidgets import QApplication + from utils.task_monitor import TaskMonitor + + if not QApplication.instance(): + self.monitor_app = QApplication([]) + else: + self.monitor_app = QApplication.instance() + + self.monitor_n_states = 2 + state_chart_categories = ['X', 'Z'] + if self.action_space_dimension == 3: + self.monitor_n_states += 1 + state_chart_categories = ['X', 'Z', 'RY'] + + self.monitor_window = \ + TaskMonitor(plot_joint_position=plot_joint_position, + 
param_joint_position={'dim': self.n_links, + 'min': np.rad2deg(self.MIN_JOINT_POS), + 'max': np.rad2deg(self.MAX_JOINT_POS), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 10.0, + 'title': "Joint Position [" + u"\N{DEGREE SIGN}" + "]"}, + plot_joint_velocity=plot_joint_velocity, + param_joint_velocity={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_VEL), + 'max': np.rad2deg(+self.MAX_JOINT_VEL), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Velocity [" + u"\N{DEGREE SIGN}" + "/s]"}, + plot_joint_acceleration=plot_joint_acceleration, + param_joint_acceleration={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_ACC), + 'max': np.rad2deg(+self.MAX_JOINT_ACC), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'title': "Joint Acceleration [" + u"\N{DEGREE SIGN}" + "/s^2]"}, + plot_joint_torque=plot_joint_torque, + param_joint_torque={'dim': self.n_links, + 'min': -self.MAX_JOINT_TORQUE, + 'max': +self.MAX_JOINT_TORQUE, + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Torque [Nm]"}, + plot_agent_state=plot_agent_state, + param_agent_state={'dim': self.monitor_n_states, + 'min': self.observation_space.low[0:self.monitor_n_states], + 'max': self.observation_space.high[0:self.monitor_n_states], + 'cat': state_chart_categories, + 'title': "Reaching Target Error"}, + plot_agent_action=plot_agent_action, + param_agent_action={'dim': self.action_space_dimension, + 'min': self.action_space.low, + 'max': self.action_space.high, + 'cat': [str(1 + value) for value in + list(range(self.action_space_dimension))], + 'title': "Normalized Command"}, + plot_agent_reward=plot_agent_reward, + param_agent_reward={'dim': 1, + 'min': -3.0, + 'max': +3.0, + 'cat': ['r'], + 'title': "Step Reward"}, + ) + + self.monitor_window.show() + self.monitor_window.correct_size() + + def reset(self, temp=False): + """ + resets the DART-gym 
environment and creates the reset_state vector to be sent to Unity + + :param temp: not relevant + + :return: The initialized state + """ + self.current_obs = np.ones(34) + + ############################################################ + # Spawn the next box and fix the target (for task monitor) # + ############################################################ + object_X, object_Y, object_Z, object_RX, object_RY, object_RZ = self.randomBoxesGenerator() + self.init_object_pose_unity = [object_X, object_Y, object_Z, object_RX, object_RY, object_RZ] + + ################################################################################################ + # Align the target with the box. This is done for visualization purposes for the dart viewer # + # In the Unity simulator, however, we spawn the target far away # + # Vision-based models might get confused if there is in the image a red target - if the target # + # is used in vision-based envs -> adapt # + ################################################################################################ + + # Align with the box + target_object_X, target_object_Y, target_object_Z = self.init_object_pose_unity[0], self.init_object_pose_unity[1], self.init_object_pose_unity[2] + + # Note in the dart viewer the ee at the goal is more up as we assume that we have a gripper (urdf) but the gripper is not # + # yet visualized in the viewer. 
The self.dart_sim.get_pos_distance() returns 0 correctly at the goal # + tool_length = 0.0 + target_object_RX, target_object_RY, target_object_RZ = self.get_box_rotation_in_target_dart_coords_angle_axis() + target = [target_object_RX, target_object_RY, target_object_RZ, target_object_Z, -target_object_X, target_object_Y + tool_length] + + # takes care of resetting the DART chain and should stay as it is + state = super().reset() + + # sets the initial reaching target for the current episode, + # should be always called in the beginning of each episode, + # you might need to call it even during the episode run to change the reaching target for the IK-P controller + self.set_target(target) + + # initial position for the gripper state, accumulates the tool_action velocity received in update_action + self.tool_target = 0.0 # should be in range [0.0,90.0] + + # movement control of each joint can be disabled by setting zero for that joint index + active_joints = [1] * 7 + + # the lines below should stay as it is, Unity simulator expects these joint values in radians + joint_positions = self.dart_sim.chain.getPositions().tolist() + joint_velocities = self.dart_sim.chain.getVelocities().tolist() + + ####################################################################################### + # Spawn the target rectangle outside of the Unity simulator -> since it is not used # + # or can harm the vision-based methods # + # in dart we map the target to be in the same pose as the pose of the box (see above) # + ####################################################################################### + #target_positions = self.dart_sim.target.getPositions().tolist() + target_positions = [0, 0, 0, 0, -5, -200] + + target_X, target_Y, target_Z = -target_positions[4], target_positions[5], target_positions[3] + target_RX, target_RY, target_RZ = np.rad2deg([-target_positions[1], target_positions[2], target_positions[0]]) + target_positions_mapped = [target_X, target_Y, target_Z, 
target_RX, target_RY, target_RZ] + + object_positions_mapped = [object_X, object_Y, object_Z, object_RX, object_RY, object_RZ] + + self.reset_state = active_joints + joint_positions + joint_velocities\ + + target_positions_mapped + object_positions_mapped + [self.tool_target] + + self.collision_flag = False + + ####################################################################################### + # Keep track the previous distance of the ee to the box - used in the reward function # + # Initialize -during the __init__() they are set to np.inf # + ####################################################################################### + self.prev_dist_ee_box_z = self.get_relative_distance_ee_box_z_unity() + self.prev_dist_ee_box_x = self.get_relative_distance_ee_box_x_unity() + self.prev_dist_ee_box_ry = self.get_relative_distance_ee_box_ry_unity() + + return state + + ################ + # P-controller # + ################ + def action_by_p_controller_custom(self, rx_diff, ry_diff, rz_diff, x_diff, y_diff, z_diff, kpr, kp, config_p_controller, normalize_action=False): + """ + computes the joint-space velocity command given position and orientation errors and possibly task-space velocities generated from an RL-agent + it uses a P-controller for generating task-space velocities for the input errors and IK from dart for the joint-space transformation + + Important: input params should be in DARTS cords + + Note: some axis can be controller by an RL-agent while others by the p-controller + hence, inputs should be errors or task-space velocities + + + :param rx_diff: rx axis error or task-space vel + :param ry_diff: ry axis error or task-space vel + :param rz_diff: rz axis error or task-space vel + + :param x_diff: x axis error or task-space vel + :param y_diff: y axis error or task-space vel + :param z_diff: z axis error or task-space vel + + + :param kpr: P-controller gain for orientation errors + :param kp: P-controller gain for position errors + + :param 
config_p_controller: dictionary to define which axes are controlled by the P-controller. It should be 6-dim
+ - 1 means apply the corresponding gain. Else, input is a task-space velocity. Do nothing.
+ - e.g. {"kpr_x": 1, "kpr_y": 1,
+ "kpr_z": 1, "kp_x": 0,
+ "kp_y": 1, "kp_z": 0
+ }
+
+ :param normalize_action: whether IK should denormalize the input velocities
+
+ :return: Action in joint-space
+ """
+
+ # Whether to multiply with the P-controller gains #
+ kpr_x = kpr if config_p_controller["kpr_x"] == 1 else 1
+ kpr_y = kpr if config_p_controller["kpr_y"] == 1 else 1
+ kpr_z = kpr if config_p_controller["kpr_z"] == 1 else 1
+ kp_x = kp if config_p_controller["kp_x"] == 1 else 1
+ kp_y = kp if config_p_controller["kp_y"] == 1 else 1
+ kp_z = kp if config_p_controller["kp_z"] == 1 else 1
+
+ task_space_vel = np.array([kpr_x * rx_diff, kpr_y * ry_diff, kpr_z * rz_diff, kp_x * x_diff, kp_y * y_diff, kp_z * z_diff])
+
+ # IK #
+ joint_space_vel = self.dart_sim.command_from_action(task_space_vel, normalize_action=normalize_action)
+
+ return joint_space_vel
+
+ def reset_agent_p_controller(self):
+ """
+ reset the P-controller. Save the initial pose of ee. e.g. 
for keeping the same height during the RL episode + Note: dart coords + + affects: self.target_z_dart + affects: self.target_rot_quat_dart + """ + + _, ee_y, __ = self.get_ee_pos_unity() + + self.target_z_dart = ee_y + self.target_rot_quat_dart = self.get_rot_ee_quat() + + ################ + # Reward terms # + ################ + def get_reward_term_displacement(self): + """ + returns the reward value for the current observation + + uses a displacements logic: + - the current ee distance to the box in x, z, and ry axis minus the previous ee distance to box for the same axis (see implementation for more) + + :return: reward displacement term (float) + """ + reward = 0 + + # Z axis # + curr_dist_ee_box_z = self.get_relative_distance_ee_box_z_unity() + dz = (self.prev_dist_ee_box_z - curr_dist_ee_box_z) # Displacement + + dz /= self.reward_z_norm_const # Normalize + dz = np.clip(dz, -1, 1) # Clip for safety + dz *= self.reward_z_weight # Weight this term + + # X axis # + curr_dist_ee_box_x = self.get_relative_distance_ee_box_x_unity() + dx = (self.prev_dist_ee_box_x - curr_dist_ee_box_x) + + dx /= self.reward_x_norm_const + dx = np.clip(dx, -1, 1) + dx *= self.reward_x_weight + + # RY axis # + curr_dist_ee_box_ry = self.get_relative_distance_ee_box_ry_unity() + dry = (self.prev_dist_ee_box_ry - curr_dist_ee_box_ry) + + dry /= self.reward_pose_norm_const + dry = np.clip(dry, -1, 1) + dry *= self.reward_pose_weight + + reward = dz + dx + dry + + return reward + + + ############# + # Accessors # + ############# + def get_ee_orient_unity(self): + """ + Override it for planar grasping envs - easier calculations + + :return: x, y, z orientation of the ee in Euler + """ + rot_mat = self.dart_sim.chain.getBodyNode('iiwa_link_ee').getTransform().rotation() + rx, ry, rz = dart.math.matrixToEulerXYZ(rot_mat) + + rx_unity = -ry + ry_unity = rz + rz_unity = rx + + return rx_unity, ry_unity, rz_unity + + def get_box_rotation_in_target_dart_coords_angle_axis(self): + """ + return in 
angle-axis dart coordinates the orientation of the box + - read the unity coordinates of the box and then convert + - unity uses degrees [-90, 0] for rotation in our case) + - if the orientation of the box is in a different range adapt + + :return: a, b, c (in rad) angle-axis dart coordinates the orientation of the box + """ + + object_RY = self.init_object_pose_unity[4] + object_RY = -object_RY if object_RY >= -45 else -object_RY - 90 + r = R.from_euler('xyz', [-180, 0, -180 + object_RY], degrees=True) + r = r.as_matrix() + a, b, c = dart.math.logMap(r) + + return a, b, c + + def get_object_pos_unity(self): + """ + get the position of the box in unity coords + + :return: x, y, z coords of box in unity + """ + return self.current_obs[26], self.current_obs[27], self.current_obs[28] + + def get_object_orient_unity(self): + """ + get the orientation of the box in unity coords + Important: works for planar grasping envs only - assume the box does not move during the RL episode + + :return: rx, ry, rz coords of box in unity + """ + return self.init_object_pose_unity[3], self.init_object_pose_unity[4], self.init_object_pose_unity[5] + + def get_object_height_unity(self): + """ + get the height of the box in unity coords + + :return: y coords of box + """ + return self.current_obs[27] + + def get_object_pose_unity(self): + """ + get the pose of the box in unity coords + Important: works for planar grasping envs only - assume the box does not move during the RL episode + + :return: x, y, z, rx, ry, rz coords of box + """ + return self.current_obs[26], self.current_obs[27], self.current_obs[28], \ + self.init_object_pose_unity[3], self.init_object_pose_unity[4], self.init_object_pose_unity[5] + + def get_collision_flag(self): + """ + get the collision flag value + + :return: 0 (no collision) or 1 (collision) + """ + return self.current_obs[33] + + def get_relative_distance_ee_box_x_unity(self): + """ + get the relative distance from the box to the ee in x axis in unity 
coords + + :return: dist (float) of the box to the ee in x axis + """ + x_err, _, _ = self.get_error_ee_box_pos_unity() + dist = abs(x_err) + + return dist + + def get_relative_distance_ee_box_z_unity(self): + """ + get the relative distance from the box to the ee in z axis in unity coords + + :return: dist of the box to the ee in z axis + """ + _, _, z_err = self.get_error_ee_box_pos_unity() + dist = abs(z_err) + + return dist + + def get_relative_distance_ee_box_ry_unity(self): + """ + get the relative distance from the box to the ee in ry axis in unity coords + + :return: dist of the box to the ee in ry axis + """ + ry_err = self.get_error_ee_box_ry_unity() + dist = abs(ry_err) + + return dist + + def get_error_ee_box_pos_unity(self): + """ + get the error from the box to the ee in unity coords + + :return: x_err, y_err, z_err of the box to the ee + """ + object_x, object_y, object_z = self.get_object_pos_unity() + ee_x, ee_y, ee_z = self.get_ee_pos_unity() + + return object_x - ee_x, object_y - ee_y, object_z - ee_z + + def get_error_ee_box_x_z_unity(self): + """ + get the error from the box to the ee in x and z axis in unity coords + + :return: x_err, z_err of the box to the ee + """ + x_err, _, z_err = self.get_error_ee_box_pos_unity() + + return x_err, z_err + + def get_error_ee_box_x_z_normalized_unity(self): + """ + get the normalized error from the box to the ee in x and z axis in unity coords + Important: hard-coded manner - adapt if the ee starts from different initial position, or + the boxes are not spawned in front of the robot + + :return: x_err, z_err normalized of the box to the ee + """ + x_err, z_err = self.get_error_ee_box_x_z_unity() + + return x_err / 1.4, z_err / 0.73 + + def get_error_ee_box_ry_unity(self): + """ + get the relative error from the box to the ee in ry axis in unity coords + + :return: ry_err of the box to the ee - in radians + """ + _, ee_ry, _ = self.get_ee_orient_unity() + + 
##################################################### + # Transorm ee and box ry rotation to our needs # + # see the function implementation for more details # + ##################################################### + box_ry = np.deg2rad(self.init_object_pose_unity[4]) # Deg -> rad + clipped_ee_ry, clipped_box_ry = self.clip_ee_ry_and_box_ry(ee_ry, box_ry) + + ry_error = clipped_box_ry - clipped_ee_ry + + return ry_error + + def get_error_ee_box_ry_normalized_unity(self): + """ + get the normalized relative error from the box to the ee in ry axis in unity coords + Important: hard-coded normalization - adapt if needed + + :return: ry_err normalized of the box to the ee + """ + error_ry = self.get_error_ee_box_ry_unity() + + return error_ry / (2 * np.pi) + + def clip_ee_ry_and_box_ry(self, ee_ry, box_ry): + """ + Fix the rotation in y axis for both box and end-effector + - Input for the ee is the raw observation of rotation returned from the Unity simulator + - Input for the box is the rotation returned from the boxGenerator but transformed in radians + - The ee starts at +-np.pi rotation in y-axis. + - Important: some observations are returned with a change of sign and they should be corrected. + + Important: In this clipping behaviour, we assume that when the ee is at +-np.pi and the box + at 0 radians, then the error between them is zero. No rotation should be performed - 'highest reward' + - Hence, in this case, the function returns for ee_ry -np.pi and for box_ry -np.pi so that their difference is 0. + + Note: We also define only one correct rotation for grasping the box. + - The Box rotation ranges from [-90, 0] + - The ee should (only) turn clock-wise when the box is between [-90, -45), and + counter-clock-wise when the box is between [-45, 0]. + + Warning: If the ee starts from different rotation than +-np.pi or the box rotation spawn range of [-90, 0] is different - adapt. 
+ + :param ee_ry: ee rotation returned from the unity simulator in radians + :param boy_ry: box rotation returned from the boxGenerator, but transformed in radians - does not change during the RL episode + + :return: clipped_ee_ry corrected ee ry rotation in radians + :return: clipped_box_ry: corrected box ry rotation in radians + """ + + if (self.init_object_pose_unity[4] >= -45): # Counter-clock-wise rotation should be performed + if (ee_ry > 0): # Change of sign + ee_ry *= -1 + else: + # ee is turning in the wrong direction. ee rotation is # + # decreasing but the error to the box is increasing. # + # add the difference to correct the error # + ee_ry = -np.pi - (np.pi + ee_ry) + + # When box_ry is at 0 rad - no rotation of the ee is needed # + clipped_box_ry = -np.pi - box_ry + + elif (self.init_object_pose_unity[4] < -45): # Clock-wise rotation should be performed + if (ee_ry < 0): + ee_ry *= -1 + else: + ee_ry = np.pi + (np.pi - ee_ry) + + clipped_box_ry = np.pi + (-np.pi / 2 - box_ry) + + clipped_ee_ry = ee_ry + + return clipped_ee_ry, clipped_box_ry + + def clip_ry(self, ee_ry): + """ + Individual cliping function. For more information refer to the self.clip_ee_ry_and_box_ry() definition + - This re-definition is for agents that need to clip ee_ry and box_ry seperately + + :param ee_ry: ee rotation returned from the unity simulator in radians + + :return: clipped_ee_ry corrected ee rotation (rad) + """ + + if (self.init_object_pose_unity[4] >= -45): + if (ee_ry > 0): + ee_ry *= -1 + else: + ee_ry = -np.pi - (np.pi + ee_ry) + + elif (self.init_object_pose_unity[4] < -45): + if (ee_ry < 0): + ee_ry *= -1 + else: + ee_ry = np.pi + (np.pi - ee_ry) + + clipped_ee_ry = ee_ry + + return clipped_ee_ry + + def clip_box_ry(self, box_ry): + """ + Individual cliping function. 
For more information refer to the self.clip_ee_ry_and_box_ry() definition + - This re-definition is for agents that need to clip ee_ry and box_ry seperately + + :param boy_ry: box rotation returned from the boxGenerator, but transformed in radians - does not change during the RL episode + + :return: clipped_box_ry: corrected box ry rotation in radians + """ + + if (self.init_object_pose_unity[4] >= -45): + clipped_box_ry = -np.pi - box_ry + elif (self.init_object_pose_unity[4] < -45): + clipped_box_ry = np.pi + (-np.pi / 2 - box_ry) + + return clipped_box_ry diff --git a/agent/envs_advanced/iiwa_ruckig_planar_grasping_env.py b/agent/envs_advanced/iiwa_ruckig_planar_grasping_env.py new file mode 100755 index 0000000..d92e1e5 --- /dev/null +++ b/agent/envs_advanced/iiwa_ruckig_planar_grasping_env.py @@ -0,0 +1,383 @@ +""" +A model-based numerical planar grasping Env class inheriting from the IiwaNumericalPlanarGraspingEnv for the Kuka LBR iiwa manipulator with 7 links and a Gripper +The parent class takes care of integrating DART Engine with Unity simulation environment + +Note: update_action() receives velocities genereted by ruckig. For more refer to ruckig_planar_model.py + +Unity is used as the main simulator for physics/rendering computations. +The Unity interface receives joint velocities as commands and returns joint positions and velocities + +DART is used to calculate inverse kinematics of the iiwa chain. +DART changes the agent action space from the joint space to the cartesian space (position-only or pose/SE(3)) of the end-effector. + +action_by_pd_control method can be called to implement a Proportional-Derivative control law instead of an RL policy. 
+ +Note: Coordinates in the Unity simulator are different from the ones in DART which used here: +The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART +""" + +import numpy as np +import math + +from gym import spaces + +from envs_advanced.iiwa_numerical_planar_grasping_env import IiwaNumericalPlanarGraspingEnv + +class IiwaRuckigPlanarGraspingEnv(IiwaNumericalPlanarGraspingEnv): + def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, state_type, enable_render=False, task_monitor=False, + with_objects=False, target_mode="None", goal_type="box", randomBoxesGenerator=None, + joints_safety_limit=10, max_joint_vel=20, max_ee_cart_vel=0.035, max_ee_cart_acc =10, max_ee_rot_vel=0.15, max_ee_rot_acc=10, + random_initial_joint_positions=False, initial_positions=[0, 0, 0, -np.pi/2, 0, np.pi/2, np.pi/2], noise_enable_rl_obs=False, noise_rl_obs_ratio=0.05, + reward_dict=None, agent_kp=0.5, agent_kpr=1.5, threshold_p_model_based=0.01, + robotic_tool="3_gripper", env_id=0): + + ################################################################################################ + # the init of the parent class should be always called, this will in the end call reset() once # + ################################################################################################ + super().__init__(max_ts=max_ts, orientation_control=orientation_control, use_ik=use_ik, ik_by_sns=ik_by_sns, state_type=state_type, enable_render=enable_render, task_monitor=task_monitor, + with_objects=with_objects, target_mode=target_mode, goal_type=goal_type, randomBoxesGenerator=randomBoxesGenerator, + joints_safety_limit=joints_safety_limit, max_joint_vel=max_joint_vel, max_ee_cart_vel=max_ee_cart_vel, max_ee_cart_acc=max_ee_cart_acc, max_ee_rot_vel=max_ee_rot_vel, max_ee_rot_acc=max_ee_rot_acc, + random_initial_joint_positions=random_initial_joint_positions, initial_positions=initial_positions,noise_enable_rl_obs=False,noise_rl_obs_ratio=0.05, + reward_dict=reward_dict,agent_kp=agent_kp, 
agent_kpr=agent_kpr, robotic_tool=robotic_tool, env_id=env_id) + + ############################################################################################# + # Joints are normalized in [-1, 1] at the get_state() function - for planar envs # + # The below settings are necessary to be set so that in iiwa_dart.py when # + # calling the self.observation_space.sample() function, we denormalize the joints positions # + ############################################################################################# + self.normalized_rl_obs = True + self.observation_indices = {'obs_len': 0} + + # Whether to apply a threshold to the model-based controller - see update_action() # + self.threshold_p_model_based = threshold_p_model_based + + ######################################## + # Action and observation space related # + ######################################## + if np.isclose(np.asarray([self.reward_pose_weight]), np.asarray([0])): # No rotation control - 2DoF + ################## + # Viewer-related # + ################## + self.observation_indices['joint_pos'] = 0 # Unused -> no joints position information in the state + self.observation_indices['obs_len'] = 5 + + ############### + # Gym-related # + ############### + self.action_space_dimension = 2 + self.observation_space_dimension = 5 # [reset, x_ee_d, y_ee_d, x_box_d, y_box_d] - see get_state() + + # 4 DoF that are being controlled by the P-controller - keep moving planar during the episode # + self.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 1, + "kp_x": 0, # Controlled by the model-based agent + "kp_y": 0, + "kp_z": 1 + } + + tool_length = 0.2 # Tolerance + + ########################################################################## + # Important: adapt if the ranges change in the randomBoxesGenerator, or # + # e.g. 
the manipulator is in a different initial planar position, or # + # the boxes are spawned behind/left/right and not in-front # + ########################################################################## + low = np.asarray([0, -(0.95 + tool_length), -(0.95 + tool_length), -(0.95 + tool_length), -(0.95 + tool_length)]) + + high = np.asarray([1, 0.95 + tool_length, 0.95 + tool_length, 0.95 + tool_length, 0.95 + tool_length]) + + self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32) + + self.action_space = spaces.Box(low=np.asarray([-self.MAX_EE_CART_VEL[1], -self.MAX_EE_CART_VEL[0]]), + high=np.asarray([self.MAX_EE_CART_VEL[1], self.MAX_EE_CART_VEL[0]]), + dtype=np.float32) + + else: # 3DoF control, rotation is active + ################## + # Viewer-related # + ################## + self.observation_indices['joint_pos'] = 0 # Unused + self.observation_indices['obs_len'] = 6 + + ############### + # Gym-related # + ############### + + # X, Y, RY in unity # + self.action_space_dimension = 3 + self.observation_space_dimension = 6 # [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] - see get_state() + + # 3 DoF that are being controlled by the P-controller - keep moving planar during the episode # + self.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 0, + "kp_x": 0, + "kp_y": 0, + "kp_z": 1 + } + + tool_length = 0.2 # Tolerance + low = np.asarray([0, -2*np.pi, -(0.95 + tool_length), -(0.95 + tool_length), -2*np.pi, -(0.95 + tool_length), -(0.95 + tool_length)]) + + high = np.asarray([1, 2*np.pi, 0.95 + tool_length, 0.95 + tool_length, 2*np.pi, 0.95 + tool_length, 0.95 + tool_length]) + + self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32) + + self.action_space = spaces.Box(low=np.asarray([-self.MAX_EE_ROT_VEL[2], -self.MAX_EE_CART_VEL[1], -self.MAX_EE_CART_VEL[1]]), + high=np.asarray([self.MAX_EE_ROT_VEL[2], self.MAX_EE_CART_VEL[1], self.MAX_EE_CART_VEL[0]]), + dtype=np.float32) + + 
######################################################################## + # Set-up the task_monitor. The re-implementation resides in this class # + # since the dims of action, and obs spaces have been overridden above # + ######################################################################## + if self.task_monitor: + self._create_task_monitor() + + def get_state(self): + """ + defines the environment state with shape(num_envs, 5 or 6) - depending on the active DoF (rotation control is active) + Format: [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] + - Should reset, rotation of ee in dart, x position of the ee in dart, etc + - box pose is used only once (at reset_ruckig_model()) as the box does not move during the planar episode + - Not normalized in [-1, 1] as in most gym agents + + :return: state for the model-based agent (no RL training) + """ + state = np.empty(0) + + # Reset ruckig model (flag). Set target pose of ee only once # + # at the first step - see how it is used in ruckig_planar_model.py # + if(self.time_step == 1): + reset_ruckig_model = 1 + else: + reset_ruckig_model = 0 + + state = np.append(state, np.asarray([reset_ruckig_model])) + + state = np.append(state, self.get_ruckig_current_pose()) # Current pose of the ee + state = np.append(state, self.get_ruckig_target_pose()) # Pose of the box - changes only per episode. Assume the box is not moved during the planar motion + + # the lines below should stay as it is + self.observation_state = np.array(state) + + return self.observation_state + + def update_action(self, action): + """ + converts env action to the required unity action by using inverse kinematics from DART + - action dim: 3, or 2 (in case of no rotation control) + - the remaining DoF are controlled by the P-controller. 
+ - the Gripper is not controlled by the RL agent only during the manual and at the end of the episode - see simulator_vec_env.py + + :param action: The action vector decided by the model-based model + - Important: the input action is not normalized in [-1, 1] in this case - m/sec + + :return: the command to send to the Unity simulator including joint velocities and gripper position + """ + + ################# + # Unity in DART # + # Yu = Zd # + # Xu = -Yd # + # Zu = Xd # + ################# + + ############################################################################## + # Reset the P-controller - save the initial pose of the manipulator # + # for moving in a planar manner during the episode e.g. keep the same height # + ############################################################################## + if (self.time_step == 1): + self.reset_agent_p_controller() + + # the lines below should stay as it is + self.action_state = action + env_action = action + + # Rotation is controlled by the model-based agent + if(self.action_space_dimension == 3): + task_vel = np.zeros(3) + else: + task_vel = np.zeros(2) + + ############################################################################ + # The model-based agent controls the x, y and rotational z-axis (optional) # + ############################################################################ + + ########################################################################################### + # Calculate the errors for the P-controller. 
Axis not controlled by the model-based agent # + # Important: P-controller expects dart coordinates # + ########################################################################################### + _, ee_y, _ = self.get_ee_pos_unity() + z_diff = self.target_z_dart - ee_y # Height + curr_quat = self.get_rot_ee_quat() # Current orientation of the ee in quaternions + rx_diff, ry_diff, rz_diff = self.get_rot_error_from_quaternions(self.target_rot_quat_dart, curr_quat) # Orientation error from the target pose + ################################################################################################ + + # Get poses # + curr_pose = self.get_ruckig_current_pose() # Current ee pose + target_pose = self.get_ruckig_target_pose() # Current pose of the box - does not change during the episode + + ############## + # Dart cords # + ############## + if(self.action_space_dimension == 3): # Rotation is active - 3DoF control by the model-based agent + task_vel[0] = env_action[0] + task_vel[1] = env_action[1] + task_vel[2] = env_action[2] + + # Distance to the goal # + dist = np.linalg.norm(np.array([rx_diff, ry_diff, z_diff, target_pose[0] - curr_pose[0], target_pose[1] - curr_pose[1], target_pose[2] - curr_pose[2]])) + + if(dist < self.threshold_p_model_based): # Threshold is reached - stop, if active only + joint_vel = np.zeros(7) + else: + ############################################################################################### + # P-controller + inverse kinematics # + # - The DoF that are controlled by the model-based agent are unaffected by the P-controller # + # - see config_p_controller dictionary # + ############################################################################################### + joint_vel = self.action_by_p_controller_custom(rx_diff, ry_diff, task_vel[0], + task_vel[1], task_vel[2], z_diff, + self.agent_kpr, self.agent_kp, + self.config_p_controller) + else: # Rotation is not active - 2DoF control by the model-based agent + task_vel[0] = 
env_action[0] + task_vel[1] = env_action[1] + + dist = np.linalg.norm(np.array([rx_diff, ry_diff, z_diff, rz_diff, target_pose[0] - curr_pose[0], target_pose[1] - curr_pose[1]])) + + if(dist < self.threshold_p_model_based): # Threshold is reached + joint_vel = np.zeros(7) + else: + joint_vel = self.action_by_p_controller_custom(rx_diff, ry_diff, rz_diff, + task_vel[0], task_vel[1], z_diff, + self.agent_kpr, self.agent_kp, + self.config_p_controller) + + ################################################################################################### + # Gripper is not controlled via the model-based agent - manual actions - see simulator_vec_env.py # + ################################################################################################### + unity_action = np.append(joint_vel, [float(0.0)]) + + return unity_action + + ########### + # Helpers # + ########### + def get_ruckig_current_pose(self): + """ + get the current state pose of the ee in dart coordinates - needed in the get_state() + - refer also to the clip methods in iiwa_numerical_planar_grasping_env.py + + :return: ee_rz_d, ee_x_d, ee_y_d - or the first value is skipped if only 2DoF are controlled by the model-based agent + """ + state = np.empty(0) + + # Rotation part of the ee - only if 3DoF are active # + if(self.action_space_dimension == 3): + _, ee_ry, _ = self.get_ee_orient_unity() + ee_ry = self.clip_ry(ee_ry) + state = np.append(state, np.asarray([ee_ry])) + + # x and z position of the ee # + x, _, z = self.get_ee_pos_unity() + state = np.append(state, np.array([z, -x])) + + return state + + def get_ruckig_target_pose(self): + """ + get the target state pose of the box - needed in the get_state() + - refer also to the clip methods in iiwa_numerical_planar_grasping_env.py + + :return: rz_box_d, x_box_d, y_box_d - or first value is skipped if only 2DoF are controlled by the model-based agent + + """ + state = np.empty(0) + + # Rotation part of the box - if 3DoF are active # + 
if(self.action_space_dimension == 3): + box_ry_rad = np.deg2rad(self.init_object_pose_unity[4]) + box_ry_rad = self.clip_box_ry(box_ry_rad) + state = np.append(state, np.asarray([box_ry_rad])) + + # x and z position of the box # + state = np.append(state, np.array([self.init_object_pose_unity[2], -self.init_object_pose_unity[0]])) + + return state + + ########### + # Monitor # + ########### + def _create_task_monitor(self, plot_joint_position=True, plot_joint_velocity=True, plot_joint_acceleration=False, plot_joint_torque=True, plot_agent_state=True, plot_agent_action=True, plot_agent_reward=True): + """ + Override the task monitor definition since we have changed the get_state() function and the actions dimensions + refer to iiwa_dart.py and task_monitor.py for more + """ + from PySide2.QtWidgets import QApplication + from utils.task_monitor import TaskMonitor + + if not QApplication.instance(): + self.monitor_app = QApplication([]) + else: + self.monitor_app = QApplication.instance() + + self.monitor_n_states = 5 + state_chart_categories = ['RESET', 'X_EE', 'Y_EE', 'X_B', 'Y_B'] + if self.action_space_dimension == 3: + self.monitor_n_states += 2 + state_chart_categories = ['RESET', 'RZ_EE', 'X_EE', 'Y_EE', 'RZ_B', 'X_B', 'Y_B'] + + self.monitor_window = \ + TaskMonitor(plot_joint_position=plot_joint_position, + param_joint_position={'dim': self.n_links, + 'min': np.rad2deg(self.MIN_JOINT_POS), + 'max': np.rad2deg(self.MAX_JOINT_POS), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 10.0, + 'title': "Joint Position [" + u"\N{DEGREE SIGN}" + "]"}, + plot_joint_velocity=plot_joint_velocity, + param_joint_velocity={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_VEL), + 'max': np.rad2deg(+self.MAX_JOINT_VEL), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Velocity [" + u"\N{DEGREE SIGN}" + "/s]"}, + plot_joint_acceleration=plot_joint_acceleration, + 
param_joint_acceleration={'dim': self.n_links, + 'min': np.rad2deg(-self.MAX_JOINT_ACC), + 'max': np.rad2deg(+self.MAX_JOINT_ACC), + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'title': "Joint Acceleration [" + u"\N{DEGREE SIGN}" + "/s^2]"}, + plot_joint_torque=plot_joint_torque, + param_joint_torque={'dim': self.n_links, + 'min': -self.MAX_JOINT_TORQUE, + 'max': +self.MAX_JOINT_TORQUE, + 'cat': [str(1 + value) for value in list(range(self.n_links))], + 'zone': 5.0, + 'title': "Joint Torque [Nm]"}, + plot_agent_state=plot_agent_state, + param_agent_state={'dim': self.monitor_n_states, + 'min': self.observation_space.low[0:self.monitor_n_states], + 'max': self.observation_space.high[0:self.monitor_n_states], + 'cat': state_chart_categories, + 'title': "Reaching Target Error"}, + plot_agent_action=plot_agent_action, + param_agent_action={'dim': self.action_space_dimension, + 'min': self.action_space.low, + 'max': self.action_space.high, + 'cat': [str(1 + value) for value in + list(range(self.action_space_dimension))], + 'title': "Normalized Command"}, + plot_agent_reward=plot_agent_reward, + param_agent_reward={'dim': 1, + 'min': -3.0, + 'max': +3.0, + 'cat': ['r'], + 'title': "Step Reward"}, + ) + + self.monitor_window.show() + self.monitor_window.correct_size() diff --git a/agent/dart_envs/LICENSE b/agent/envs_dart/LICENSE similarity index 100% rename from agent/dart_envs/LICENSE rename to agent/envs_dart/LICENSE diff --git a/agent/dart_envs/README.md b/agent/envs_dart/README.md similarity index 100% rename from agent/dart_envs/README.md rename to agent/envs_dart/README.md diff --git a/agent/envs_dart/base_dart_simulation.cpython-37m-x86_64-linux-gnu.so b/agent/envs_dart/base_dart_simulation.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000..32b640c Binary files /dev/null and b/agent/envs_dart/base_dart_simulation.cpython-37m-x86_64-linux-gnu.so differ diff --git a/agent/envs_dart/iiwa_dart.cpython-37m-x86_64-linux-gnu.so 
b/agent/envs_dart/iiwa_dart.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000..74d0b29 Binary files /dev/null and b/agent/envs_dart/iiwa_dart.cpython-37m-x86_64-linux-gnu.so differ diff --git a/agent/envs_dart/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so b/agent/envs_dart/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000..5f0d8da Binary files /dev/null and b/agent/envs_dart/iiwa_dart_unity.cpython-37m-x86_64-linux-gnu.so differ diff --git a/agent/envs_dart/misc/generated_random_targets/CSV_contents_guide.txt b/agent/envs_dart/misc/generated_random_targets/CSV_contents_guide.txt new file mode 100644 index 0000000..bdabfe2 --- /dev/null +++ b/agent/envs_dart/misc/generated_random_targets/CSV_contents_guide.txt @@ -0,0 +1,3 @@ +Each row of generated targets CSV files correspond to a random target. +Columns of generated targets correspond to the following order: +cart_pose_y, cart_pose_z, cart_pose_x, quaternion_w, quaternion_x, quaternion_y, quaternion_z, joint_pos_1, joint_pos_2, joint_pos_3, joint_pos_4, joint_pos_5, joint_pos_6, joint_pos_7 diff --git a/agent/envs_dart/misc/generated_random_targets/cart_pose_7dof.csv b/agent/envs_dart/misc/generated_random_targets/cart_pose_7dof.csv new file mode 100644 index 0000000..9e50b5e --- /dev/null +++ b/agent/envs_dart/misc/generated_random_targets/cart_pose_7dof.csv @@ -0,0 +1,195 @@ +0.636911404421983,0.913358883325954,0.223688801631281,0.228858550238294,0.121947178591538,0.375864763638961,0.889650678115303,1.88646566867828,0.702981173992157,1.76862525939941,0.97777533531189,0.479131609201431,0.449924528598785,-1.17468595504761 +-0.172564472486644,1.19472477694813,0.238597657983051,0.36784919690639,0.243972351162842,-0.4211664541066,0.792327759288207,-1.23522877693176,0.073914274573326,1.27880215644836,-0.548812210559845,-1.90543782711029,1.03065967559814,-1.85495471954346 
+-0.348973447115343,1.17363446329339,-0.044651328986998,0.632612410525263,0.270949759408944,0.55169256588449,-0.471193249817353,-2.21242594718933,0.196580037474632,0.445660382509232,-0.238308236002922,0.79594898223877,1.06861257553101,-0.549297690391541 +0.338599205678434,0.828664415672876,-0.651609554108356,0.747044425735506,0.332513709271991,-0.550756575196238,-0.167411033094596,-0.941726624965668,-1.15098190307617,2.26723909378052,0.646672129631043,2.16433262825012,0.766096532344818,1.7537122964859 +0.381673212808921,0.885215277119418,-0.381767228759397,0.68277541460987,-0.574022852046658,-0.022524688722399,0.451451145674148,-1.18400228023529,-0.008043975569308,0.460767924785614,1.42930769920349,0.768580615520477,0.303574293851852,1.28979384899139 +-0.774020843018197,0.679233019295516,-0.482397925503371,-0.415182368829033,-0.508780062611764,-0.283532239869952,0.69883897820357,-2.24807977676392,1.19180631637573,-1.91322731971741,0.234353572130203,-0.034982848912478,0.083682261407375,2.02784562110901 +-0.761811644305853,0.294981455992918,0.202087722082298,0.452952283287461,0.26207208096634,0.84901830846331,0.072940834494463,1.26870024204254,-1.53389990329742,-1.57877540588379,-0.83793181180954,-0.713739812374115,1.04835093021393,-0.661163628101349 +-0.700435806150988,0.819221846839289,0.157110238745731,0.533723066771078,0.044651432163337,-0.120884210288562,-0.835782833818181,1.66434979438782,-1.15863466262817,1.49038469791412,0.304716646671295,-1.24069654941559,1.43124032020569,2.25601577758789 +0.131465782796256,0.906574207805222,-0.519395480573886,0.631770894583743,0.452111905760019,-0.215588570711769,-0.591592705841914,-1.3143744468689,-0.891731977462769,-1.04966413974762,-1.34605967998505,1.26334130764008,-0.039275709539652,-0.92792671918869 
+-0.398273439416948,0.413367136168851,-0.654544853110886,0.667584834532649,-0.14246060475558,-0.715438654539584,-0.14893957293238,1.27727603912354,-1.48156023025513,-1.59957242012024,1.12508797645569,0.149799346923828,-0.578854084014893,1.32979655265808 +-0.675036005622146,0.73119076584406,0.25103923850997,-0.247086916420851,-0.10601439739991,0.855696030491492,-0.442146250327505,-1.15182828903198,0.618721127510071,0.197253867983818,-0.698967337608337,2.16231989860535,-1.18650114536285,1.93397986888886 +-0.642721104741678,0.56326003360993,0.075407815637437,0.268546785791751,0.751635439329062,-0.061140812349429,0.599323444602979,0.870910108089447,-0.674615621566772,0.955447793006897,1.44490587711334,-2.05302143096924,0.766474306583405,0.99465537071228 +-0.870504543529283,0.53811219515483,-0.252312602646826,0.896130688155642,0.179337384648051,-0.326177745945576,0.241652581743204,-1.8067409992218,1.48299026489258,-1.81458365917206,0.037965621799231,2.34465003013611,-0.898180961608887,2.09032988548279 +-0.745646334812978,0.597419933264637,-0.267647142123039,0.777709075579262,-0.287999879264276,-0.46792537794371,0.305402200354955,-1.52358639240265,1.35906076431274,-1.64770567417145,-0.430638462305069,-0.384777337312698,1.28676605224609,-0.771496176719666 +0.161736788605422,1.06222787871894,-0.368662414616576,-0.370123277188546,-0.021208823769587,0.764041443972289,0.528014788969267,2.21840667724609,0.105984322726727,-2.12894463539124,0.622080087661743,-1.0746750831604,-1.31553387641907,-0.447015821933746 +0.240890679767527,1.03831435913241,-0.343171040867735,-0.490794890325449,0.742511305887802,0.350902437787198,-0.290972190104524,-1.55874609947205,-0.148293256759644,-1.80815839767456,-0.611432790756226,-0.410930782556534,1.27658247947693,-1.36323225498199 
+-0.201532582176514,1.02175520100892,-0.122207016160399,0.891596105410748,-0.119893700674116,0.091946680312633,0.42688135744553,0.01774044521153,-0.892684996128082,-0.655002295970917,-1.54584896564484,2.21748828887939,1.09420657157898,-1.2143120765686 +0.112024603053261,1.26280525293063,-0.21534478063865,0.934950092410331,-0.121298674349814,-0.193848617696462,0.271252040950962,-1.86652612686157,-0.232403695583343,-0.912892878055573,-0.570689380168915,-2.12120079994202,0.120552010834217,-0.842764675617218 +0.22381281909273,1.10938426943059,0.120586640405228,-0.472141588132912,-0.225410404265326,0.023695562395159,0.851886724117151,2.33498620986938,0.424914240837097,-2.15903854370117,-1.15687024593353,0.436919122934341,-1.41438591480255,-2.52330446243286 +-0.05471988267764,0.876820695598893,0.471997776369938,-0.42965223077325,0.546748673974259,-0.007745093078829,0.718613151585858,-1.52714216709137,0.732043385505676,2.09672737121582,-1.5400025844574,-1.72158360481262,-0.098712041974068,-1.49899458885193 +-0.146593424051392,0.612697682651349,0.649490444129963,0.471708797902704,-0.150891397898446,0.854362232935551,-0.157441325435282,-1.06678926944733,0.8619504570961,-1.53483581542969,1.39567053318024,1.63468456268311,0.84794944524765,0.250422865152359 +0.493372040464022,0.799955416616922,-0.268524717672336,-0.226457851172098,0.134822602745586,0.383833634413641,0.884992343765886,1.68102943897247,0.223840564489365,0.62731409072876,-1.56917488574982,0.503437042236328,-1.19090461730957,1.25378572940826 +0.497450462391834,0.541396213711154,0.796895134646632,0.810838564831695,-0.02128455670615,0.576334480659517,0.099631098711318,0.647509157657623,1.40354382991791,-1.22563219070435,-0.04446292668581,2.32823014259338,-0.471204668283463,-1.13512992858887 
+0.539947549572346,0.701082655049513,0.075503890246938,-0.492702296847456,0.833639804136288,-0.229289791878748,0.098566297389387,-2.33286571502686,-0.072443321347237,0.775075495243073,1.52542626857758,2.40691232681274,0.684577345848084,-0.685503482818604 +-0.396061854013517,0.96893495104751,0.384636413803617,0.417459786659642,-0.173461209342896,-0.036827863257251,0.891225136462887,-1.20013034343719,1.01531338691711,-1.16109001636505,0.783139884471893,-1.39830911159515,1.26807808876038,-0.63716059923172 +-0.614581880552827,0.90599410376589,0.499056127565472,-0.370897389466195,0.040653012517565,-0.319425030769302,0.871062631949299,-0.908458471298218,1.04080283641815,-1.12210929393768,0.077483110129833,-1.62269484996796,0.386096507310867,1.34500873088837 +0.04607403299296,0.947153814874523,-0.551287812625047,0.105773462428278,0.004409985574704,-0.258059509599587,0.960311312116225,0.350414633750916,-1.03172135353088,-2.03262209892273,0.998356401920319,0.913001537322998,1.32961320877075,-2.32348132133484 +0.549991212243178,0.870614833020334,-0.535112360835291,0.635701306216602,-0.445952334005778,-0.508833127312725,0.371617025469039,-0.900344610214233,-0.98240339756012,0.372431188821793,-0.145762056112289,0.63100653886795,-0.943620383739471,0.539177656173706 +-0.048283939831369,1.2362449424691,0.29787725684504,0.846125673932814,0.311159118864901,0.237247882180404,-0.361890576086023,0.131306916475296,0.307573229074478,0.629711925983429,-0.092944040894508,-2.43593955039978,0.883309900760651,1.02441227436066 +0.419023455393329,0.859348436777655,-0.341275036383636,0.162391009781394,-0.540186061176027,0.172631835129085,0.807481534620621,-0.337848335504532,-0.039250899106264,-0.705408692359924,1.47724485397339,-0.97467577457428,0.599819898605347,-1.9131795167923 
+0.283469627014647,1.10094351821382,-0.035115090306096,0.590192848843529,-0.746083691285846,0.286033108986598,0.114963417375735,0.292415797710419,-0.148666888475418,-2.14911818504333,0.467957317829132,0.147470816969872,-1.45261335372925,2.19984698295593 +0.036814053504143,0.880353828397754,0.439921063054493,-0.298185579652034,0.655613005366004,-0.001079241369185,0.693726014014614,-1.36029016971588,0.743379473686218,-0.811925530433655,1.55870616436005,1.64342546463013,0.522528290748596,-1.95877933502197 +0.725281592689829,0.80578169721448,0.052137521702016,0.346443211480051,-0.545977117302355,0.177753416658574,0.741815213828756,-1.46110332012177,-1.23600196838379,0.914525210857391,-0.786412835121155,0.47497296333313,-1.31889641284943,2.1042594909668 +-0.442862964189149,1.07725757273753,-0.433045477191641,0.8515821062149,0.33786172126744,-0.008754644064904,-0.40072525486185,0.680771827697754,-0.937028050422668,-0.183417826890945,-0.450688093900681,1.07007968425751,-0.237898334860802,-2.5278480052948 +-0.757227783445803,0.96450445980435,-0.043142701858597,0.724800017466872,0.345456180287254,-0.260619233688185,0.536099409823537,-1.72294104099274,0.889577984809876,-1.67553949356079,0.139355719089508,-0.369058519601822,0.030817808583379,-1.29576599597931 +0.701219651074966,0.583341970690438,0.387291900618304,0.662049977641164,-0.020772824057373,0.207443032787867,0.719878951653532,-1.82827854156494,-1.10021686553955,-0.602835297584534,0.864984333515167,0.519585192203522,1.41983067989349,-2.33843278884888 +-0.73265291185144,0.909476270669214,-0.184163265048354,0.701544912715108,0.029874945350108,-0.35606908850093,0.616568753098953,-1.96214723587036,1.10566163063049,2.15609192848206,-0.454432129859924,2.26549911499023,0.637875318527222,-0.987691938877106 
+0.187933577830459,0.506086100167199,-0.843502572758487,-0.055981031491685,-0.526293014380211,0.653859660888984,0.540693379828032,0.17501300573349,-1.4101699590683,1.1767909526825,-0.631067752838135,1.37765073776245,0.850337982177734,1.42891871929169 +-0.650711947417774,0.694664950992731,0.4745821452281,0.594819223154125,0.629506978887004,0.490566335635647,0.09620668188798,1.77752816677094,-0.8520268201828,1.11220037937164,0.757686734199524,-0.911316990852356,-0.534466207027435,-1.93114912509918 +-0.192068746264104,0.872975685921411,-0.431830607765067,0.840814792204557,-0.148206604604031,-0.36999878863564,-0.366287024026119,1.40057301521301,-1.19151973724365,-2.27863430976868,1.48207640647888,2.38569593429565,0.318593591451645,-1.54156732559204 +0.832294648381876,0.831714884245273,0.170540904312942,-0.230881609492375,0.20543530054791,0.463286621727337,0.830575418499411,-1.77722430229187,-1.07024419307709,0.722939312458038,-0.016314847394824,-2.35883736610413,0.063507936894894,0.783059656620026 +-0.51222496323253,0.857679063504942,0.205074942615334,0.651175640324347,0.573140946943665,0.030758010177279,0.496521585829489,-1.19358098506928,0.047104109078646,-0.110984414815903,-1.45819914340973,2.19533777236938,0.512102663516998,0.070922926068306 +-0.533592700938702,1.03805569028235,0.215209605167806,-0.474686519347246,-0.277324954690901,-0.09080039189394,0.830372727567322,-0.646244049072266,0.909859716892242,1.02605783939362,0.667910575866699,-0.690209686756134,-0.487149804830551,-1.37605381011963 +-0.244207952301942,0.830921774010588,0.476447053393594,0.626324101854176,0.120427763109515,0.71478178360511,-0.286883731039764,-2.21100473403931,0.093533247709274,1.89500069618225,-1.44827473163605,2.11830258369446,-0.335476040840149,-2.48427224159241 
+0.195623878095059,1.19276072003075,0.081137724098354,0.648979327215869,0.034513898580478,0.626798020997072,0.429835857654256,-1.88847732543945,-0.227765366435051,2.36348223686218,0.297593683004379,0.006863658782095,1.48738944530487,0.803833544254303 +0.19399917071796,0.964531336831757,0.289465615431401,-0.087088373411935,-0.18834333380389,0.685710131580867,0.697670423104957,-0.862358689308167,0.604137241840363,2.14134573936462,-1.09194445610046,0.544340431690216,1.21212339401245,0.631623029708862 +0.723788575487757,0.338714710085193,0.366271843021665,-0.175483827830372,-0.186470159922931,0.925397969346943,0.279415289411427,0.802120745182037,1.25753223896027,0.787411034107208,-0.538206875324249,-0.098978132009506,1.25263249874115,0.644879877567291 +0.386969290019709,1.06212124473252,0.341342928236557,0.734998365014975,0.111259215602606,0.658415506841479,0.117846555823599,0.897514402866364,0.459190756082535,-2.24435639381409,0.303252726793289,-1.99260652065277,-1.3047833442688,-2.13302636146545 +0.341327290047859,1.03682207047903,-0.343559120901693,0.085886298645526,-0.185460705806964,0.110732275311721,0.972607954682072,-1.5161806344986,-0.936827659606934,2.2996654510498,1.16891312599182,-0.43856680393219,0.515383660793304,2.33067297935486 +-0.218002091930658,0.915521533648101,0.194271024432429,-0.40149322841597,-0.251432577103765,0.353311087740603,0.806694565487231,2.11276030540466,-1.38333332538605,-0.293501138687134,-1.45050740242004,-2.36680746078491,-1.0226321220398,-2.05796504020691 +-0.684700222045974,0.834870556622838,-0.308597390327423,-0.409248865285432,0.013652520870523,-0.087536367388445,0.908111424507423,-2.01192402839661,0.971780359745026,-0.065050348639488,-0.376711964607239,-0.105748988687992,-1.26167547702789,-0.215934306383133 
+0.289704750476023,1.09091516143354,-0.289396937891571,0.628670608557409,0.048710566786553,0.019177025221453,-0.775907718947972,0.095926031470299,-0.248476654291153,1.86832165718079,-1.03642082214355,0.166418701410294,-1.17997479438782,2.45396709442139 +-0.011135227010258,1.24659651681178,0.006005885637619,0.902571677165782,0.133859032776,0.408021732969878,-0.031054667067586,-2.00510501861572,0.203737244009972,0.643991649150848,0.46343344449997,0.927428305149078,1.15708589553833,0.492472231388092 +-0.264153312969764,0.907470239842529,0.584918465791843,0.227709322236425,0.439372850124007,0.262980743660596,0.828215606955251,0.065881595015526,0.67616868019104,-1.44337332248688,-0.979189395904541,-0.444043755531311,-1.15996432304382,-2.15457630157471 +-0.600168721083803,0.716981898235472,0.234166601743867,0.424446562765201,0.732661887061321,-0.169900888598559,0.504167990513053,-0.708645582199097,1.3731644153595,0.75609278678894,1.40222728252411,-0.385849386453629,1.49883663654327,1.95318472385407 +0.36388612516406,0.946990070116241,0.262468205760144,-0.017948783407879,0.657212863167675,0.347140067897349,0.668762190110467,-1.23726654052734,-0.846203744411469,0.63184928894043,-1.17945241928101,1.45900893211365,1.14597284793854,2.34366154670715 +-0.642025802590455,0.658869291905894,-0.510328200517284,0.406873466657085,0.493474311262002,0.552412212836622,-0.534581923903049,0.860302746295929,-0.803196609020233,-0.218701630830765,0.723901569843292,1.53480517864227,-0.79029643535614,1.55526554584503 +-0.476222728141901,0.660543263041618,0.615106967208472,-0.04322478891931,0.754500923636895,0.130060198018312,0.641828885876439,1.97466599941254,-1.28361022472382,2.0766761302948,0.720331728458405,2.21355772018433,0.959938406944275,2.3403799533844 
+0.42948271142836,0.234393194428786,-0.682061422270137,0.642759229389321,0.15048841488784,-0.747201566065165,-0.076835081109839,-0.908741652965546,-1.22482562065125,0.430074125528336,1.10046803951263,-1.38909411430359,0.667581021785736,0.345533102750778 +0.035958811982747,0.970222854250159,-0.162392229657976,0.663630299213701,-0.138116969572001,-0.611087859630394,-0.408766628400759,-2.42525148391724,0.903214931488037,-0.035555075854063,1.55613350868225,-1.37560665607452,1.48206162452698,2.16241264343262 +0.13966574528122,0.49743104396609,0.788707148924698,-0.020664820163767,0.704443122757874,0.194689272192212,0.682223525906283,-0.47078675031662,1.23214721679688,1.4730931520462,-1.24052083492279,0.546141028404236,-0.46508976817131,1.1477313041687 +0.620467749577061,0.835660768010111,-0.338626321028026,-0.207228774747179,-0.193220407516312,0.775310453462218,0.564460636172464,-0.643993556499481,-0.613588333129883,2.00864291191101,-0.744429051876068,-1.96885454654694,-0.837775766849518,-1.97907400131226 +0.182636353228026,0.712888357898798,-0.71581830790195,0.244074535562574,0.872035760088231,-0.289743947042581,-0.309886591141729,-0.629477560520172,-1.00143384933472,-1.309565782547,-0.49194923043251,1.92519807815552,-1.41939949989319,-2.46686673164368 +-0.148216279051337,1.18644620529776,0.28912175736045,-0.377560350598175,0.441928734882788,-0.270633381731087,0.767401295043133,1.20376873016357,-0.246277362108231,2.10855078697205,0.597861289978027,-0.256641715764999,-0.582962989807129,0.904814541339874 +-0.591914859436688,1.03679948769014,0.01996030535417,0.160918271379811,-0.092404506913417,-0.667214546309369,0.721381637021212,1.51444220542908,-0.445340931415558,-2.44122791290283,-0.390372753143311,1.84915995597839,-0.899656116962433,2.07026243209839 
+-0.179507235267371,0.910505846048781,-0.680798968688491,0.847372696583647,0.216914467471526,0.135601685751838,-0.465316891706461,0.002595300553367,-0.931283533573151,0.950899302959442,0.320824295282364,1.76274061203003,-1.08374714851379,2.10952281951904 +-0.099581682239317,1.25671637890683,0.004343522053436,-0.306317903624794,0.273712289094264,-0.342186187745497,0.845079604317779,1.45656180381775,0.042004987597466,-0.84948855638504,0.314662575721741,2.13893485069275,-1.07146692276001,0.958908021450043 +0.288789453562025,1.1500885929307,-0.420045528193466,-0.27712866517016,-0.448746497089631,0.280634094178459,0.801916946744129,2.39289474487305,0.471732079982758,0.389285802841187,-0.041007269173861,0.364815235137939,0.691260278224945,0.522503137588501 +-0.111291498283606,1.20513156315291,-0.156493040059587,0.77990639018144,-0.067312437668952,0.477101382936538,-0.399486331041828,0.395847111940384,-0.441004276275635,0.552285730838776,-0.20714958012104,-1.21672105789185,1.27649974822998,-0.710156500339508 +0.748896516178909,0.487473392973478,-0.084063722748344,0.844241298817629,-0.496116898993354,0.164382131257394,-0.118756754869856,-0.698665976524353,-1.42892980575562,-1.5327752828598,1.32558858394623,-1.69981026649475,-0.473166793584824,-1.41057848930359 +0.361292774981517,1.0839765222981,0.267824736530015,0.898120607138184,-0.059154121766314,0.159026091918045,-0.405697999749899,0.681072056293488,0.155821174383163,0.450344324111939,-1.00181376934052,0.291104048490524,-1.0343359708786,-2.11548852920532 +0.334347254405228,1.01318429350774,-0.047997643881906,0.389619695084182,-0.104702961846437,-0.306144242828816,0.862269960955305,-0.805354475975037,-0.897411227226257,0.874174118041992,-1.23818969726563,1.08773195743561,-1.474280834198,1.63746774196625 
+0.641002131925426,0.358845231215015,0.59618516381757,0.094208467541888,0.133442659617282,0.774485120282756,0.611138789227358,0.345772355794907,1.36220765113831,1.20781576633453,-0.867246150970459,-0.441142141819,0.14402163028717,0.836861610412598 +-0.409087386360666,0.936882707823581,0.378851171758633,0.4339812463198,-0.030336054103767,0.07254277778541,0.89748400935911,-0.221609935164452,0.880178689956665,1.47170460224152,1.02065896987915,0.954361140727997,1.3151490688324,0.512748003005981 +-0.320004769238541,0.975840441156021,-0.14539082584455,0.580644238157311,-0.108162404776262,-0.612663628801537,0.525163251600361,1.07162272930145,0.295391082763672,0.251605480909348,1.39015352725983,-1.33656907081604,-0.774456262588501,2.04469299316406 +0.604387491805053,0.99642292925467,-0.02027035442361,0.773464643872538,-0.484574850089228,-0.258725641363043,0.316228875723599,1.34539759159088,1.04740989208221,-0.126202672719955,0.643973469734192,-1.74820530414581,-0.941993534564972,1.06344962120056 +0.753197635865217,0.649333852311558,0.294092297139401,0.337372513443376,-0.345396117147017,0.423343063749094,0.766591129487026,-1.87265729904175,-0.849061012268066,-0.434947669506073,0.901000499725342,-0.670849204063416,1.03916776180267,-1.52743232250214 +0.46425114407617,0.7496384283268,-0.413607279014546,-0.276122286941419,0.797312413718034,0.136920593498341,-0.518943300044836,-1.25144112110138,-0.259436726570129,0.351937562227249,1.24962031841278,0.879777908325195,-0.707617163658142,1.43954288959503 +-0.405221030006359,1.01252663388676,0.517578126173335,0.624289242001611,0.061888506218627,-0.116484959752057,0.769976629041845,-0.698528289794922,0.833487272262573,-2.32901501655579,0.198639735579491,-0.387742578983307,0.914884865283966,-1.02422988414764 
+0.064890012174057,0.971389192232388,0.370336169456273,0.56655890906982,0.462699195904591,0.410131263805732,0.544713505534767,-1.29073870182037,-0.605242908000946,0.638175666332245,-1.41972804069519,-1.83921492099762,-0.524857938289642,-2.2324116230011 +-0.666462901799575,0.96387155233006,0.089844650948206,0.810718421978087,0.503297958527946,0.067800951918514,0.291255619904414,1.15522050857544,-0.902801215648651,-1.14371263980865,-0.747685790061951,-1.68646144866943,0.249685302376747,2.0882420539856 +0.150364949342243,0.594499650224636,0.625920213512265,0.713111277872918,0.275824196593662,0.026932773539285,0.643947159053751,-2.29116225242615,-0.843475461006165,-0.818664193153381,1.45974588394165,-2.16186237335205,-1.55656599998474,1.62833106517792 +0.350610519865953,1.19871171448214,0.325881066975633,0.920109703468102,-0.004800719611637,0.271966454226276,0.281796618948202,-2.26239800453186,-0.503514111042023,-2.42722034454346,-0.057914540171623,0.652493298053741,-0.328758627176285,-1.57261848449707 +-0.393833661119184,0.79715645470876,-0.692072717848862,0.90686943844939,-0.093374635761429,-0.367602185440689,-0.183678066905491,-2.34343361854553,1.15574133396149,1.51945960521698,0.303888410329819,2.34424734115601,0.827578485012054,-1.36024475097656 +-0.536163836798907,0.95082975770887,-0.498691626058434,0.84370770391306,0.311804835279106,-0.220153959000251,-0.377448393016632,-2.41668009757996,0.693041265010834,0.48855996131897,-0.505365908145905,0.318310588598251,-0.518589437007904,0.857737183570862 +-0.075070285166067,0.881015245203664,0.527023356719585,0.654717786031845,0.127653266408096,0.656279372833639,0.352628202249889,-2.36193251609802,-0.055402912199497,1.99379503726959,-1.33222210407257,1.51664006710053,0.658399403095245,-0.669269323348999 
+0.148391695834763,1.2732560226575,0.108166407160642,0.577388046981363,-0.111906532252114,0.090489609025018,0.803686258375029,0.002778482856229,0.411468416452408,-0.494688808917999,0.594145476818085,0.631799578666687,0.046489782631397,1.70506203174591 +-0.752287020133316,0.800195446038831,0.332522791160324,-0.136871103623447,-0.250934862741574,-0.468995899842689,0.835667901492282,-1.0856260061264,1.06295418739319,-2.12158751487732,0.126125141978264,0.3807512819767,0.836376190185547,0.451923340559006 +0.848139546001332,0.197491989193884,-0.015310398252788,-0.193726888162622,-0.554086164793174,0.623408938267002,0.5165459422708,1.49908792972565,1.47856783866882,-0.288708865642548,-0.474588662385941,-1.56651222705841,-1.19009387493134,2.52681946754456 +0.503226625797267,0.841931252106391,-0.054799784626148,-0.225264563748471,0.029735261192193,0.68326102555032,0.693920789085446,-0.115268036723137,-0.521479070186615,1.08305966854095,-1.52854955196381,1.8706179857254,0.810707211494446,0.674304127693176 +-0.431703061311451,0.701639891689123,-0.483473971598871,0.55042811094886,0.214960839099995,-0.667499446726759,0.45306204977924,-0.111347265541554,-0.417775243520737,1.05572152137756,1.47535693645477,-0.753979027271271,0.138935908675194,0.765320062637329 +0.718214830828173,0.402007000682715,0.060018150526458,0.587866963880869,-0.511597676041811,-0.609930024586329,-0.143755402521882,0.8628950715065,0.895659923553467,-2.35839295387268,1.41510498523712,-1.88455617427826,0.45818954706192,1.51454544067383 +-0.676170778734924,0.37538445045393,-0.652076206819349,-0.025341811897448,0.600325401276232,0.643156772650608,-0.474675226808752,-2.2694091796875,1.44283628463745,-1.46202456951141,-0.128668129444122,-1.66491031646729,-0.693396031856537,2.2326078414917 
+0.461615380719027,0.982459354810667,-0.41776797769925,0.938676932334039,0.229683264582428,-0.085598253116928,-0.242495677771889,2.02555632591248,1.01858770847321,2.22515869140625,-0.351834177970886,0.54879754781723,0.994260191917419,0.604972064495087 +-0.627837015531445,1.02264113291143,-0.068486314938888,0.076095611483002,0.00034356069174,-0.341496012044326,0.936797637506041,-1.19204568862915,0.573889791965485,-1.19834077358246,-0.744904935359955,0.09378944337368,-0.452984541654587,-0.905039072036743 +-0.906560656345078,0.450451003812456,0.11262002380878,0.10832586573097,0.241986443831847,0.771223744498933,-0.578724462707683,2.00777459144592,-1.28900837898254,1.92464125156403,-0.578844010829926,-2.06056761741638,-0.362781286239624,2.10588097572327 +0.383038238638205,0.957235860879417,-0.130601065644323,-0.156579177251606,0.199491701534845,0.819413807837509,0.514049641372481,0.488070398569107,-0.299338608980179,-2.20404648780823,1.01788091659546,0.331022024154663,-1.14423930644989,-1.04700672626495 +0.59182375203404,0.611567905339327,-0.149481093052288,0.165924786780849,-0.496049284666164,0.720145578206375,0.455844730696454,0.839916229248047,0.614544808864594,1.49868381023407,-1.55400085449219,1.83896172046661,-0.719889938831329,-1.6755096912384 +-0.657646105293129,0.374274560260741,0.572999661522511,0.010051357351637,0.091277807352834,-0.304351175406072,0.948123248385562,-0.770921051502228,1.47840929031372,-0.149398699402809,-0.470839709043503,0.429544895887375,-1.3538635969162,-2.29718852043152 +-0.748511667298653,0.880030247916194,-0.027300932517323,-0.121964652695929,0.12136604307742,-0.492745760301271,0.852992686243917,1.09181833267212,-1.09600150585175,-1.04664921760559,-0.687567114830017,1.82958054542542,-0.191698864102364,1.18249273300171 
+0.687747679867611,0.282815111480477,0.010609617664817,0.521141692716388,-0.688266498064893,-0.504310420938747,-0.019275971746461,2.09928154945374,1.21911549568176,1.99226307868958,1.56296825408936,-0.799715280532837,1.31677293777466,2.15742754936218 +0.686816636638714,0.840939721961332,0.187739358180226,0.576876071617844,-0.031394340861979,0.681978199304276,0.448479797795066,1.92726469039917,0.952082872390747,-1.84983670711517,-0.790094316005707,0.805806457996368,0.580945193767548,0.982917129993439 +-0.569476043921849,0.367643315814899,0.663326046958155,0.259592952243274,0.557283103049882,0.782093323955749,0.101867928341376,-0.643958508968353,1.35042548179626,2.00872015953064,0.247655957937241,-1.70695173740387,1.22527956962585,0.327810764312744 +0.213923476525526,0.551563383316013,0.729889813852367,-0.430310203011133,0.866271033627604,-0.081305817640648,0.240410044507432,0.641947031021118,0.918042898178101,-1.13383269309998,-1.01264464855194,-1.34713327884674,-1.27726447582245,-0.188324749469757 +-0.776428573775432,0.539980723608266,-0.178622984073823,0.073926877446345,-0.15759890992335,0.818261944749677,-0.547854716285697,0.741220414638519,-1.06590104103088,1.32689905166626,0.987933933734894,-0.642112553119659,-0.528240203857422,1.10301661491394 +0.318007375783224,1.0596931167993,-0.504521186874696,0.144030907121101,-0.660768588296194,0.102381822376082,0.729491557841406,-0.716541767120361,-0.53775280714035,-2.32374858856201,-0.127791970968246,-0.406732499599457,0.850658118724823,-0.205385774374008 +-0.666035523732795,0.918171311526587,-0.205844894692418,0.34957608084149,-0.226532234091884,-0.623483720539397,0.661625090850698,-1.99308490753174,0.59045273065567,0.740805208683014,-0.570022821426392,-1.7342973947525,0.876800000667572,-0.690746486186981 
+-0.668505759340301,0.815237766513034,-0.377650746379333,0.711051232104791,0.490491592541418,-0.384029629896919,-0.326106403386499,1.29375803470612,-0.676185190677643,-0.293555468320847,0.675547897815704,-1.44143772125244,-0.612814903259277,0.143297776579857 +-0.264977965969188,0.952359545468656,0.176864508766961,0.759662609384277,-0.18721067799516,-0.259744114200181,0.566036992683718,-0.709902465343475,1.39356100559235,0.061480898410082,1.48805022239685,0.744490921497345,-0.540858685970306,1.23264384269714 +-0.586531229111167,0.91733236177502,-0.524142305198111,0.346001860434152,-0.085956682916821,-0.511788582569864,0.781643529998919,-2.26591181755066,0.835746467113495,-0.504677653312683,-0.218594551086426,-1.36645209789276,-0.21320316195488,0.091533102095127 +0.32907315418907,0.783669668905489,0.490316921188424,0.556166042879733,0.32246748016536,0.725455594467371,0.245780872810102,-0.16031776368618,0.565495848655701,-1.55429565906525,1.25373363494873,1.3286988735199,1.34862816333771,2.04824376106262 +0.740476739546802,0.508906020458506,0.603523951722767,-0.069374722062251,0.434892222708766,0.544194743491542,0.714078415667628,-2.19421792030334,-1.37954449653625,1.7333300113678,-0.164611726999283,-0.743789792060852,-0.248036950826645,-1.79799818992615 +-0.426931917258644,1.10738479015684,0.230589866919196,-0.001332982558704,0.143400541238524,-0.429299634128785,0.891704172955856,-0.088951952755451,0.324605315923691,-1.58029294013977,-0.8197340965271,-1.72347366809845,-0.127518728375435,0.346147239208221 +-0.498904878818019,0.857038021196022,-0.631700433986429,0.729950718441936,0.16171190095633,-0.65583481429468,0.104412193225955,-2.353670835495,0.948889374732971,1.33693337440491,0.093638755381107,1.21934521198273,-0.61371111869812,0.310350298881531 
+-0.796092851320248,0.883723674324673,0.035391902294859,0.513025867118131,0.28341652721721,-0.37716799647462,0.717094020476497,-1.28012347221375,0.93154788017273,1.78906118869782,0.424548387527466,0.074686452746391,0.201474115252495,1.43182384967804 +-0.193070468710785,0.974077319785481,-0.485406647489432,0.883682069865396,0.425165159078826,-0.188977566405332,0.051264669114358,0.018785402178764,-0.155118405818939,0.187805131077766,1.13202512264252,-1.05889332294464,0.955407440662384,0.409219563007355 +-0.600299330106969,0.996042577258841,-0.127989534859512,0.474411185584567,-0.036959054392156,0.13949017258485,0.868395386355162,-1.77483308315277,0.924811065196991,-0.920451819896698,-0.01666047796607,-2.23721265792847,1.22453057765961,0.794268071651459 +0.739500631189621,0.779818126306504,-0.075003872010091,0.556413188438782,-0.011437897438489,-0.100333540806674,0.824746457296284,1.63159453868866,1.37227201461792,-0.075814314186573,0.291924685239792,-0.068284846842289,-1.2279999256134,0.360205084085464 +0.372532017517189,1.13879928495473,0.006715317162449,0.679546247344315,-0.62199422963931,-0.359705788505673,-0.148161471817651,1.56638741493225,0.254667222499847,-1.15005266666412,-0.142917513847351,-1.63915526866913,-1.34926390647888,0.60636693239212 +0.626081759733775,0.96401080090732,-0.176462130745053,0.796989484850512,-0.065738772363574,0.293224542059905,0.523932765514436,2.30857682228088,1.05379199981689,0.995922386646271,0.651656329631805,-0.577432215213776,-0.520536839962006,-1.10011792182922 +-0.385436802315983,1.21277123344765,0.110988039053359,-0.199959255671856,0.141912304440522,-0.352900763432689,0.902960821458032,-1.62989711761475,0.198353096842766,0.44916182756424,-0.40257066488266,0.835498154163361,0.290389686822891,-2.45420527458191 
+-0.217223039096263,1.23721617757484,0.257666933824183,0.741760216709742,0.042804065194866,-0.095753348385191,0.662412929510388,-0.871574580669403,0.601327836513519,-0.521132051944733,0.446233302354813,-1.37333929538727,0.421416312456131,-2.09539723396301 +-0.655925190053986,0.859946939184442,0.425207580490031,-0.187895232634169,0.159355997067299,-0.11533485782234,0.962288375864085,-1.06878113746643,1.20323061943054,-0.298799067735672,0.190806835889816,0.002903472865,-0.709571361541748,-1.53831994533539 +0.006092298477722,1.14256393910604,0.075266013745795,0.905638943696485,-0.091837835392896,0.349866474763222,-0.221308304153773,-2.37750768661499,-0.569985628128052,-0.058434393256903,-1.2255380153656,-0.492108941078186,-1.30936467647553,2.24883222579956 +0.202197059056828,0.227751531650246,-0.713764065051687,0.212434585516412,0.810862875710789,-0.159192128705576,-0.521565729154712,2.38333082199097,1.11489868164063,0.617834210395813,-1.41361808776855,2.12524199485779,0.347592860460281,-2.35176730155945 +0.683866137935586,0.819658905992823,0.200336436569389,-0.473985421019589,0.585991132425147,0.04075872159205,0.655965654584262,1.63988626003265,1.39168226718903,0.406433701515198,0.964644134044647,1.90109443664551,-0.723686873912811,0.792403399944305 +0.718408265882564,0.612640985928754,-0.456863919548639,0.701385075469172,-0.144684576440578,-0.694522281660211,-0.069022818884844,1.69799482822418,1.20437502861023,1.49709165096283,-0.595990419387817,-0.161659434437752,0.581349670886993,2.17840170860291 +0.381099885611891,1.18663598744888,-0.086335837283864,0.722671467991921,-0.259233474941003,-0.139636988965194,-0.625336282437817,-0.274553000926971,-0.198571667075157,-1.41759288311005,0.711088836193085,-2.102370262146,-0.254179239273071,2.50888609886169 
+0.343582010959522,0.981486872632579,0.38308606213035,0.006708926446723,0.080912315163729,0.102712373449892,0.991392130239628,0.789387047290802,1.37655222415924,0.091171883046627,1.22058844566345,0.797704637050629,0.153486207127571,1.5029639005661 +-0.091867647190339,0.159565159978342,-0.613826472653765,-0.029951715707651,0.40991897681676,0.880780182554135,0.235150158816817,-0.114079274237156,-1.15668618679047,0.643234074115753,1.27980351448059,2.13264012336731,1.35938155651093,-1.48569226264954 +-0.070291483715151,1.25863320300463,-0.094402068330469,0.822586018224927,0.163540765864257,-0.462142400316521,0.288151110272158,-0.860713601112366,-0.020580988377333,-2.37619471549988,0.044807214289904,-2.37511444091797,-1.05614018440247,0.00911473389715 +0.469445475590626,0.967530439693512,0.133578299227314,0.646395368321752,-0.379733450538871,0.462758501145971,0.473106863162186,2.33162903785706,-0.126651719212532,-0.889535427093506,-1.27033340930939,-0.858386218547821,0.123358078300953,0.668511152267456 +0.220182215893538,1.03960741414431,-0.367461603028727,0.663514220733396,-0.398201627408038,0.308586632644125,0.553135275468402,-1.41233479976654,-0.179858759045601,-1.66807293891907,-1.04493188858032,-2.20793724060059,1.27347612380981,0.940442025661469 +-0.799535165688586,0.81975657333375,-0.071287503459986,0.572154189412222,0.603106372372623,-0.367419232503882,0.417019657483716,-1.44114947319031,1.01459586620331,-1.89757978916168,-0.456281125545502,2.32944345474243,0.784060895442963,2.17194318771362 +-0.697848542782954,0.397897717290489,0.454206033407438,0.690618802266347,0.548622433167677,-0.446475293151702,0.150727928355948,-1.07027721405029,1.2751384973526,0.808172047138214,-0.711658596992493,0.764662802219391,-1.47497451305389,1.11145913600922 
+0.023527560179199,1.23745879414635,-0.148400865898892,0.855720511958687,-0.333106068596611,-0.344754084964009,0.194749514448437,-0.572307586669922,0.206033632159233,0.319890916347504,0.470549613237381,-0.403964549303055,-0.762171566486359,1.19003689289093 +0.192124252801009,1.25233196208838,0.08358372109579,-0.467016543575534,0.342186378924006,0.359412887498785,0.731865019252474,0.624193727970123,0.11352726072073,2.0148458480835,-0.048224214464426,1.89949476718903,-0.941429316997528,-0.28108823299408 +-0.403387845272815,1.12814304088634,-0.183090762974073,0.836914434003485,-0.284622712780322,-0.381694173421965,0.269951290980278,1.44289124011993,-0.627323091030121,0.138860031962395,-0.097943857312203,-2.18525624275208,-1.16134119033813,1.5269935131073 +0.11911474121473,1.23796973439978,-0.061556752821084,0.669142470440705,-0.207882968799013,0.144132340660021,0.698755246070956,-0.262430757284164,0.331818133592606,-0.109087407588959,0.842773735523224,-2.06526517868042,-0.448707222938538,-2.14086031913757 +-0.053939737324878,0.850778397615735,-0.604482524852875,0.744445238146539,0.110235119166531,-0.653652373503802,-0.079925468507413,-2.30587720870972,0.211778312921524,-0.925400793552399,-1.28495442867279,-1.68455147743225,-0.116159215569496,-1.5447895526886 +-0.273548598374909,1.18872343537976,-0.054548494172537,-0.334253187160937,-0.399616330242983,-0.515236365616198,0.680524123762339,-1.94101166725159,0.065824709832668,0.305742770433426,-0.193280965089798,-0.145369157195091,1.16428112983704,-0.437267243862152 +0.461324745911674,0.809296465789907,-0.707002795814508,0.352207386404887,-0.561785770604271,-0.080475201588148,0.744224728719592,-0.709498882293701,-0.936362028121948,0.492805868387222,0.289299845695496,1.154456615448,-0.246571019291878,1.07685887813568 
+-0.321941512166996,1.19391186037425,-0.097274052961326,-0.34550705626918,-0.348022258206239,0.152747153729227,0.858005646187049,0.984721541404724,-0.328294575214386,-1.66347658634186,-0.491117686033249,0.195964187383652,-1.13326239585876,-1.72793412208557 +-0.527352112430607,1.05666950494206,-0.002039115192397,0.730525863257351,0.605101835506915,-0.0800967560201,0.306216004567562,2.16129970550537,-0.27230441570282,1.98679971694946,-0.746099352836609,-1.75452637672424,-0.681978821754456,-1.81680619716644 +0.628053257884524,0.995956600787964,-0.117129117213013,0.624450525403548,-0.657848392763224,0.293551276633517,0.301868649330978,-1.06492877006531,-0.689320921897888,-1.89582216739655,0.356725573539734,1.56281304359436,-0.959017932415009,2.3663318157196 +0.857540857823805,0.473360858893732,0.411945514860971,0.649313984710093,-0.452514521515561,0.588199085223802,0.166264227118541,1.27484011650085,1.43356049060822,-1.77172899246216,-0.230303525924683,1.27137053012848,0.313244342803955,0.053516868501902 +-0.379478787617769,1.10629264030008,-0.01469814840505,-0.12357617729377,0.26081414485868,-0.667230225051596,0.686664937960786,-1.17543721199036,0.285774886608124,-1.70625436306,-0.480580538511276,1.94856560230255,1.47674024105072,-2.27068758010864 +0.964610580999139,0.381875717204817,0.096499355790074,0.676442646038083,-0.698588645784445,-0.141899020602452,-0.185105155395834,1.3994003534317,1.51703858375549,1.2214081287384,-0.142985045909882,0.965710639953613,-0.036180999130011,2.06186008453369 +-0.125927882515087,0.953182522538202,-0.249272289735829,0.934571799921988,-0.353418767239977,-0.039870487593845,0.009003886504144,1.92499876022339,-0.824121713638306,0.850838899612427,-1.56785261631012,-1.60747575759888,1.0047550201416,-0.255334943532944 
+0.228670148656106,0.405294540026063,0.78566292381853,0.093770257845315,-0.512313263803796,0.853644076864461,-0.00583510998589,0.171032756567001,1.26858866214752,1.04518735408783,-0.242275148630142,-0.976981401443481,1.55835485458374,-0.696242332458496 +-0.864170416915797,0.572477771505419,0.308833952909195,0.799547716800972,0.51661452208157,0.172703042513711,-0.252995144695092,2.16114449501038,-1.29079627990723,-1.13794934749603,0.422983109951019,-1.58790934085846,-0.379500567913055,0.406428605318069 +-0.086833531101529,1.26142004232052,-0.157127987435393,0.856520570456875,0.154499976151293,0.186937733066628,-0.455583750488743,-1.48611009120941,-0.117594733834267,-1.09735810756683,-0.548085987567902,2.0996253490448,0.695748031139374,-0.65185809135437 +-0.224571768776128,0.965543710437858,-0.454342831791305,0.832578518046094,0.528573665838438,0.056647208639745,-0.155608434294582,-0.246502116322517,-0.198775932192802,0.5822873711586,1.07710993289948,-1.33364486694336,1.10879325866699,-0.135208860039711 +-0.063281385891176,0.918850765575839,0.546927242595696,-0.061514080586567,0.176839105289233,-0.194568675996848,0.962853560543308,0.619490683078766,1.19062793254852,0.863060355186462,1.30468881130219,1.01496946811676,0.308291167020798,1.30862879753113 +0.203240878063783,1.01955086362581,0.295364055102529,0.777472161600101,-0.176335884011322,-0.181418945838496,-0.575786297195114,-1.10046923160553,-0.370542675256729,1.00188100337982,-1.41339313983917,-0.185629144310951,-1.35502791404724,-1.11336266994476 +-0.048208774052019,1.12561379046006,-0.389976122502398,0.499681727723347,-0.412518392846953,-0.50339371420367,0.571613081588072,2.20388555526733,0.031262047588825,-2.02111482620239,0.618494987487793,-0.017173374071717,-0.784897983074188,1.52073585987091 
+-0.347864080265155,1.25208413074213,-0.136952451124811,0.983451200718103,0.148200433221111,-0.103766156200301,-0.009641173507943,1.4613424539566,-0.432672441005707,-1.96504497528076,0.195305839180946,1.83104836940765,0.035984437912703,-1.30533921718597 +-0.660730726766445,0.359901997440489,0.201063453924481,0.259760923149527,-0.484420066739764,0.737789262411178,-0.391827086882872,-2.12178111076355,1.49276566505432,1.69086110591888,-1.41702246665955,-1.16274857521057,0.773430526256561,-1.986976146698 +0.697988459405288,0.553914765036702,-0.157679257951369,0.6013443076691,-0.338612372997193,-0.575101856995332,-0.439300055278508,-0.642451941967011,-1.36695218086243,-1.79795575141907,1.51899361610413,-0.207648172974586,1.03474676609039,1.67819547653198 +0.061062905915831,1.22832782927929,0.42997277686964,0.951130785972439,-0.053390665176946,0.253128043172948,-0.168599699308295,0.186012372374535,0.418193429708481,-0.268376588821411,-0.074450545012951,-0.842691719532013,0.063423246145249,0.592402994632721 +0.199547234362016,0.627424125323426,-0.661959353702476,0.653356265253686,-0.347009265705389,0.495733837600203,0.454926502224851,0.25161224603653,-0.996740877628326,-0.933246552944183,0.831357479095459,-1.10136520862579,-1.54066407680511,-1.56143522262573 +0.616056533225128,1.02746937245706,0.330934345184125,0.123313443495479,0.208660515420551,0.263116612500772,0.933822377213052,-2.01276779174805,-0.832539916038513,-0.559663772583008,0.008370717056096,1.43823289871216,0.287754058837891,-2.17213129997253 +0.121659843335466,1.23209574134165,-0.050425111213988,0.501000295666875,-0.160007630259349,-0.437135767329695,0.729594807356502,-1.43290305137634,-0.337000340223312,-2.40010118484497,0.292319387197495,1.34939694404602,1.19666361808777,-1.83561825752258 
+0.708255364800549,0.336366314342272,0.238781164794421,0.310977292577168,-0.312124342354714,0.630157539142739,0.639353575318524,-1.54258680343628,-1.02765846252441,-0.696378946304321,1.47776365280151,-0.360515087842941,1.06904888153076,-1.68400931358337 +-0.244860966808423,1.24587651456609,-0.253632049463707,-0.229477203316089,-0.188580431698158,0.025075484593168,0.954541174602322,-2.00725269317627,0.529901027679443,0.583177804946899,0.360274881124496,-2.2754762172699,0.132288172841072,1.09904360771179 +-0.762841927976915,0.497662030768475,-0.570849126142578,0.531144323244537,0.728062377035855,-0.030677513330595,-0.432284366136291,1.01456761360168,-1.30739498138428,-0.706560015678406,0.219337075948715,0.512083053588867,-0.158802464604378,-2.04325819015503 +0.0301702688036,1.19949008539334,-0.233851675928276,0.593193673572835,0.384446482659661,-0.425618668576406,-0.564952136523995,-2.41525387763977,0.293216437101364,0.420879125595093,0.513094186782837,-1.04464721679688,1.22705805301666,1.35793542861938 +0.014951676186971,1.29019906579111,0.141398850380279,0.80855058442395,0.097291696252603,0.347941313965433,0.464453571742094,-2.43373394012451,-0.010759285651147,2.01016044616699,-0.097374081611633,-2.39843463897705,-0.655430376529694,-2.43921828269958 +0.617718792361255,0.983315459224614,0.176832258666868,-0.19079672181237,-0.233412163165239,0.087497971488244,0.949452198911361,1.10525691509247,0.942209243774414,-1.3262711763382,0.127341210842133,-2.29564762115479,1.07525682449341,-0.545569539070129 +-0.645530841211927,0.177988792066627,0.313251363674584,-0.009436365913785,-0.265571559978458,0.963493104156757,-0.032615023705735,-0.663061022758484,1.36240708827972,-1.08700120449066,-1.05327117443085,-2.00707769393921,-1.20538365840912,1.00517332553864 
+0.073167493675162,0.821764571657435,0.575812370209355,0.624044652248538,-0.364346761983641,0.629175865144625,0.286282098203066,0.415200173854828,0.279424786567688,2.42608666419983,1.18701887130737,1.63146638870239,-1.22081935405731,1.67620182037354 +-0.069675406449229,1.12176372759619,0.281820595989504,0.859842478732164,-0.211565504865627,0.339476771095458,-0.317279798924047,1.81507956981659,0.327413529157639,0.407986223697662,1.15595865249634,2.14545631408691,-0.753958225250244,0.976218998432159 +0.245750401391424,0.976518234534024,0.652926975831676,0.170059163201151,0.340779615432439,0.402509193707542,0.832427464526458,0.012699390761554,0.612366020679474,0.861287355422974,-0.534394204616547,0.632673799991608,0.182209685444832,0.995842814445496 +0.24382135303838,1.15689840628626,0.477119318242821,0.921313442553855,-0.12801379923266,0.267797908093546,0.251153913360523,0.160798966884613,0.617120265960693,-1.30352318286896,0.313988566398621,1.9622415304184,-0.001645527197979,-0.383113831281662 +-0.605478366455286,0.97425608478441,0.417559417099507,-0.301178361488927,0.137174910831896,-0.460315497452486,0.823762272269237,-1.19809448719025,0.858566701412201,-1.45394229888916,0.345582723617554,0.840273201465607,0.200753435492516,-0.70950984954834 +-0.573440758228347,1.08538952912975,-0.118516014866951,0.695405919134404,0.380764405632268,0.232213513925808,-0.563476671199943,1.14628458023071,-0.36615988612175,0.440569192171097,0.579094886779785,-1.27761077880859,-0.005499926395714,-1.71829497814178 +-0.577713342085434,0.820563123650167,-0.362037928037098,0.206392590597009,-0.441428628629169,-0.60577404803728,0.628952038789727,1.75864589214325,-0.80280613899231,1.44370007514954,-0.96798712015152,1.02111029624939,0.613745331764221,-1.32988131046295 
+0.138302157587216,1.14199587472173,-0.19560397688806,0.645650829786833,-0.690460812966654,0.056196463325714,0.32131110977164,-0.544483006000519,0.10510428249836,0.358821421861649,0.558321118354797,2.08619236946106,1.32144129276276,-0.611496329307556 +-0.616904280376582,0.497226062300036,0.579628377379033,0.255260676395013,-0.452340998505152,0.832004200302661,-0.194932344254239,-0.820925891399384,1.20717215538025,2.13999199867249,0.159369692206383,1.42129504680634,-1.37950241565704,1.20559918880463 +-0.126611897428104,0.698363533339438,0.487604805523578,0.627270599841923,0.626584080300186,0.376274579922072,-0.268963613510809,0.579923391342163,0.208352103829384,-0.631639540195465,-1.5590124130249,-1.40874481201172,1.3664128780365,2.34109997749329 +0.256703642883507,0.985965662035167,-0.474644026468992,0.637881939940872,-0.571195986555754,-0.388846923709799,-0.340058591365216,0.101648449897766,-0.871674954891205,0.680574357509613,-0.646894037723541,-2.15447974205017,-1.13724732398987,0.828339815139771 +0.368480444857791,1.11973695342029,0.174180847519709,-0.456280572585493,-0.316526780628434,0.416153709542883,0.72002425393854,-2.27150201797485,-0.596828043460846,0.755089938640595,-0.084245294332504,-1.80126428604126,1.38243043422699,0.998225450515747 +-0.248037236632446,0.997984891041308,-0.338429418942203,0.809442660865486,0.465511732311919,-0.351657009965641,-0.066623968607282,0.638268530368805,0.188850238919258,-0.081894092261791,1.30080103874207,1.15366160869598,-0.300139874219894,-2.06458592414856 +-0.360457941235354,0.876111408216616,0.723413358747915,-0.071537326445128,0.464148801574552,-0.233049084203132,0.851549426207353,-0.670872271060944,0.933017671108246,1.3028017282486,-0.340695202350616,0.859216272830963,-0.085435435175896,1.69431090354919 
+0.426987957060252,0.232254104108508,0.548588847813482,0.114083523189833,0.077970052204838,0.952686770747072,0.27072779232891,-1.93354439735413,-1.20678150653839,2.0512170791626,-1.43280041217804,1.75474166870117,1.00753927230835,-0.18412572145462 +-0.825283278566211,0.304408949990684,0.135939314126004,0.079447955004877,0.983327290567561,0.121384741120633,-0.109641263645029,1.75989902019501,-1.43303632736206,0.533579647541046,0.06353610008955,2.37738108634949,1.515833735466,1.54336178302765 +-0.216718175034519,1.09628521960642,0.222565328436649,0.158886579601356,0.076825051487916,0.234842847686814,0.955877504273852,-2.15745115280151,-0.089647330343723,1.04148995876312,-1.13509917259216,-0.326217204332352,-1.40369355678558,-2.2589328289032 +-0.697348508320503,0.81881624163625,0.023284395418522,0.028334239153153,-0.068682604231789,0.918521011223108,-0.388328498451028,-1.68516874313355,0.796210706233978,-1.64344143867493,0.187828674912453,-1.3171671628952,-1.51519656181335,1.5646630525589 +0.230287711575945,0.979194624080139,0.412807766274565,-0.311958240046239,0.46471273374212,0.240264326954164,0.793093427509295,2.1987407207489,-0.499979764223099,2.07976698875427,1.30679500102997,-2.06130075454712,-0.093529343605042,1.42197704315186 +0.413382233385853,1.03630987298274,0.227824664385203,-0.30893312206583,0.28084114394025,0.232783308306051,0.87835101715378,0.267852604389191,-0.01381757017225,-2.34710884094238,1.21004617214203,0.065994210541248,0.45452943444252,-0.429983049631119 +-0.552584435420759,0.682645409436014,-0.388815136172824,0.768673657369574,0.313442182830123,-0.400813015248212,-0.387612865235929,-1.31363391876221,0.906925737857819,-1.37290585041046,-1.36175060272217,-1.72353148460388,0.622546792030335,-1.75211668014526 
+0.55831021738535,0.464102144707506,-0.134563215093199,-0.014504264003811,-0.426082390392616,0.904567112615893,-0.001327289255715,-0.73624861240387,-0.657847821712494,2.15796136856079,-1.38983809947968,0.568065047264099,1.39728653430939,0.534001350402832 +-0.497771465678185,0.904132684121846,-0.259153822615261,0.752999240927675,0.611744511721642,-0.187158445599859,-0.154053600352082,1.85346102714539,-0.080631390213966,-0.832447528839111,1.26871919631958,0.543653190135956,-0.072993040084839,-1.9505363702774 +-0.482067062616781,0.804541366592778,-0.641976682438821,0.089403105871745,-0.076299239405212,-0.38065794806714,0.91721591640136,0.765132486820221,-0.961885392665863,2.26129508018494,-0.508382320404053,-0.07028578221798,-0.929624795913696,-0.204895809292793 +0.44920693385886,1.0422272929826,0.003768906409485,-0.35811228110198,0.247625883454974,0.422751082788055,0.794807233213568,1.24168336391449,-0.087278947234154,0.31125670671463,-1.17347431182861,0.931364059448242,-0.110606156289577,1.57281744480133 diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/LICENSE b/agent/envs_dart/misc/kuka_iiwa_urdf/LICENSE similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/LICENSE rename to agent/envs_dart/misc/kuka_iiwa_urdf/LICENSE diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/README.md b/agent/envs_dart/misc/kuka_iiwa_urdf/README.md similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/README.md rename to agent/envs_dart/misc/kuka_iiwa_urdf/README.md diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/background.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/background.urdf new file mode 100644 index 0000000..4dd3e81 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/background.urdf @@ -0,0 +1,156 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/ground.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/ground.urdf new file mode 100644 index 0000000..e66e1c6 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/ground.urdf @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_2gripper.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_2gripper.urdf new file mode 100755 index 0000000..61ea381 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_2gripper.urdf @@ -0,0 +1,252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_3gripper.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_3gripper.urdf new file mode 100755 index 0000000..d785ab0 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_3gripper.urdf @@ -0,0 +1,252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/iiwa14.urdf 
b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_base.urdf old mode 100644 new mode 100755 similarity index 99% rename from agent/dart_envs/misc/kuka_iiwa_urdf/iiwa14.urdf rename to agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_base.urdf index 37755db..8a40e62 --- a/agent/dart_envs/misc/kuka_iiwa_urdf/iiwa14.urdf +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_base.urdf @@ -249,3 +249,4 @@ + diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_calibration_pin.urdf b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_calibration_pin.urdf new file mode 100755 index 0000000..8a40e62 --- /dev/null +++ b/agent/envs_dart/misc/kuka_iiwa_urdf/iiwa14_calibration_pin.urdf @@ -0,0 +1,252 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_0.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_0.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_0.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_0.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_1.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_1.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_1.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_1.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_2.stl 
b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_2.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_2.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_2.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_3.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_3.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_3.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_3.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_4.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_4.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_4.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_4.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_5.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_5.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_5.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_5.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_6.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_6.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_6.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_6.stl diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7.stl diff 
--git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl new file mode 100644 index 0000000..ea2d9d2 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric.stl differ diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric_touch.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric_touch.stl similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric_touch.stl rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/collision/link_7_electric_touch.stl diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_0.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_0.stl new file mode 100644 index 0000000..e057ea1 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_0.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_1.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_1.stl new file mode 100644 index 0000000..4eaa051 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_1.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_2.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_2.stl new file mode 100644 index 0000000..2eb3709 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_2.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_3.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_3.stl new file mode 100644 index 0000000..2892530 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_3.stl differ diff --git 
a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_4.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_4.stl new file mode 100644 index 0000000..dda08fe Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_4.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_5.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_5.stl new file mode 100644 index 0000000..6095451 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_5.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_6.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_6.stl new file mode 100644 index 0000000..6224ddb Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_6.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7.stl new file mode 100644 index 0000000..3d1760b Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric.stl new file mode 100644 index 0000000..ea2d9d2 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric.stl differ diff --git a/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric_touch.stl b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric_touch.stl new file mode 100644 index 0000000..f916477 Binary files /dev/null and b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/link_7_electric_touch.stl differ diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_0.dae 
b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_0.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_0.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_0.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_1.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_1.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_1.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_1.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_2.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_2.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_2.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_2.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_3.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_3.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_3.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_3.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_4.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_4.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_4.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_4.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_5.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_5.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_5.dae rename to 
agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_5.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_6.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_6.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_6.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_6.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric_touch.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric_touch.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric_touch.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/silver/link_7_electric_touch.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_0.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_0.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_0.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_0.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_1.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_1.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_1.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_1.dae diff --git 
a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_2.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_2.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_2.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_2.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_3.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_3.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_3.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_3.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_4.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_4.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_4.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_4.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_5.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_5.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_5.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_5.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_6.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_6.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_6.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_6.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric.dae similarity index 100% rename from 
agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric.dae diff --git a/agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric_touch.dae b/agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric_touch.dae similarity index 100% rename from agent/dart_envs/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric_touch.dae rename to agent/envs_dart/misc/kuka_iiwa_urdf/meshes/iiwa14/visual/white/link_7_electric_touch.dae diff --git a/agent/envs_dart/misc/real_two_joints_targets/target_points_sphere1.csv b/agent/envs_dart/misc/real_two_joints_targets/target_points_sphere1.csv new file mode 100644 index 0000000..a6d539b --- /dev/null +++ b/agent/envs_dart/misc/real_two_joints_targets/target_points_sphere1.csv @@ -0,0 +1,50 @@ +0.387046,0.588643,-0.861801,0.768436,-0.12956,-0.604665,-0.164622,-0.422118,-1.33335,2.56E-05,-3.98E-05,-7.93E-05,-7.1E-05,9.08E-05 +-0.00123494,1.33089,0.0463198,0.999628,0.0003179,0.023818,-0.0133045,-0.026655,0.0476791,2.63E-05,-3.87E-05,-7.92E-05,-7.22E-05,9.05E-05 +0.049673,1.3269,0.0861862,0.964751,-0.013236,0.049476,0.258133,0.522844,0.102511,2.64E-05,-3.8E-05,-7.93E-05,-7.24E-05,9.05E-05 +0.654445,0.803076,0.565835,0.775868,-0.216946,0.474385,0.354854,-2.28371,-1.09747,2.56E-05,-5.39E-07,-7.93E-05,-7.11E-05,9.04E-05 +-0.721295,0.945411,0.286027,0.74044,0.250556,0.368878,-0.502892,-1.19327,0.924425,2.74E-05,-6.17E-05,-7.87E-05,-7.26E-05,9.07E-05 +-0.353213,1.26478,0.0374496,0.730499,0.124349,0.138225,-0.657117,-1.46517,0.374098,2.7E-05,-3.54E-06,-7.87E-05,-7.19E-05,9.05E-05 +-0.508844,1.06333,-0.437258,0.84364,0.155117,-0.337833,0.387401,-2.28067,0.761797,2.76E-05,-4.55E-05,-7.83E-05,-7.26E-05,9.07E-05 +0.0494147,1.33067,-0.0121016,0.786467,-0.0161455,-0.0205759,-0.617078,1.81097,0.0523618,2.69E-05,-2.42E-05,-7.85E-05,-7.14E-05,9.05E-05 
+-0.00502128,1.07908,0.653971,0.932684,0.00139173,0.360674,-0.00356224,-0.00768045,0.738008,2.73E-05,-5.2E-05,-7.87E-05,-7.26E-05,9.07E-05 +0.085346,0.990145,-0.735131,0.9063,-0.0242297,-0.418671,-0.0524157,-0.115579,-0.865447,2.61E-05,-1.24E-05,-7.87E-05,-7.11E-05,9.05E-05 +-0.501228,1.18547,0.110228,0.749413,0.172044,0.213982,-0.602489,1.78727,-0.556235,2.61E-05,-2.67E-05,-7.88E-05,-7.15E-05,9.05E-05 +-0.198982,1.31129,-0.0154172,0.729976,0.0701228,-0.075769,0.675631,1.49347,-0.206806,2.66E-05,-3.28E-05,-7.87E-05,-7.18E-05,9.05E-05 +0.747739,0.874838,0.347293,0.73721,-0.260873,0.408818,0.470462,-2.00561,-1.01259,2.54E-05,-2.44E-05,-7.94E-05,-7.09E-05,9.05E-05 +-0.302458,1.24707,0.257717,0.887859,0.0876099,0.189741,-0.409914,-0.865099,0.42117,2.67E-05,1.23E-05,-7.94E-05,-7.16E-05,9.05E-05 +0.914476,0.689419,-0.000467361,0.578722,-0.406432,-0.406624,-0.578404,1.57131,1.22502,2.71E-05,-6.56E-05,-7.94E-05,-7.22E-05,9.05E-05 +-0.0715548,1.32929,0.0122452,0.763898,0.024105,0.0285794,-0.644253,1.74029,-0.0747768,2.62E-05,-5.53E-05,-7.93E-05,-7.21E-05,9.05E-05 +0.679878,0.977421,-0.318341,0.763009,-0.229179,-0.360351,-0.485223,2.00871,0.882523,2.7E-05,-1.51E-05,-7.92E-05,-7.29E-05,9.07E-05 +0.235172,0.515882,-0.93015,0.755844,-0.080038,-0.642999,-0.0940569,-0.247643,-1.40969,2.46E-05,4.14E-05,-7.94E-05,-7.38E-05,9.07E-05 +-0.916423,0.499866,-0.292216,0.610624,0.385988,-0.528236,0.446227,-1.87947,1.42642,2.74E-05,8.51E-06,-7.88E-05,-7.45E-05,9.05E-05 +-0.0431887,0.636267,0.930911,0.801125,0.0138772,0.598049,-0.018558,-0.0463608,1.28261,2.73E-05,9.59E-06,-7.88E-05,-7.47E-05,9.05E-05 +0.540553,0.872777,-0.624216,0.818908,-0.169779,-0.45538,-0.305277,2.4279,1.01511,2.73E-05,1.71E-05,-7.87E-05,-7.44E-05,9.06E-05 +0.0398599,1.31074,-0.198219,0.989624,-0.0103638,-0.104088,-0.0984974,-0.198444,-0.209547,2.59E-05,-3.04E-05,-7.93E-05,-7.25E-05,9.05E-05 
+-0.196303,0.718404,0.881928,0.822323,0.061408,0.558429,-0.0903956,-0.219013,1.19315,2.68E-05,-3.27E-05,-7.93E-05,-7.31E-05,9.05E-05 +0.075669,0.378383,-0.968876,0.713195,-0.0273016,-0.699882,-0.0277948,-0.0779416,-1.55185,2.54E-05,2.96E-05,-7.93E-05,-7.28E-05,9.06E-05 +0.099022,1.32516,0.0587185,0.867381,-0.0293775,0.0515768,0.494092,-2.10604,-0.118702,2.6E-05,8.03E-06,-7.92E-05,-7.34E-05,9.06E-05 +-0.602865,1.08635,0.23185,0.770479,0.201249,0.293003,-0.52916,-1.20365,0.726825,2.68E-05,-6.15E-05,-7.93E-05,-7.38E-05,9.05E-05 +0.0358194,0.691392,-0.913061,0.818644,-0.0112645,-0.573967,-0.0160366,-0.0392093,-1.22286,2.54E-05,2.71E-05,-7.95E-05,-7.25E-05,9.07E-05 +-0.971686,0.372724,-0.0211701,0.508686,0.491286,-0.502124,0.497743,1.54901,-1.55768,2.54E-05,2.52E-05,-7.93E-05,-7.24E-05,9.06E-05 +-0.120265,1.32323,0.0501203,0.830198,0.0372841,0.0559282,-0.553401,1.96566,-0.134404,2.59E-05,5.51E-05,-7.94E-05,-7.33E-05,9.05E-05 +0.410801,0.764779,-0.78242,0.81707,-0.129329,-0.524492,-0.201441,-0.483477,-1.14123,2.51E-05,5.21E-05,-7.93E-05,-7.22E-05,9.06E-05 +-0.184575,1.30698,0.118094,0.871537,0.0544893,0.0995465,-0.477016,2.13998,-0.227366,2.57E-05,1.8E-05,-7.94E-05,-7.34E-05,9.06E-05 +0.19187,1.3071,-0.104756,0.854481,-0.0577735,-0.0973623,-0.506996,-1.07105,-0.226821,2.54E-05,1.8E-05,-7.93E-05,-7.35E-05,9.04E-05 +-0.0118187,1.3318,0.0155168,0.947461,0.00319775,0.00947526,-0.319715,-0.650927,0.0200873,2.56E-05,1.29E-05,-7.93E-05,-7.35E-05,9.05E-05 +0.140475,1.30749,-0.165288,0.932685,-0.0387288,-0.105367,-0.342778,2.43717,0.225082,2.64E-05,1.47E-05,-7.9E-05,-7.36E-05,9.06E-05 +0.102779,1.12746,-0.58756,0.942375,-0.02806,-0.323191,-0.0817839,-0.173172,-0.660661,2.5E-05,3.58E-05,-7.93E-05,-7.24E-05,9.05E-05 +-0.535461,1.17084,0.024655,0.692579,0.198843,0.208202,-0.661398,-1.52478,0.584131,2.61E-05,-9.83E-06,-7.89E-05,-7.44E-05,9.06E-05 +0.828553,0.631074,-0.429886,0.683338,-0.311878,-0.513152,-0.415279,-1.09219,-1.28814,2.49E-05,2.18E-05,-7.92E-05,-7.31E-05,9.06E-05 
+-0.216545,1.28462,0.207293,0.908383,0.0613059,0.143547,-0.387912,-0.807224,0.313519,2.56E-05,-1.76E-05,-7.93E-05,-7.38E-05,9.04E-05 +0.707388,0.790014,-0.509386,0.755877,-0.240707,-0.469929,-0.387138,2.19488,1.11253,2.68E-05,-1.11E-05,-7.89E-05,-7.44E-05,9.06E-05 +-0.819711,0.681315,0.411842,0.694269,0.303681,0.492411,-0.428135,-1.10521,1.23389,2.68E-05,-2.87E-05,-7.87E-05,-7.34E-05,9.04E-05 +-0.179436,1.27513,0.274082,0.944182,0.0488758,0.163877,-0.28156,-0.579658,0.343762,2.61E-05,-2.06E-05,-7.87E-05,-7.29E-05,9.06E-05 +0.354092,1.25366,-0.14412,0.812879,-0.112028,-0.166542,-0.546756,1.95733,0.404231,2.62E-05,-1.85E-05,-7.88E-05,-7.31E-05,9.06E-05 +-0.529087,1.0994,-0.343717,0.824603,0.165009,-0.303983,0.447656,-2.14692,0.706409,2.67E-05,-2.03E-05,-7.84E-05,-7.35E-05,9.06E-05 +0.0369512,0.944619,0.775654,0.894593,-0.0106146,0.446247,0.0213141,0.0476028,0.925467,2.68E-05,-2.25E-05,-7.83E-05,-7.35E-05,9.04E-05 +0.170645,1.3131,-0.0852475,0.84642,-0.0518719,-0.0838939,-0.523301,-1.1075,-0.19752,2.57E-05,3.6E-07,-7.87E-05,-7.21E-05,9.06E-05 +-0.875839,0.637922,-0.316936,0.656386,0.343186,-0.489171,0.460535,1.22359,-1.28081,2.51E-05,2.34E-06,-7.85E-05,-7.18E-05,9.06E-05 +-0.769683,0.923478,0.186723,0.698596,0.283395,0.360353,-0.549361,1.8088,-0.952423,2.51E-05,-1.74E-05,-7.85E-05,-7.21E-05,9.05E-05 +-0.383404,0.451755,-0.888463,0.724416,0.136114,-0.659014,0.14965,0.407393,-1.47626,2.51E-05,-1.95E-05,-7.87E-05,-7.19E-05,9.07E-05 +-0.573824,1.13832,-0.0985943,0.725532,0.203425,-0.241367,0.611526,1.40064,-0.642273,2.53E-05,-4.61E-05,-7.87E-05,-7.24E-05,9.04E-05 +0.491804,1.01633,0.521674,0.850617,-0.148693,0.374509,0.337763,0.755934,0.829493,2.66E-05,-5.04E-05,-7.85E-05,-7.32E-05,9.06E-05 diff --git a/agent/envs_other/cartpole.py b/agent/envs_other/cartpole.py new file mode 100755 index 0000000..6af1ea3 --- /dev/null +++ b/agent/envs_other/cartpole.py @@ -0,0 +1,327 @@ +# modified from https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py +# to 
include simulating disturbed models and an analytical disturbance observer +# the environment is also changed to be episodic with maximum allowed time steps +# the action space is also changed to be continuous Box space + +import math +import gym +from gym import spaces, logger +from gym.utils import seeding +import numpy as np + + +class CartPoleEnv(gym.Env): + """ + Description: + A pole is attached by an un-actuated joint to a cart, which moves along + a frictionless track. The pendulum starts upright, and the goal is to + prevent it from falling over by increasing and reducing the cart's + velocity. + Source: + This environment corresponds to the version of the cart-pole problem + described by Barto, Sutton, and Anderson + Observation: + Type: Box(4) + Num Observation Min Max + 0 Cart Position -4.8 4.8 + 1 Cart Velocity -Inf Inf + 2 Pole Angle -0.418 rad (-24 deg) 0.418 rad (24 deg) + 3 Pole Angular Velocity -Inf Inf + Actions: + Type: Discrete(2) + Num Action + 0 Push cart to the left + 1 Push cart to the right + Note: The amount the velocity that is reduced or increased is not + fixed; it depends on the angle the pole is pointing. This is because + the center of gravity of the pole increases the amount of energy needed + to move the cart underneath it + Reward: + Reward is 1 for every step taken, including the termination step + Starting State: + All observations are assigned a uniform random value in [-0.05..0.05] + Episode Termination: + Pole Angle is more than 12 degrees. + Cart Position is more than 2.4 (center of the cart reaches the edge of + the display). + Episode length is greater than max_ts. + Solved Requirements: + Considered solved when the average return is greater than or equal to + 195.0 over 100 consecutive trials. 
+ """ + + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': 50 + } + + def __init__(self, max_ts, disturbance=None): + self.gravity = 9.8 + self.masscart = 1.0 + self.masspole = 0.1 + self.total_mass = (self.masspole + self.masscart) + self.length = 0.5 # actually half the pole's length + self.polemass_length = (self.masspole * self.length) + self.force_mag = 10.0 + self.tau = 0.02 # seconds between state updates + self.kinematics_integrator = 'euler' + + self.ts = 0 + self.max_ts = max_ts + self.disturbance = disturbance + + # Angle at which to fail the episode + self.theta_threshold_radians = 12 * 2 * math.pi / 360 + self.x_threshold = 2.4 + + # Angle limit set to 2 * theta_threshold_radians so failing observation + # is still within bounds. + high = np.array([self.x_threshold * 2, + np.finfo(np.float32).max, + self.theta_threshold_radians * 2, + np.finfo(np.float32).max], + dtype=np.float32) + + # self.action_space = spaces.Discrete(2) + self.action_space = spaces.Box(low=-1., high=1., shape=(1,), dtype=np.float32) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.gravity_n = self.gravity + self.masscart_n = self.masscart + self.masspole_n = self.masspole + self.total_mass_n = (self.masspole_n + self.masscart_n) + self.length_n = self.length + self.polemass_length_n = (self.masspole_n * self.length_n) + self.force_mag_n = self.force_mag + + self.obs = None + self.obs_prev = None + self.act_prev = None + + self.action_dist = 0.0 + self.obs_noise = np.zeros(4) + if self.disturbance is not None: + self._apply_disturbance() + + self.seed() + self.viewer = None + self.state = None + + self.steps_beyond_done = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def set_disturbance(self, disturbance): + self.disturbance = disturbance + self._apply_disturbance() + self.reset() + + def _apply_disturbance(self): + for dist_type, _dist in 
self.disturbance.items(): + for dist_var, dist_val in _dist.items(): + if dist_type == 'parametric_disturbance': + if dist_var == 'cart_mass': + self.masscart *= dist_val + self.total_mass = (self.masspole + self.masscart) + elif dist_var == 'pole_mass': + self.masspole *= dist_val + self.total_mass = (self.masspole + self.masscart) + self.polemass_length = (self.masspole * self.length) + elif dist_var == 'pole_length': + self.length *= dist_val + self.polemass_length = (self.masspole * self.length) + elif dist_var == 'gravity': + self.gravity *= dist_val + else: + raise Exception('* unknown parameter!') + + elif dist_type == 'action_disturbance': + if dist_var == '0': + self.action_dist = (dist_val - 1.0) * self.action_space.high[0] + else: + raise Exception('* unknown action!') + + elif dist_type == 'observation_noise': + obs_ind = int(dist_var) + if obs_ind in range(self.observation_space.shape[0]): + self.obs_noise[obs_ind] =\ + (dist_val - 1.0) * self.observation_space.high[obs_ind - (obs_ind % 2)] # should not be inf + else: + raise Exception('* unknown observation!') + else: + raise Exception('* unknown disturbance type!') + + def step(self, action): + self.ts += 1 + + # err_msg = "%r (%s) invalid" % (action, type(action)) + # assert self.action_space.contains(action), err_msg + + x, x_dot, theta, theta_dot = self.state + self.act_prev = action + action_dist = self.np_random.uniform(low=0., high=np.abs(self.action_dist)) * np.sign(self.action_dist) + action = np.clip(action[0] + action_dist, -1., 1.) 
+ force = self.force_mag * action + # force = self.force_mag if action == 1 else -self.force_mag + costheta = math.cos(theta) + sintheta = math.sin(theta) + + # For the interested reader: + # https://coneural.org/florian/papers/05_cart_pole.pdf + temp = (force + self.polemass_length * theta_dot ** 2 * sintheta) / self.total_mass + thetaacc = (self.gravity * sintheta - costheta * temp) /\ + (self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)) + xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass + + if self.kinematics_integrator == 'euler': + x = x + self.tau * x_dot + x_dot = x_dot + self.tau * xacc + theta = theta + self.tau * theta_dot + theta_dot = theta_dot + self.tau * thetaacc + else: # semi-implicit euler + x_dot = x_dot + self.tau * xacc + x = x + self.tau * x_dot + theta_dot = theta_dot + self.tau * thetaacc + theta = theta + self.tau * theta_dot + + self.state = (x, x_dot, theta, theta_dot) + + done = bool( + x < -self.x_threshold + or x > self.x_threshold + or theta < -self.theta_threshold_radians + or theta > self.theta_threshold_radians + ) + + if not done: + reward = 1.0 + elif self.steps_beyond_done is None: + # Pole just fell! + self.steps_beyond_done = 0 + reward = 1.0 + else: + if self.steps_beyond_done == 0: + logger.warn( + "You are calling 'step()' even though this " + "environment has already returned done = True. You " + "should always call 'reset()' once you receive 'done = " + "True' -- any further steps are undefined behavior." 
+ ) + self.steps_beyond_done += 1 + reward = 0.0 + + if self.ts > self.max_ts: + reward = 100.0 + done = True + + return self._get_obs(), reward, done, {} + + def reset(self): + self.ts = 0 + self.obs = None + self.act_prev = None + high = np.array([self.x_threshold / 2, self.x_threshold / 2, + self.theta_threshold_radians / 2, self.theta_threshold_radians / 2]) + self.state = self.np_random.uniform(low=-high, high=high, size=(4,)) + self.steps_beyond_done = None + return self._get_obs() + + def _get_obs(self): + self.obs_prev = self.obs + self.obs = np.array(self.state) + obs_noise = self.np_random.uniform(low=0., high=np.abs(self.obs_noise)) * np.sign(self.obs_noise) + self.obs += obs_noise + return self.obs + + def render(self, mode='human'): + screen_width = 600 + screen_height = 400 + + world_width = self.x_threshold * 2 + scale = screen_width/world_width + carty = 100 # TOP OF CART + polewidth = 10.0 + polelen = scale * (2 * self.length) + cartwidth = 50.0 + cartheight = 30.0 + + if self.viewer is None: + from gym.envs.classic_control import rendering + self.viewer = rendering.Viewer(screen_width, screen_height) + l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2 + axleoffset = cartheight / 4.0 + cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + self.carttrans = rendering.Transform() + cart.add_attr(self.carttrans) + self.viewer.add_geom(cart) + l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2 + pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + pole.set_color(.8, .6, .4) + self.poletrans = rendering.Transform(translation=(0, axleoffset)) + pole.add_attr(self.poletrans) + pole.add_attr(self.carttrans) + self.viewer.add_geom(pole) + self.axle = rendering.make_circle(polewidth/2) + self.axle.add_attr(self.poletrans) + self.axle.add_attr(self.carttrans) + self.axle.set_color(.5, .5, .8) + self.viewer.add_geom(self.axle) + self.track = rendering.Line((0, carty), 
(screen_width, carty)) + self.track.set_color(0, 0, 0) + self.viewer.add_geom(self.track) + + self._pole_geom = pole + + if self.state is None: + return None + + # Edit the pole polygon vertex + pole = self._pole_geom + l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2 + pole.v = [(l, b), (l, t), (r, t), (r, b)] + + x = self.state + cartx = x[0] * scale + screen_width / 2.0 # MIDDLE OF CART + self.carttrans.set_translation(cartx, carty) + self.poletrans.set_rotation(-x[2]) + + return self.viewer.render(return_rgb_array=mode == 'rgb_array') + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None + + def estimate_disturbance(self): + if self.act_prev is None: + return 0. + + # gravity = self.gravity_n + # masscart = self.masscart_n + # masspole = self.masspole_n + total_mass = self.total_mass_n + # length = self.length_n + polemass_length = self.polemass_length_n + force_mag = self.force_mag_n + tau = self.tau + + x, x_dot, theta, theta_dot = self.obs + x_prev, x_dot_prev, theta_prev, theta_dot_prev = self.obs_prev + + temp_prev = (x_dot - x_dot_prev) / tau +\ + (polemass_length * math.cos(theta_prev) * ((theta_dot - theta_dot_prev) / tau)) / total_mass + force_prev = total_mass * temp_prev - polemass_length * theta_dot_prev ** 2 * math.sin(theta_prev) + act_prev_est = force_prev / force_mag + + return act_prev_est - self.act_prev + + def random_action(self): + return self.action_space.sample() + + +if __name__ == "__main__": + from stable_baselines3.common.env_checker import check_env + env = CartPoleEnv(200) + check_env(env) diff --git a/agent/envs_other/nlinks_box2d.py b/agent/envs_other/nlinks_box2d.py new file mode 100755 index 0000000..cf56a48 --- /dev/null +++ b/agent/envs_other/nlinks_box2d.py @@ -0,0 +1,470 @@ +""" +A Box2D environment of a planar manipulator with N links +Box2D only accepts speed control of the joints not force/torque. +The action space is the joint velocities. 
+""" + +import time +import Box2D +import gym +import numpy as np +from Box2D.b2 import (circleShape, polygonShape, fixtureDef, revoluteJointDef, weldJointDef) +from gym import spaces +from gym.utils import seeding, EzPickle +from gym.envs.classic_control import rendering + +# Slow rendering with output +DEBUG = False +FPS = 50 # Frames per Second + + +class NLinksBox2DEnv(gym.Env, EzPickle): + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': FPS + } + + SCALE = 70.0 + VIEWPORT_W, VIEWPORT_H = 800, 800 + + # Punishing energy (integral of applied forces); huge impact on performance + POWER = False + + LINK_MASS = 1.0 # kg + LINK_HEIGHT = 0.5 # m + LINK_WIDTH = 0.10 # m + + # Min distance for end effector target + MIN_DISTANCE = 0.05 # m + + MAX_JOINT_VEL = 3.0 + + GRAVITY = -9.81 # m/s^2 + + COLOR_ANCHOR = (0., 0., 0.) + COLOR_JOINTS = (0.6, 0.6, .8) + COLOR_BORDER = (0., 0., 0.) + COLOR_LINKS = (.6, .6, 1.) + COLOR_EE = (.6, 1., .6) + COLOR_TARGET = (1., 0.6, 0.6) + COLOR_BACKGROUND = (0.9, 0.9, 1.0) + + MAX_JOINT_TORQUE = 10000 # 80 + + ANCHOR_X = 0.0 + ANCHOR_Y = 0.0 + + FIX_CIRCLE = fixtureDef( + shape=circleShape(radius=LINK_WIDTH), + density=1e-3, + friction=0.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001) + + FIX_POLY = fixtureDef( + shape=polygonShape(box=(LINK_WIDTH / 2, LINK_HEIGHT / 2)), + density=LINK_MASS / (LINK_WIDTH * LINK_HEIGHT), + friction=0.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001) + + time_step = n_links = np_random = world = viewer = None + draw_list = [] + + # The Box2D objects below need to be destroyed when resetting world to release allocated memory + anchor = None # static body to hold the manipulator base fixed + target = None # static body to represent the target position + end_effector = None # dynamic body of end effector + links = [] # dynamic body of links + joint_bodies = [] # dynamic body of joints + joint_fixes = [] # weld joints to connect previous link to 
the next joint_body + joints = [] # revolute joints to connect previous joint_body to the next link + + def __init__(self, max_ts, n_links, disturbance=None): + + self.max_ts = max_ts + self.n_links = n_links + self.disturbance = disturbance + + EzPickle.__init__(self) + self.seed() + + len_arms = self.n_links * self.LINK_HEIGHT + # x,y observation space is twice big to include anchor deviations from (0,0) + high = [len_arms * 2.0, len_arms * 2.0, len_arms * 2.0, len_arms * 2.0] + for _ in range(self.n_links): + high.append(np.pi) + high.append(self.MAX_JOINT_VEL) + high = np.array(high) + self.action_space = spaces.Box(-np.ones(self.n_links), np.ones(self.n_links), dtype=np.float32) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.action_dist = np.zeros(self.n_links) + self.obs_noise = np.zeros(self.observation_space.shape[0]) + if self.disturbance is not None: + self._apply_disturbance() + + self.world = Box2D.b2World(gravity=(0, self.GRAVITY)) + + self.reset() + + def _destroy(self): + + for idx in range(len(self.joints)): + self.world.DestroyJoint(self.joints[idx]) + self.joints = [] + + for idx in range(len(self.joint_fixes)): + self.world.DestroyJoint(self.joint_fixes[idx]) + self.joint_fixes = [] + + for idx in range(len(self.joint_bodies)): + self.world.DestroyBody(self.joint_bodies[idx]) + self.joint_bodies = [] + + for idx in range(len(self.links)): + self.world.DestroyBody(self.links[idx]) + self.links = [] + + if self.end_effector: + self.world.DestroyBody(self.end_effector) + self.end_effector = None + + if self.target: + self.world.DestroyBody(self.target) + self.target = None + + if self.anchor: + self.world.DestroyBody(self.anchor) + self.anchor = None + + def _create_anchor(self): + self.anchor = self.world.CreateStaticBody(position=(self.ANCHOR_X, self.ANCHOR_Y), fixtures=self.FIX_CIRCLE) + self.anchor.color1 = self.COLOR_ANCHOR + self.anchor.color2 = self.COLOR_BORDER + # print(f"anchor mass: {self.anchor.mass}") + # 
print(f"anchor inertia: {self.anchor.inertia}") + # print(f"anchor local center: {self.anchor.localCenter}") + # print(f"anchor world center: {self.anchor.worldCenter}") + + def _create_first_arm(self): + self.joint_bodies.append( + self.world.CreateDynamicBody(position=self.anchor.position, fixtures=self.FIX_CIRCLE)) + self.joint_bodies[0].color1 = self.COLOR_JOINTS + self.joint_bodies[0].color2 = self.COLOR_BORDER + + rjd = weldJointDef(bodyA=self.anchor, bodyB=self.joint_bodies[0], + localAnchorA=(0.0, 0.0), localAnchorB=(0.0, 0.0)) + self.joint_fixes.append(self.world.CreateJoint(rjd)) + + self.links.append( + self.world.CreateDynamicBody(position=self.joint_bodies[0].position - (0.0, self.LINK_HEIGHT / 2), + angle=0.0, fixtures=self.FIX_POLY)) + self.links[0].color1 = self.COLOR_LINKS + self.links[0].color2 = self.COLOR_BORDER + # print(f"link1 mass: {self.links[0].mass}") + # print(f"link1 inertia: {self.links[0].inertia}") + # print(f"link1 local center: {self.links[0].localCenter}") + # print(f"link1 world center: {self.links[0].worldCenter}") + + rjd = revoluteJointDef(bodyA=self.anchor, bodyB=self.links[0], localAnchorA=(0.0, 0.0), + localAnchorB=(0.0, self.LINK_HEIGHT / 2), + enableMotor=True, maxMotorTorque=self.MAX_JOINT_TORQUE, motorSpeed=0.0) + self.joints.append(self.world.CreateJoint(rjd)) + + def _create_next_arm(self): + self.joint_bodies.append( + self.world.CreateDynamicBody( + position=self.links[-1].position - (0.0, self.LINK_HEIGHT / 2), fixtures=self.FIX_CIRCLE)) + self.joint_bodies[-1].color1 = self.COLOR_JOINTS + self.joint_bodies[-1].color2 = self.COLOR_BORDER + + rjd = weldJointDef(bodyA=self.links[-1], bodyB=self.joint_bodies[-1], + localAnchorA=(0.0, -self.LINK_HEIGHT / 2), localAnchorB=(0.0, 0.0)) + self.joint_fixes.append(self.world.CreateJoint(rjd)) + + self.links.append( + self.world.CreateDynamicBody(position=self.joint_bodies[-1].position - (0.0, self.LINK_HEIGHT / 2), + angle=0.0, fixtures=self.FIX_POLY)) + 
self.links[-1].color1 = self.COLOR_LINKS + self.links[-1].color2 = self.COLOR_BORDER + # print(f"link1 mass: {self.links[-1].massData.mass}") + # print(f"link1 inertia: {self.links[-1].massData.I}") + # print(f"link1 local center: {self.links[-1].localCenter}") + # print(f"link1 world center: {self.links[-1].worldCenter}") + + rjd = revoluteJointDef(bodyA=self.links[-2], bodyB=self.links[-1], localAnchorA=(0.0, -self.LINK_HEIGHT / 2), + localAnchorB=(0.0, self.LINK_HEIGHT / 2), + enableMotor=True, maxMotorTorque=self.MAX_JOINT_TORQUE, motorSpeed=0.0) + self.joints.append(self.world.CreateJoint(rjd)) + + def _create_end_effector(self): + self.end_effector = self.world.CreateDynamicBody(position=self.links[-1].position - (0.0, self.LINK_HEIGHT / 2), + fixtures=self.FIX_CIRCLE) + self.end_effector.color1 = self.COLOR_EE + self.end_effector.color2 = self.COLOR_BORDER + + rjd = weldJointDef(bodyA=self.links[-1], bodyB=self.end_effector, + localAnchorA=(0.0, -self.LINK_HEIGHT / 2), localAnchorB=(0.0, 0.0)) + self.joint_fixes.append(self.world.CreateJoint(rjd)) + + def _create_target(self): + self.target = self.world.CreateStaticBody(position=(self._random_point()), fixtures=self.FIX_CIRCLE) + self.target.color1 = self.COLOR_TARGET + self.target.color2 = self.COLOR_BORDER + + def _random_point(self): + + len_arms = self.n_links * self.LINK_HEIGHT * 1. 
+ + while True: + x = 2 * len_arms * self.np_random.rand() - len_arms + self.anchor.position[0] + y = 2 * len_arms * self.np_random.rand() - len_arms + self.anchor.position[1] + + pos_1 = np.array([x, y]) + pos_2 = np.array([self.anchor.position[0], self.anchor.position[1]]) + + distance = np.linalg.norm(pos_1 - pos_2) + + if self.n_links == 1: + if abs(distance - len_arms) < self.MIN_DISTANCE: + return x, y + else: + if len_arms * 0.95 > distance: + return x, y + + def _calc_power(self, action_arr): + power = 0 + for action in action_arr: + power += abs(action) + + return power + + def _get_distance(self): + + pos1 = np.array([self.end_effector.position[0], self.end_effector.position[1]]) + pos2 = np.array([self.target.position[0], self.target.position[1]]) + + distance = np.linalg.norm(pos1 - pos2) + + return distance + + def _get_terminal(self): + + distance = self._get_distance() + + if distance < self.MIN_DISTANCE: + return True + else: + return False + + def reset(self): + + self.time_step = 0 + + self._destroy() + + self._create_anchor() + self._create_first_arm() + for _ in range(self.n_links - 1): + self._create_next_arm() + self._create_end_effector() + + self._create_target() + + self.links[self.n_links - 1].ground_contact = False + self.draw_list = [self.anchor] + self.links + self.joint_bodies + [self.end_effector] + [self.target] + + # It is needed to perform one world step to correctly initialize joint angles/velocities + for idx in range(len(self.joints)): + self.joints[idx].motorSpeed = 0.0 + self.world.Step(1.0 / FPS, 6 * 300, 2 * 300) + self.world.ClearForces() + + return self._get_state() + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def _get_state(self): + + tx = self.target.position[0] + ty = self.target.position[1] + + eex = self.end_effector.position[0] + eey = self.end_effector.position[1] + + state = [tx, ty, eex, eey] + + for idx in range(self.n_links): + 
state.append(self.joints[idx].angle % (np.pi * 2) - np.pi) + state.append(self.joints[idx].speed) + + obs_noise = self.np_random.uniform(low=0., high=np.abs(self.obs_noise)) * np.sign(self.obs_noise) + + return np.array(state + obs_noise) + + def step(self, action): + + self.time_step += 1 + + action_dist = self.np_random.uniform(low=0., high=np.abs(self.action_dist)) * np.sign(self.action_dist) + action += action_dist + + action = np.clip(action, -1., 1.) + power = self._calc_power(action) + + speeds = self.MAX_JOINT_VEL * action + + for idx in range(len(self.joints)): + self.joints[idx].motorSpeed = float(speeds[idx]) + + self.world.Step(1.0 / FPS, 6 * 300, 2 * 300) + self.world.ClearForces() + + state = self._get_state() + + done = self._get_terminal() + + if done: + reward = 300 + else: + reward = -self._get_distance() + if self.POWER: + reward -= power + + if self.time_step > self.max_ts: + done = True + + if DEBUG: + print("dist", self._get_distance()) + print("power", power) + print("reward", reward) + print("state", state) + time.sleep(.1) + + return state, reward, done, {} + + def render(self, mode='human'): + if self.viewer is None: + self.viewer = rendering.Viewer(self.VIEWPORT_W, self.VIEWPORT_H) + + dim = self.VIEWPORT_W / self.SCALE / 2 + self.viewer.set_bounds(-dim, dim, -dim, dim) + + self.viewer.draw_polygon([ + (-self.VIEWPORT_H, -self.VIEWPORT_H), + (self.VIEWPORT_H, -self.VIEWPORT_H), + (self.VIEWPORT_H, self.VIEWPORT_H), + (-self.VIEWPORT_H, self.VIEWPORT_H), + ], color=self.COLOR_BACKGROUND) + + for obj in self.draw_list: + for f in obj.fixtures: + trans = f.body.transform + if type(f.shape) is circleShape: + t = rendering.Transform(translation=trans * f.shape.pos) + self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t) + self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t) + else: + path = [trans * v for v in f.shape.vertices] + self.viewer.draw_polygon(path, 
color=obj.color1) + path.append(path[0]) + self.viewer.draw_polyline(path, color=obj.color2, linewidth=2) + + return self.viewer.render(return_rgb_array=mode == 'rgb_array') + + def close(self): + if self.viewer is not None: + self.viewer.close() + self.viewer = None + + def random_action(self): + return self.action_space.sample() + + def set_disturbance(self, disturbance): + self.disturbance = disturbance + self._apply_disturbance() + self.world.gravity = (0, self.GRAVITY) + self.reset() + + def _apply_disturbance(self): + for dist_type, _dist in self.disturbance.items(): + for dist_var, dist_val in _dist.items(): + if dist_type == 'parametric_disturbance': + if dist_var == 'link_mass': + self.LINK_MASS *= dist_val + self.FIX_POLY = fixtureDef( + shape=polygonShape(box=(self.LINK_WIDTH / 2, self.LINK_HEIGHT / 2)), + density=self.LINK_MASS / (self.LINK_WIDTH * self.LINK_HEIGHT), + friction=0.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001) + elif dist_var == 'link_height': + self.LINK_HEIGHT *= dist_val + self.FIX_POLY = fixtureDef( + shape=polygonShape(box=(self.LINK_WIDTH / 2, self.LINK_HEIGHT / 2)), + density=self.LINK_MASS / (self.LINK_WIDTH * self.LINK_HEIGHT), + friction=0.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001) + elif dist_var == 'gravity': + self.GRAVITY *= dist_val + elif dist_var == 'max_joint_torque': + self.MAX_JOINT_TORQUE *= dist_val + else: + raise Exception('* unknown parameter!') + + elif dist_type == 'action_disturbance': + act_ind = int(dist_var) + if act_ind in range(self.n_links): + self.action_dist[act_ind] = (dist_val - 1.0) * self.action_space.high[act_ind] + else: + raise Exception('* unknown action!') + + elif dist_type == 'observation_noise': + obs_ind = int(dist_var) + if obs_ind in range(self.observation_space.shape[0]): + self.obs_noise[obs_ind] = (dist_val - 1.0) * self.observation_space.high[obs_ind] + else: + raise Exception('* unknown observation!') + else: + raise Exception('* unknown 
disturbance type!') + + +if __name__ == '__main__': + """ + Test fct for debugging purposes + """ + num_links = 10 + nlinks = NLinksBox2DEnv(max_ts=200, n_links=num_links) + + from stable_baselines3.common.env_checker import check_env + check_env(nlinks) + + # some silent initialization steps + # for _ in range(100): + # nlinks.render() + # zero_act = np.zeros(num_links) + # nlinks.step(action=zero_act) + + while True: + nlinks.render() + # print("anchor", nlinks.anchor.position) + # print("end_eff", nlinks.end_effector.position) + # rnd_act = np.array([0.3, -0.3]) + # rnd_act = np.array([np.random.rand() * 2 - 1, np.random.rand() * 2 - 1]) + rnd_act = [] + for i in range(num_links): + rnd_act.append(np.random.rand() * 2 - 1) + + s, _, d, _ = nlinks.step(action=rnd_act) + # print("tx: {0:2.1f}; ty: {1:2.1f}".format(s[0], s[1])) + # print("eex: {0:2.1f}; eey: {1:2.1f}".format(s[2], s[3])) + if d: + nlinks.reset() diff --git a/agent/envs_other/pendulum.py b/agent/envs_other/pendulum.py new file mode 100755 index 0000000..144c190 --- /dev/null +++ b/agent/envs_other/pendulum.py @@ -0,0 +1,199 @@ +# modified from https://github.com/openai/gym/blob/master/gym/envs/classic_control/pendulum.py +# to include simulating disturbed models and an analytical disturbance observer +# the environment is also changed to be episodic with maximum allowed time steps + +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +from os import path + + +class PendulumEnv(gym.Env): + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': 30 + } + + def __init__(self, max_ts, disturbance=None): + + self.max_speed = 8. + self.max_torque = 20. + self.dt = 0.05 + self.g = 9.8 + self.m = 1. + self.l = 1. 
+ self.ts = 0 + self.max_ts = max_ts + self.disturbance = disturbance + self.viewer = None + + high = np.array([1., 1., self.max_speed], dtype=np.float32) + self.action_space = spaces.Box( + low=-self.max_torque, + high=self.max_torque, shape=(1,), + dtype=np.float32 + ) + self.observation_space = spaces.Box( + low=-high, + high=high, + dtype=np.float32 + ) + + self.dt_n = self.dt + self.g_n = self.g + self.m_n = self.m + self.l_n = self.l + + self.obs = None + self.obs_prev = None + self.act_prev = None + + self.action_dist = 0.0 + self.obs_noise = np.zeros(3) # TODO: finding robust model against obs noise + if self.disturbance is not None: + self._apply_disturbance() + + self.seed() + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def set_disturbance(self, disturbance): + self.disturbance = disturbance + self._apply_disturbance() + self.reset() + + def _apply_disturbance(self): + for dist_type, _dist in self.disturbance.items(): + for dist_var, dist_val in _dist.items(): + if dist_type == 'parametric_disturbance': + if dist_var == 'pendulum_mass': + self.m *= dist_val + elif dist_var == 'pendulum_length': + self.l *= dist_val + elif dist_var == 'gravity': + self.g *= dist_val + elif dist_var == 'time_step': + self.dt *= dist_val + else: + raise Exception('* unknown parameter!') + + elif dist_type == 'action_disturbance': + if dist_var == '0': + self.action_dist = (dist_val - 1.0) * self.action_space.high[0] + else: + raise Exception('* unknown action!') + + elif dist_type == 'observation_noise': + obs_ind = int(dist_var) + if obs_ind in range(self.observation_space.shape[0]): + self.obs_noise[obs_ind] = (dist_val - 1.0) * self.observation_space.high[obs_ind] + else: + raise Exception('* unknown observation!') + else: + raise Exception('* unknown disturbance type!') + # print('param dist: ', self.m, self.l, self.g, self.dt) + # print('action dist: ', self.action_dist) + # print('obs noise: ', self.obs_noise) + + 
def step(self, u): + self.ts += 1 + th, thdot = self.state # th := theta + + g = self.g + m = self.m + l = self.l + dt = self.dt + + u_r = u[0] + action_dist = self.np_random.uniform(low=0., high=np.abs(self.action_dist)) * np.sign(self.action_dist) + u = u_r + action_dist + u = np.clip(u, -self.max_torque, self.max_torque) + costs = angle_normalize(th) ** 2 + .1 * thdot ** 2 + .01 * (u_r ** 2) + + self.last_u = u_r # for rendering + + newthdot = thdot + (-3 * g / (2 * l) * np.sin(th + np.pi) + 3. / (m * l ** 2) * u) * dt + newth = th + newthdot * dt + newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) + + self.state = np.array([newth, newthdot]) + self.act_prev = u_r + + done = bool(self.ts > self.max_ts) + + return self._get_obs(), -costs, done, {} + + def reset(self): + self.ts = 0 + self.obs = None + self.act_prev = None + high = np.array([np.pi, 1]) + self.state = self.np_random.uniform(low=-high, high=high) + self.last_u = None + return self._get_obs() + + def _get_obs(self): + theta, thetadot = self.state + self.obs_prev = self.obs + self.obs = np.array([np.cos(theta), np.sin(theta), thetadot]) + obs_noise = self.np_random.uniform(low=0., high=np.abs(self.obs_noise)) * np.sign(self.obs_noise) + self.obs += obs_noise + return self.obs + + def render(self, mode='human'): + if self.viewer is None: + from gym.envs.classic_control import rendering + self.viewer = rendering.Viewer(500, 500) + self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2) + rod = rendering.make_capsule(1, .2) + rod.set_color(.8, .3, .3) + self.pole_transform = rendering.Transform() + rod.add_attr(self.pole_transform) + self.viewer.add_geom(rod) + axle = rendering.make_circle(.05) + axle.set_color(0, 0, 0) + self.viewer.add_geom(axle) + fname = path.join(path.dirname(__file__), "misc/clockwise.png") + self.img = rendering.Image(fname, 1., 1.) 
+ self.imgtrans = rendering.Transform() + self.img.add_attr(self.imgtrans) + + self.viewer.add_onetime(self.img) + self.pole_transform.set_rotation(self.state[0] + np.pi / 2) + if self.last_u: + self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2) + + return self.viewer.render(return_rgb_array=mode == 'rgb_array') + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None + + def estimate_disturbance(self): + if self.act_prev is None: + return 0. + + g, m, l, dt = self.g_n, self.m_n, self.l_n, self.dt_n + costh, sinth, thdot = self.obs # th := theta + costh_prev, sinth_prev, thdot_prev = self.obs_prev + + act_prev_est = (m * l ** 2) / 3. * ((thdot - thdot_prev) / dt - 3 * g / (2 * l) * sinth_prev) + + return act_prev_est - self.act_prev + + def random_action(self): + return self.action_space.sample() + + +def angle_normalize(x): + return (((x+np.pi) % (2*np.pi)) - np.pi) + + +if __name__ == "__main__": + from stable_baselines3.common.env_checker import check_env + env = PendulumEnv(200) + check_env(env) diff --git a/agent/iiwa_sample_env.py b/agent/iiwa_sample_env.py deleted file mode 100644 index 4e1ea0a..0000000 --- a/agent/iiwa_sample_env.py +++ /dev/null @@ -1,420 +0,0 @@ -""" -A sample Env class inheriting from the DART-Unity Env for the Kuka LBR iiwa manipulator with 7 links and a Gripper -The parent class takes care of integrating DART Engine with Unity simulation environment -Unity is used as the main simulator for physics/rendering computations. -The Unity interface receives joint velocities as commands and returns joint positions and velocities -DART is used to calculate inverse kinematics of the iiwa chain. -DART changes the agent action space from the joint space to the cartesian space -(position-only or pose/SE(3)) of the end-effector. -action_by_pd_control method can be called to implement a Proportional-Derivative control law instead of an RL policy. 
-Note: Coordinates in the Unity simulator are different from the ones in DART which used here: -The mapping is [X, Y, Z] of Unity is [-y, z, x] of DART -DART uses Eigen library - Geometry module to retrieve transformation matrices of chains of rigidbodies -Documentation: https://eigen.tuxfamily.org/dox/group__Geometry__Module.html -Background details: https://en.wikipedia.org/wiki/Axis%E2%80%93angle_representation -Further code details: https://github.com/dartsim/dart/blob/main/dart/math/Geometry.cpp -""" - -import numpy as np -import dartpy as dart -# import cv2 -# import base64 -from gym import spaces -from dart_envs.iiwa_dart_unity import IiwaDartUnityEnv - - -class IiwaSampleEnv(IiwaDartUnityEnv): - - # Variables below exist in the parent class, hence the names should not be changed - - # Min distance to declare that the target is reached by the end-effector, adapt the values based on your task - MIN_POS_DISTANCE = 0.05 # [m] - MIN_ROT_DISTANCE = 0.1 # [rad] - - # admissible range for joint positions, velocities, accelerations and torques of the iiwa kinematic chain - MAX_JOINT_POS = np.deg2rad([170, 120, 170, 120, 170, 120, 175]) # [rad]: based on the specs - MIN_JOINT_POS = -MAX_JOINT_POS - MAX_JOINT_VEL = np.deg2rad([85, 85, 100, 75, 130, 135, 135]) # [rad/s]: based on the specs - MAX_JOINT_ACC = 3.0 * MAX_JOINT_VEL # [rad/s^2]: just approximation due to no existing data - MAX_JOINT_TORQUE = np.array([320, 320, 176, 176, 110, 40, 40]) # [Nm]: based on the specs - - # admissible range for Cartesian pose translational and rotational velocities and accelerations of the end-effector - MAX_EE_CART_VEL = np.full(3, 10.0) # [m/s] --- not optimized values - MAX_EE_CART_ACC = np.full(3, 3.0) # [m/s^2] --- not optimized values - MAX_EE_ROT_VEL = np.full(3, 4.0) # [rad/s] --- not optimized values - MAX_EE_ROT_ACC = np.full(3, 1.2) # [rad/s^2] --- not optimized values - - def __init__(self, max_ts, orientation_control, use_ik, ik_by_sns, - enable_render, state_type, 
with_gripper=True, env_id=0): - - # range of vertical, horizontal pixels for the DART viewer - viewport = (0, 0, 500, 500) - - self.state_type = state_type - self.reset_flag = False - self.with_gripper = with_gripper - - # the init of the parent class should be always called, this will in the end call reset() once - super().__init__(max_ts=max_ts, orientation_control=orientation_control, - use_ik=use_ik, ik_by_sns=ik_by_sns, enable_render=enable_render, - viewport=viewport, env_id=env_id) - - # Some attributes that are initialized in the parent class: - # self.reset_counter --> keeps track of the number of resets performed - # self.reset_state --> reset vector for initializing the Unity simulator at each episode - # self.tool_target --> initial position for the gripper state - - # the lines below should stay as it is - self.MAX_EE_VEL = self.MAX_EE_CART_VEL - self.MAX_EE_ACC = self.MAX_EE_CART_ACC - if self.ORIENTATION_CONTROL: - self.MAX_EE_VEL = np.concatenate((self.MAX_EE_ROT_VEL, self.MAX_EE_VEL)) - self.MAX_EE_ACC = np.concatenate((self.MAX_EE_ROT_ACC, self.MAX_EE_ACC)) - - # the lines below wrt action_space_dimension should stay as it is - self.action_space_dimension = self.n_links # there would be 7 actions in case of joint-level control - if self.USE_IK: - # There are three cartesian coordinates x,y,z for inverse kinematic control - self.action_space_dimension = 3 - if self.ORIENTATION_CONTROL: - # and the three rotations around each of the axis - self.action_space_dimension += 3 - if self.with_gripper: - self.action_space_dimension += 1 # gripper velocity - - # Variables below exist in the parent class, hence the names should not be changed - tool_length = 0.2 # [m] allows for some tolerances in maximum observation - # x,y,z of TCP: maximum reach of arm plus tool length in meters - ee_pos_high = np.array([0.95 + tool_length, 0.95 + tool_length, 1.31 + tool_length]) - ee_pos_low = -np.array([0.95 + tool_length, 0.95 + tool_length, 0.39 + tool_length]) - - 
high = np.empty(0) - low = np.empty(0) - if self.ORIENTATION_CONTROL: - # rx,ry,rz of TCP: maximum orientation in radians without considering dexterous workspace - ee_rot_high = np.full(3, np.pi) - # observation space is distance to target orientation (rx,ry,rz), [rad] - high = np.append(high, ee_rot_high) - low = np.append(low, -ee_rot_high) - - # and distance to target position (dx,dy,dz), [m] - high = np.append(high, ee_pos_high - ee_pos_low) - low = np.append(low, -(ee_pos_high - ee_pos_low)) - - # and joint positions [rad] and possibly velocities [rad/s] - if 'a' in self.state_type: - high = np.append(high, self.MAX_JOINT_POS) - low = np.append(low, self.MIN_JOINT_POS) - if 'v' in self.state_type: - high = np.append(high, self.MAX_JOINT_VEL) - low = np.append(low, -self.MAX_JOINT_VEL) - - # the lines below should stay as it is - self.action_space = spaces.Box(-np.ones(self.action_space_dimension), np.ones(self.action_space_dimension), - dtype=np.float32) - self.observation_space = spaces.Box(low, high, dtype=np.float32) - - self.reset() - - def create_target(self): - """ - defines the target to reach per episode, this should be adapted by the task - should always return rx,ry,rz,x,y,z in order, - i.e., first target orientation rx,ry,rz in radians, and then target position x,y,z in meters - in case of orientation_control=False --> rx,ry,rz are irrelevant and can be set to zero - _random_target_gen_joint_level(): generate a random sample target in the reachable workspace of the iiwa - - :return: Cartesian pose of the target to reach in the task space - """ - # a sample fixed target on the right side of iiwa with orientation pointing downwards - # rx, ry, rz = 0.0, np.pi, 0.0 - # x, y, z = 0.0, 0.57, 0.655 - # target = rx, ry, rz, x, y, z - - # a sample reachable target generator - target = self._random_target_gen_joint_level() - - return target - - def get_pos_error(self): - """ - calculates the Euclidean error from the end-effector to the target position - 
body_node.getTransform().translation() --> Cartesian position (x,y,z) of the center of that body_node - - :return: Euclidean error from the end-effector to the target position - """ - ee = self.dart_chain.getBodyNode('iiwa_link_ee') # The end-effector rigid-body node - target = self.dart_target.getBodyNode(0) # The target rigid-body node - position_error = target.getTransform().translation() - ee.getTransform().translation() - return position_error - - def get_pos_distance(self): - """ - :return: L2-norm of the Euclidean error from the end-effector to the target position - """ - distance = np.linalg.norm(self.get_pos_error()) - return distance - - def get_rot_error(self): - """ - calculates the Quaternion error from the end-effector to the target orientation - body_node.getTransform().rotation() --> Orientation of the body_node in a 3*3 rotation matrix - body_node.getTransform().quaternion() --> Orientation of the body_node in a unit quaternion form (w,x,y,z) - dart.math.logMap() --> calculates an angle-axis representation of a rotation matrix - - :return: returns Quaternion error from the end-effector to the target orientation - """ - ee = self.dart_chain.getBodyNode('iiwa_link_ee') # The end-effector rigid-body node - target = self.dart_target.getBodyNode(0) # The target rigid-body node - quaternion_error = target.getTransform().quaternion().multiply(ee.getTransform().quaternion().inverse()) - orientation_error = dart.math.logMap(quaternion_error.rotation()) - return orientation_error - - def get_rot_distance(self): - """ - :return: L2-norm of the Quaternion error from the end-effector to the target orientation - """ - distance = np.linalg.norm(self.get_rot_error()) - return distance - - def get_state(self): - """ - defines the environment state, this should be adapted by the task - get_pos_error(): returns Euclidean error from the end-effector to the target position - get_rot_error(): returns Quaternion error from the end-effector to the target orientation - - 
:return: state for the policy training - """ - state = np.empty(0) - if self.ORIENTATION_CONTROL: - state = np.append(state, self.get_rot_error()) - state = np.append(state, self.get_pos_error()) - if 'a' in self.state_type: - state = np.append(state, self.dart_chain.getPositions()) - if 'v' in self.state_type: - state = np.append(state, self.dart_chain.getVelocities()) - - # the lines below should stay as it is - self.observation_state = np.array(state) - - return self.observation_state - - def get_reward(self, action): - """ - defines the environment reward, this should be adapted by the task - - :param action: is the current action decided by the RL agent - :return: reward for the policy training - """ - # stands for reducing position error - reward = -self.get_pos_distance() - - # stands for reducing orientation error - if self.ORIENTATION_CONTROL: - reward -= 0.5 * self.get_rot_distance() - - # stands for avoiding abrupt changes in actions - reward -= 0.1 * np.linalg.norm(action - self.prev_action) - - # stands for shaping the reward to increase when target is reached to balance at the target - if self.get_terminal_reward(): - reward += 1.0 * (np.linalg.norm(np.ones(self.action_space_dimension)) - np.linalg.norm(action)) - - # the lines below should stay as it is - self.reward_state = reward - - return self.reward_state - - def get_terminal_reward(self): - """ - checks if the target is reached - get_pos_distance(): returns norm of the Euclidean error from the end-effector to the target position - get_rot_distance(): returns norm of the Quaternion error from the end-effector to the target orientation - - :return: a boolean value representing if the target is reached within the defined threshold - """ - if self.get_pos_distance() < self.MIN_POS_DISTANCE: - if not self.ORIENTATION_CONTROL: - return True - if self.get_rot_distance() < self.MIN_ROT_DISTANCE: - return True - - return False - - def get_terminal(self): - """ - checks the terminal conditions for the 
episode - - :return: a boolean value indicating if the episode should be terminated - """ - - if self.time_step > self.max_ts: - self.reset_flag = True - - return self.reset_flag - - def update_action(self, action): - """ - converts env action to the required unity action by possibly using inverse kinematics from DART - _dart_calc_inv_kinematics() --> changes the action from velocities in the task space (position-only 'x,y,z' - or complete pose 'rx,ry,rz,x,y,z') to velocities in the joint space of the kinematic chain (j1,...,j7) - - :param action: The action vector decided by the RL agent, acceptable range: [-1,+1] - It should be a numpy array with the following shape: [arm_action] or [arm_action, tool_action] - in case of with_gripper=True, tool_action is always a dim-1 scalar value representative of the normalized gripper velocity - arm_action has different dim based on each case of control level: - in case of use_ik=False -> is dim-7 representative of normalized joint velocities - in case of use_ik=True -> there would be two cases: - orientation_control=False -> of dim-3: Normalized EE Cartesian velocity in x,y,z DART coord - orientation_control=True -> of dim-6: Normalized EE Rotational velocity in x,y,z DART coord - followed by Normalized EE Cartesian velocity in x,y,z DART coord - - :return: the command to send to the Unity simulator including joint velocities and possibly gripper position - """ - - # the lines below should stay as it is - self.action_state = action - action = np.clip(action, -1., 1.) 
- - if self.with_gripper: - # This updates the gripper target by accumulating the tool velocity from the action vector - tool_action = action[-1] - self.tool_target = np.clip((self.tool_target + tool_action), 0.0, 90.0) - # This removes the gripper velocity from the action vector for inverse kinematics calculation - action = action[:-1] - - if self.USE_IK: - task_vel = self.MAX_EE_VEL * action - joint_vel = self._dart_calc_inv_kinematics(task_vel) - else: - joint_vel = self.MAX_JOINT_VEL * action - - unity_action = joint_vel - if self.with_gripper: - unity_action = np.append(unity_action, [float(self.tool_target)]) - - return unity_action - - def update(self, observation): - """ - converts the unity observation to the required env state defined in get_state() - with also computing the reward value from the get_reward(...) and done flag, - it increments the time_step, and outputs done=True when the environment should be reset - - :param observation: is the observation received from the Unity simulator within its [X,Y,Z] coordinate system - 'joint_values': indices [0:7], - 'joint_velocities': indices [7:14], - 'ee_position': indices [14:17], - 'ee_orientation': indices [17:20], - 'target_position': indices [20:23], - 'target_orientation': indices [23:26], - 'object_position': indices [26:29], - 'object_orientation': indices [29:32], - 'gripper_position': indices [32:33], ---(it is optional, in case a gripper is enabled) - 'collision_flag': indices [33:34], ---([32:33] in case of without gripper) - - :return: The state, reward, episode termination flag (done), and an empty info dictionary - """ - - # use commented lines below in case you'd like to save the image observations received from the Unity - # base64_bytes = observation['ImageData'][0].encode('ascii') - # image_bytes = base64.b64decode(base64_bytes) - # image = np.frombuffer(image_bytes, np.uint8) - # image = cv2.imdecode(image, cv2.IMREAD_GRAYSCALE) - # print(image.shape) - # cv2.imwrite("test.jpg", image) - 
- if observation['Observation'][-1] == 1.0: - self.reset_flag = True - - # the method below handles synchronizing states of the DART kinematic chain with the observation from Unity - # hence it should be always called - self._unity_retrieve_joint_values(observation['Observation']) - - # Class attributes below exist in the parent class, hence the names should not be changed - self.time_step += 1 - - state = self.get_state() - reward = self.get_reward(self.action_state) - done = bool(self.get_terminal()) - info = {} - - self.prev_action = self.action_state - - return state, reward, done, info - - def reset(self, temp=False): - """ - resets the DART-gym environment and creates the reset_state vector to be sent to Unity - - :param temp: not relevant - - :return: The initialized state - """ - - # takes care of resetting the DART chain and should stay as it is - state = super().reset() - - # creates a random target for reaching task, should be adapted for your specific task - random_target = self.create_target() - - # sets the initial reaching target for the current episode, - # should be always called in the beginning of each episode, - # you might need to call it even during the episode run to change the reaching target for the IK-P controller - self.set_target(random_target) - - # initial position for the gripper state, accumulates the tool_action velocity received in update_action - self.tool_target = 0.0 # should be in range [0.0,90.0] - - # movement control of each joint can be disabled by setting zero for that joint index - active_joints = [1] * 7 - - # the lines below should stay as it is, Unity simulator expects these joint values in radians - joint_positions = self.dart_chain.getPositions().tolist() - joint_velocities = self.dart_chain.getVelocities().tolist() - - # the mapping below for the target should stay as it is, unity expects orientations in degrees - target_positions = self.dart_target.getPositions().tolist() - target_X, target_Y, target_Z = 
-target_positions[4], target_positions[5], target_positions[3] - target_RX, target_RY, target_RZ = np.rad2deg([-target_positions[1], target_positions[2], target_positions[0]]) - target_positions_mapped = [target_X, target_Y, target_Z, target_RX, target_RY, target_RZ] - - # depending on your task, positioning the object might be necessary, - # start from the following sample code, unity expects orientations in degrees - object_X, object_Y, object_Z = np.random.uniform(-1.0, 1.0), 0.05, np.random.uniform(-1.0, 1.0) - object_RX, object_RY, object_RZ = 0.0, 0.0, 0.0 - object_positions_mapped = [object_X, object_Y, object_Z, object_RX, object_RY, object_RZ] - - self.reset_state = active_joints + joint_positions + joint_velocities\ - + target_positions_mapped + object_positions_mapped - - if self.with_gripper: - self.reset_state = self.reset_state + [self.tool_target] - - self.reset_counter += 1 - self.reset_flag = False - - return state - - def action_by_p_control(self, coeff_kp_lin, coeff_kp_rot): - """ - computes the task-space velocity commands proportional to the reaching target error - - :param coeff_kp_lin: proportional coefficient for the translational error - :param coeff_kp_rot: proportional coefficient for the rotational error - - :return: The action in task space - """ - - action_lin = coeff_kp_lin * self.get_pos_error() - action = action_lin - - if self.ORIENTATION_CONTROL: - action_rot = coeff_kp_rot * self.get_rot_error() - action = np.concatenate(([action_rot, action])) - - if self.with_gripper: - tool_vel = 0.0 # zero velocity means no gripper movement - should be adapted for the task - action = np.append(action, [tool_vel]) - - return action diff --git a/agent/main.py b/agent/main.py old mode 100644 new mode 100755 index 2ff37c0..315bda6 --- a/agent/main.py +++ b/agent/main.py @@ -1,84 +1,157 @@ +import os + from config import Config -from iiwa_sample_joint_vel_env import IiwaJointVelEnv -from iiwa_sample_env import IiwaSampleEnv + +import numpy as np 
+from pathlib import Path +import pandas as pd + +# Import envs # from simulator_vec_env import SimulatorVecEnv +from envs.iiwa_sample_joint_vel_env import IiwaJointVelEnv +from envs.iiwa_sample_env import IiwaSampleEnv + +# Monitor envs +from stable_baselines3.common.vec_env import VecMonitor + +# Models # from stable_baselines3 import PPO -import numpy as np +from utils.helpers import set_seeds -def get_env(config_dict, env_dict): - env_key = config_dict['env_key'] +def get_env(config_dict, dart_env_dict, reward_dict, log_dir): + """ + Set-up the env according to the input dictionary settings + """ + env_key = config_dict['env_key'] def create_env(id=0): - # 'dart' should be always included in the env_key when there is a need to use a dart based environment + + ################################################################################################################################# + # Important: 'dart' substring should always be included in the 'env_key' for dart-based envs. E.g. 
'iiwa_sample_dart_unity_env' # + # If 'dart' is not included, IK behaviour can not be used # + ################################################################################################################################# + + # joints control without dart if env_key == 'iiwa_joint_vel': - env = IiwaJointVelEnv(max_ts=250, id=id, config=config_dict) + env = IiwaJointVelEnv(max_ts=dart_env_dict['max_time_step'], id=id, config=config_dict) + + # Reaching the red target sample env + # task-space with dart or joint space control + # model-based control with P-controller available elif env_key == 'iiwa_sample_dart_unity_env': - env = IiwaSampleEnv(max_ts=env_dict['max_time_step'], - orientation_control=env_dict['orientation_control'], - use_ik=env_dict['use_inverse_kinematics'], - ik_by_sns=env_dict['linear_motion_conservation'], - enable_render=env_dict['enable_dart_viewer'], - state_type=config_dict['state'], - with_gripper=config_dict['gripper'], + env = IiwaSampleEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], + task_monitor=dart_env_dict['task_monitor'], with_objects=dart_env_dict['with_objects'], + target_mode=dart_env_dict['target_mode'], target_path=dart_env_dict['target_path'], + goal_type="target", joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], robotic_tool=config_dict["robotic_tool"], env_id=id) + + # Set env seed # + env.seed((id * 150) + (id + 
11)) + return env num_envs = config_dict['num_envs'] env = [create_env for i in range(num_envs)] - env = SimulatorVecEnv(env, config_dict) - return env + env = SimulatorVecEnv(env, config_dict, manual_actions_dict=None, reward_dict=reward_dict) # Set vectorized env + env = VecMonitor(env, log_dir, info_keywords=("success",)) # Monitor envs + return env if __name__ == "__main__": - config_dict = Config.get_config_dict() - env_dict = Config.get_dart_env_dict() - env = get_env(config_dict, env_dict) - - # check policy training and inference for joint velocity environment, use with - - model = PPO("MlpPolicy", env, verbose=1) - model.learn(total_timesteps=100000) - model.save("ppo_trained") - - del model # remove to demonstrate saving and loading - - model = PPO.load("ppo_trained") - - obs = env.reset() - for x in range(1000): - action, _states = model.predict(obs, True) - obs, rewards, dones, info = env.step(action) - - - # check model-based controllers, update environment key in config.py, comment lines 40-51 and uncomment below code - # to test dart-enabled environments - - # control_kp = 1.0 / env.observation_space.high[0] - - # if config_dict['seed'] is not None: - # env.seed(config_dict['seed']) - - # obs = env.reset() - # episode_rewards = [] - # for _ in range(100): - # cum_reward = 0 - # while True: - # if env_dict['use_inverse_kinematics']: - # action = np.reshape(env.env_method('action_by_p_control', control_kp, 2.0 * control_kp), - # (config_dict['num_envs'], env.action_space.shape[0])) - # else: - # action = np.reshape(env.env_method('random_action'), - # (config_dict['num_envs'], env.action_space.shape[0])) - # obs, rewards, dones, info = env.step(action) - # cum_reward += rewards - # if env_dict['enable_dart_viewer']: - # env.render() - # if dones.any(): - # episode_rewards.append(cum_reward) - # break - # mean_reward = np.mean(episode_rewards) - - # print(mean_reward) + main_config = Config() + + # Parse configs # + config_dict = 
main_config.get_config_dict() + dart_env_dict = main_config.get_dart_env_dict() + reward_dict = main_config.get_reward_dict() + + # Create new folder if not exists for logging # + Path(config_dict["log_dir"]).mkdir(parents=True, exist_ok=True) + + # Build env # + env = get_env(config_dict, dart_env_dict, reward_dict, config_dict["log_dir"]) + + # Train the agent # + if(config_dict['simulation_mode'] == 'train'): + + # Set global seeds and get a PPO seed # + ppo_seed = set_seeds(config_dict["seed"]) + + # Define the model and its hyperparameters # + model = PPO(policy="MlpPolicy", env=env, seed=ppo_seed, tensorboard_log=config_dict["log_dir"], verbose=1) + + # Play some episodes # + # If you retrain the model, you may need to set reset_num_timesteps=False # + model.learn(total_timesteps=dart_env_dict["total_timesteps"], reset_num_timesteps=True, tb_log_name=config_dict["tb_log_name"], log_interval=2) + + print("Training ended. Saving a checkpoint at: " + config_dict["log_dir"]) + + # Save the last model # + model.save(os.path.join(config_dict["log_dir"], "ppo_trained")) + + del model # remove + + elif(config_dict['simulation_mode'] == 'evaluate'): + print("===================================================") + print("RL-based evaluation") + print("===================================================") + + # Load trained agent # + model = PPO.load(os.path.join(config_dict["log_dir"], "ppo_trained")) + model.policy.set_training_mode(False) + + obs = env.reset() + for x in range(1000): # Run some steps for each env + action, _states = model.predict(obs, deterministic=True) # Important: set deterministic to True to use the best learned policy (no exploration) + + obs, rewards, dones, info = env.step(action) + + # Render # + if dart_env_dict['enable_dart_viewer'] and config_dict['env_key'] != 'iiwa_joint_vel': + env.render() + + elif(config_dict['simulation_mode'] == 'evaluate_model_based' and config_dict['env_key'] != 'iiwa_joint_vel'): + # check model-based controllers 
(e.g. P-controller) # + print("===================================================") + print("Model-based evaluation") + print("===================================================") + + control_kp = 1.0 / env.observation_space.high[0] + + obs = env.reset() + episode_rewards = [] + for _ in range(5): # Play some episodes + cum_reward = 0 + + while True: # Play until we have a successful episode + if dart_env_dict['use_inverse_kinematics']: # Generate an action for the current observation using a P-controller + action = np.reshape(env.env_method('action_by_p_control', control_kp, 2.0 * control_kp), + (config_dict['num_envs'], env.action_space.shape[0])) + else: # Random action + action = np.reshape(env.env_method('random_action'), + (config_dict['num_envs'], env.action_space.shape[0])) + + obs, rewards, dones, info = env.step(action) # Play this action + cum_reward += rewards + + # Render # + if dart_env_dict['enable_dart_viewer']: + env.render() + + if dones.any(): + episode_rewards.append(cum_reward) + break + + mean_reward = np.mean(episode_rewards) + print("Mean reward: " + str(mean_reward)) + else: + print("You have set an invalid simulation_mode or some other settings in the config.py are wrong - aborting") diff --git a/agent/main_advanced.py b/agent/main_advanced.py new file mode 100755 index 0000000..bbc7ab7 --- /dev/null +++ b/agent/main_advanced.py @@ -0,0 +1,123 @@ +import os +from pathlib import Path +import numpy as np + +from config_advanced import ConfigAdvanced + +# Model +from stable_baselines3 import PPO + +# Policy networks +from utils.policy_networks import PolicyNetworkVanillaReLU + +from utils.helpers import set_seeds + +# Set-up envs +from utils_advanced.helpers import get_env + +from utils_advanced.boxes_generator import RandomBoxesGenerator +from utils_advanced.monitoring_agent import SaveOnBestTrainingRewardCallback + +# Evaluate methods +from utils_advanced.evaluate import evaluation_mode + +def train_mode(ppo_seed, config_dict, 
dart_env_dict, gym_env_dict, hyper_dict, goal_dict, reward_dict, manual_actions_dict=None, randomization_dict=None, randomBoxesGenerator=None): + """ + train the RL agent. Play some episodes, record metrics, and save checkpoints + + :param ppo_seed: ppo seed + :param config_dict: configuration dictionary + :param dart_env_dict: dart dictionary + :param gym_env_dict: env dictionary + :param hyper_dict: hyperparameters dict - models related + :param goal_dict: goal dict - target, or green box + :param reard_dict: reward dict - reward terms values and ranges + :param manual_actions_dict: manual action dict + :param randomization_dict: randomization dict + :param randomBoxesGenerator: boxes generator to spawn boxes for planar grasping + """ + + # Check if other runs already exist in the log dir # + run_num = len([file for file in os.listdir(config_dict["log_dir"]) if file.startswith("run_")]) # How many folders already exist + config_dict["log_dir"] += "run_" + str(run_num) + "/" + + # Create new folder for logs # + Path(config_dict["log_dir"]).mkdir(parents=True, exist_ok=True) + + # Build env # + env = get_env(config_dict, dart_env_dict, gym_env_dict, config_dict["log_dir"], reward_dict, + goal_dict, manual_actions_dict, randomization_dict, randomBoxesGenerator) + + print("Using for training the env: " + config_dict["env_key"]) + + # Callback - save checkpoints and log best reward # + callback_save_on_best = SaveOnBestTrainingRewardCallback(config_dict["check_freq"], save_model_freq=config_dict["save_model_freq"], + log_dir=config_dict["log_dir"], total_timesteps=dart_env_dict["total_timesteps"], + num_envs=config_dict["num_envs"], best_mean=config_dict["best_mean_reward"]) + + # Select policy network # + if hyper_dict["policy_network"] is None: + policy_kwargs = None + print("Using the default SB3 policy") + elif hyper_dict["policy_network"] == "PolicyNetworkVanillaReLU": + policy_kwargs = PolicyNetworkVanillaReLU() + print("Using custom policy with kwargs:\n") + 
print(policy_kwargs) + else: + policy_kwargs = None + print("This policy network is not supported. Using the default SB3 policy") + + # Define the RL model and its hyperparameters # + model = PPO(policy=hyper_dict["policy"], env=env, seed=ppo_seed, learning_rate=hyper_dict["learning_rate"], ent_coef=hyper_dict["ent_coef"], + vf_coef=hyper_dict["vf_coef"], max_grad_norm=hyper_dict["max_grad_norm"], gae_lambda=hyper_dict["gae_lambda"], n_epochs=hyper_dict["n_epochs"], + n_steps=hyper_dict["n_steps"], batch_size=hyper_dict["batch_size"], gamma=hyper_dict["gamma"], clip_range=hyper_dict["clip_range"], + tensorboard_log=config_dict["log_dir"], policy_kwargs=policy_kwargs, verbose=1) + + # Play some episodes # + model.learn(total_timesteps=dart_env_dict["total_timesteps"], callback=callback_save_on_best, reset_num_timesteps=True, tb_log_name=config_dict["tb_log_name"], log_interval=2) + + print("Training ended") + # Save the last model # + #model.save(os.path.join(config_dict["log_dir"], "ppo_trained")) + + del model # remove + +if __name__ == "__main__": + + main_config = ConfigAdvanced() + + # Parse configs # + config_dict = main_config.get_config_dict() + dart_env_dict = main_config.get_dart_env_dict() + gym_env_dict = main_config.get_env_dict() + reward_dict = main_config.get_reward_dict() + hyper_dict = main_config.get_hyper_dict() + goal_dict = main_config.get_goal_dict() + manual_actions_dict = main_config.get_manual_actions_dict() + randomization_dict = main_config.get_randomization_dict() + + # Set global seeds and get a PPO seed # + ppo_seed = set_seeds(config_dict["seed"]) + + # Set-up the random boxes generator - for planar envs # + if(goal_dict["goal_type"] == "box" and config_dict["env_key"].find("planar") != -1): + randomBoxesGenerator = RandomBoxesGenerator(box_mode=goal_dict["box_mode"], box_samples=goal_dict["box_samples"], box_split=goal_dict["box_split"], box_save_val=goal_dict["box_save_val"], box_load_val=goal_dict["box_load_val"], + 
box_radius_val=goal_dict["box_radius_val"], box_min_distance_base=goal_dict["box_min_distance_base"], box_max_distance_base=goal_dict["box_max_distance_base"], box_folder=goal_dict["box_folder"], + box_x_min=goal_dict["box_x_min"], box_x_max=goal_dict["box_x_max"], box_x_active=goal_dict["box_x_active"], box_z_min=goal_dict["box_z_min"], box_z_max=goal_dict["box_z_max"], + box_z_active=goal_dict["box_z_active"], box_ry_min=goal_dict["box_ry_min"], box_ry_max=goal_dict["box_ry_max"], box_ry_active=goal_dict["box_ry_active"], box_debug=goal_dict["box_debug"] + ) + else: + randomBoxesGenerator = None + + # Train the RL-agent # + if(config_dict['simulation_mode'] == 'train'): + train_mode(ppo_seed, config_dict, dart_env_dict, gym_env_dict, hyper_dict, goal_dict, reward_dict, + manual_actions_dict, randomization_dict, randomBoxesGenerator) + + # Evaluate agents - model-based or rl-based # + elif(config_dict['simulation_mode'] == 'evaluate' or config_dict["simulation_mode"] == 'evaluate_model_based'): + evaluation_mode(config_dict, dart_env_dict, gym_env_dict, hyper_dict, goal_dict, reward_dict, + manual_actions_dict, randomization_dict, randomBoxesGenerator + ) + else: + print("You have set an invalid simulation_mode or some other settings in the config.py are wrong") diff --git a/agent/models/ruckig_planar_model.py b/agent/models/ruckig_planar_model.py new file mode 100644 index 0000000..e16bbed --- /dev/null +++ b/agent/models/ruckig_planar_model.py @@ -0,0 +1,181 @@ +""" +A time-optimal trajectory generation model - RUCKIG + +Notes: + - The DoF are optimized to reach at the same time the goal + - Support for 2 or 3 DoF control - it is used for planar envs - adapt to your task + - The accelerations and velocities are approximated by 'first-order' approximation - e.g. (dx1 - dx2) / control_cycle + - Docs: https://docs.ruckig.com/index.html + - ruckig allows for both task and joint space control. 
It is used for task-space control and the agent with IK then maps the velocities to the joint space +Important: + -All the calculations are in the DART coordinate system +""" + +import numpy as np + +from ruckig import InputParameter, OutputParameter, Result, Ruckig + +class RuckigPlanarModel(): + + def __init__(self, env, control_cycle, hyper_dict): + self.envs = env # Gym env: 'iiwa_ruckig_planar_grasping_dart_unity_env' + self.control_cycle = control_cycle # In sec. Set the same value in the Unity simulator + self.dof = np.asarray(hyper_dict["dof"]) + self.target_velocity = hyper_dict["target_vel"] # Velocity in the target pose + self.target_acceleration = hyper_dict["target_acc"] + self.max_vel = hyper_dict["max_vel"] + self.max_acc = hyper_dict["max_acc"] + self.max_jerk = hyper_dict["max_jerk"] + + ########### + # Set DoF # + ########### + self.dof = np.count_nonzero(self.dof) + if(self.dof != 3 and self.dof != 2): + raise Exception("Only 2 or 3 DoF are supported for Ruckig Planar Model") + + self.target_velocity = self.target_velocity[3-self.dof:] + self.target_acceleration = self.target_velocity[3-self.dof:] + self.max_vel = self.max_vel[3-self.dof:] + self.max_acc = self.max_acc[3-self.dof:] + self.max_jerk = self.max_jerk[3-self.dof:] + + ############################### + # DoF we control # + # Dart 0 -> rotation Zd = Yu # + # Dart 1 -> position Xd = Zu # + # Dart 2 -> position Yd = -Xu # + ############################### + + ############################################################################## + # Set ruckig objects for each env. 
Supports many envs but the processing # + # is performed in a sequential manner - no GPU support or parallelization # + ############################################################################## + self.ruckig_models = {"otg": [], "res": [], "inp": [], "out": [], + "prev_pose": [], "prev_vels": [], "prev_accs": [], "time_step": [] + } + + # See ruckig documentation for better understanding # + for i in range(self.envs.num_envs): + self.ruckig_models["otg"].append(Ruckig(self.dof, self.control_cycle)) + self.ruckig_models["res"].append(Result.Working) # Flag + self.ruckig_models["out"].append(OutputParameter(self.dof)) # Velocities result of optimization + + self.ruckig_models["inp"].append(InputParameter(self.dof)) # Current pose + self.ruckig_models["inp"][i].target_velocity = self.target_velocity + self.ruckig_models["inp"][i].target_acceleration = self.target_acceleration + self.ruckig_models["inp"][i].max_velocity = self.max_vel + self.ruckig_models["inp"][i].max_acceleration = self.max_acc + self.ruckig_models["inp"][i].max_jerk = self.max_jerk + + # For velocity and acceleration 'first-order' approximation # + self.ruckig_models["prev_pose"].append(np.zeros(self.dof)) + self.ruckig_models["prev_vels"].append(np.zeros(self.dof)) + self.ruckig_models["prev_accs"].append(np.zeros(self.dof)) + self.ruckig_models["time_step"].append(1) # Reset - at the beginning of each episode + + def predict(self, obs, deterministic=True): + """ + given the observations from 'iiwa_ruckig_planar_grasping_dart_unity_env' type envs, generate time-optimal task-space velocities using the ruckig algorithm + Note: the agents will receive ruckig generated velocities, and then using IK - update_action() method - joint velocities will be sent to the unity simulator + + :param obs: environments observation shape(num_envs, 5 or 6) - depending on the active DoF (rotation control is active) + - Format: [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] + - Should reset, rotation 
of ee in dart, x position of the ee in dart, etc + - box pose is used only once (at reset_ruckig_model()) as the box does not move during the planar episode + - Not normalized in [-1, 1] as in most gym agents + :param deterministic: unused + + :return: action - shape(num_envs, 2 or 3). Important: the actions are not normalized in [-1, 1] as in the case of the PPO + """ + action = np.zeros((self.envs.num_envs, self.dof)) # For each env + + # For each env generate the next action given the gym env observations - dart coords # + for i in range(self.envs.num_envs): + + # Reset the target ruckig pose - a new agent episode has started # + # First "inp" is also set - refer to the function for more # + if(obs[i][0] == 1): + self.reset_ruckig_model(obs[i], i) + + if self.ruckig_models["res"][i] == Result.Working: + + # Current pose of the ee # + curr_pose = np.asarray(obs[i][1:self.dof+1]) + + curr_vels = self.ruckig_models["prev_vels"][i] + curr_accs = self.ruckig_models["prev_accs"][i] + + # Velocity approx # + if(self.ruckig_models["time_step"][i] >= 2): + curr_vels = (curr_pose - self.ruckig_models["prev_pose"][i]) / self.control_cycle + + # Acceleration approx # + if(self.ruckig_models["time_step"][i] >= 3): + curr_accs = (curr_vels - self.ruckig_models["prev_vels"][i]) / self.control_cycle + + ################################## + # Update - linear approximation # + # Vel and acc are both available # + ################################## + self.ruckig_models["inp"][i].current_velocity = curr_vels + self.ruckig_models["inp"][i].current_acceleration = curr_accs + + # Save current state # + self.ruckig_models["prev_pose"][i] = curr_pose + self.ruckig_models["prev_vels"][i] = curr_vels + self.ruckig_models["prev_accs"][i] = curr_accs + + # Get the next result given the current "inp". 
If "inp" is not expected, re-calculate a new trajectory # + self.ruckig_models["res"][i] = self.ruckig_models["otg"][i].update(self.ruckig_models["inp"][i], self.ruckig_models["out"][i]) + + # Get the new generated velocity # + ruckig_vel = self.ruckig_models["out"][i].new_velocity + + ##################################################### + # Ip == out # + # Move to the next step of the generated trajectory # + ##################################################### + self.ruckig_models["out"][i].pass_to_input(self.ruckig_models["inp"][i]) + + action[i] = ruckig_vel # Save to action array for this env + + self.ruckig_models["time_step"][i] += 1 + + return action, None + + def reset_ruckig_model(self, obs, index_model): + """ + reset target task-space pose for ruckig + + :affects: "inp" - current_position, target_position, target_velocity, target_acceleration + "res" - Working + "prev_pos", "prev_vels", "prev_accs", "time_step" + + :param obs: environment (single) observation shape(, 5 or 6) - depending on the active DoF + - Format: [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] + - Should reset, rotation of ee in dart, x position of the ee in dart, etc + - box pose is used to set the target planar pose (above the box) + + :param index_mode: which ruckig object to reset + """ + ############################### + # 3, or 2 DoF are controlled # + # Dart 0 -> rotation Zd = Yu # + # Dart 1 -> position Xd = Zu # + # Dart 2 -> position Yd = -Xu ########4444444444################################## + # obs for 2 DoF -> [reset, rz_ee_d, x_ee_d, y_ee_d, x_box_d, y_box_d] # + # obs for 3 DoF- > [reset, rz_ee_d, x_ee_d, y_ee_d, rz_box_d, x_box_d, y_box_d] # + ################################################4444444444######################## + self.ruckig_models["inp"][index_model].current_position = obs[1:self.dof+1] + + self.ruckig_models["inp"][index_model].target_position = obs[self.dof + 1:] + self.ruckig_models["inp"][index_model].target_velocity = 
self.target_velocity + self.ruckig_models["inp"][index_model].target_acceleration = self.target_acceleration + + self.ruckig_models["res"][index_model] = Result.Working + + self.ruckig_models["prev_pose"][index_model] = np.zeros(self.dof) + self.ruckig_models["prev_vels"][index_model] = np.zeros(self.dof) + self.ruckig_models["prev_accs"][index_model] = np.zeros(self.dof) + self.ruckig_models["time_step"][index_model] = 1 diff --git a/agent/simulator_vec_env.py b/agent/simulator_vec_env.py old mode 100644 new mode 100755 index 6fb67e0..482655e --- a/agent/simulator_vec_env.py +++ b/agent/simulator_vec_env.py @@ -9,17 +9,22 @@ import json import time -import numpy as np -import roslibpy -#import torch -from stable_baselines3.common.vec_env import DummyVecEnv import subprocess from subprocess import PIPE +import roslibpy import zmq import grpc -from misc import service_pb2_grpc -from misc.service_pb2 import StepRequest +import numpy as np +from stable_baselines3.common.vec_env import DummyVecEnv + +from utils import service_pb2_grpc +from utils.service_pb2 import StepRequest + +try: + from utils_advanced.manual_actions import configure_manual_settings_and_get_manual_function +except: + pass class CommandModel: @@ -29,11 +34,10 @@ def __init__(self, id, command, env_key, value): self.command = command self.value = value - class SimulatorVecEnv(DummyVecEnv): _client = None - def __init__(self, env_fns, config, spaces=None ): + def __init__(self, env_fns, config, manual_actions_dict, reward_dict, spaces=None ): """ envs: list of environments to create """ @@ -47,17 +51,26 @@ def __init__(self, env_fns, config, spaces=None ): self.current_step = 0 self.config = config + self.reward_dict = reward_dict self.communication_type = config['communication_type'] self.port_number = config['port_number'] - print(config["port_number"]) + print("Port number: " + config["port_number"]) self.ip_address = config['ip_address'] + print("Ip address: " + config["ip_address"]) self.start = 
0 self.nenvs = len(env_fns) self.train_envs = [env_fn(id=ID) for env_fn, ID in zip(env_fns, [x for x in range(self.nenvs)])] # self.validation_envs = [env_fn(id=ID) for env_fn, ID in zip(env_fns, [x for x in range(self.nenvs)])] self.envs = self.train_envs + print("Number of envs: " + str(len(self.envs))) #self.envs = [env_fn(id=ID) for env_fn, ID in zip(env_fns, [x for x in range(self.nenvs)])] + # Initial position flag for the manipulator/robot after reseting. 1 means different than the default vertical position # + if (config["initial_positions"] is None or np.count_nonzero(config["initial_positions"]) != 0) and config["random_initial_joint_positions"] == False: + self.flag_zero_initial_positions = 0 + else: + self.flag_zero_initial_positions = 1 + if self.communication_type == 'ROS': # Connect to ROS server if SimulatorVecEnv._client is None: @@ -75,6 +88,32 @@ def __init__(self, env_fns, config, spaces=None ): else: print("Please specify either ROS or ZMQ communication mode for this environment") + ################################################################### + # Manual/hard-coded actions to command at the end of the episode # + # Disabled by default. 
Refer to utils_advanced/ # + ################################################################### + self.robotic_tool = config["robotic_tool"] + if(manual_actions_dict is not None): + self.manual = manual_actions_dict["manual"] + self.manual_behaviour = manual_actions_dict["manual_behaviour"] # Behaviour to excecute, 'planar_grasping' (down/close/up) or 'close_gripper' (close) + self.manual_rewards = manual_actions_dict["manual_rewards"] # True/False -> whether to add rewards/penalties during manual actions + + # Set-up manual actions and return a function which will be called after the end of the agent episode # + self.manual_func = configure_manual_settings_and_get_manual_function(self, manual_actions_dict) + else: + self.manual = False + + if(self.robotic_tool == "None"): + print("The robot has no tool attached to the end-effector") + elif(self.robotic_tool == "2_gripper"): + print("The robot has a 2-finger gripper attached to the end-effector") + elif(self.robotic_tool == "3_gripper"): + print("The robot has a 3-finger gripper attached to the end-effector") + elif(self.robotic_tool == "calibration_pin"): + print("The robot has a calibration pin attached to the end-effector") + else: + print("The robot has no tool attached to the end-effector") + def switch_to_training(self): self.envs = self.train_envs @@ -82,55 +121,87 @@ def switch_to_validation(self): self.envs = self.validation_envs def step(self, actions, dart_convert=True): + """ + :param actions: list[list[float]]] list of lists. Each sublist entails actions that will be transfered to each corresponding env. + These actions originate either from a model-based controller or from the output of a Neural Network of a RL model + + :param dart_convert: bool, if set to True it means the actions are in task-space and must be converted with IK to the joint space. + The UNITY simulator always expects velocities in joint space. 
+ """ + self.current_step += 1 if dart_convert and 'dart' in self.config['env_key']: actions_converted = [] for env, action in zip(self.envs, actions): - act = env.update_action(action) + act = env.update_action(action) # Convert agent action to UNITY format - joint space and tool action actions_converted.append(act) actions = actions_converted - #print("current step:" + str(self.current_step)) - # create request containing all environments with the actions to be executed - request = self._create_request("ACTION", self.envs, actions) - # execute the simulation for all environments and get observations - observations = self._send_request(request) - observations_converted = [] - terminated_environments = [] - rews = [] - dones = [] - infos = [] - for env, observation in zip(self.envs, observations): - obs, rew, done, info = env.update(ast.literal_eval(observation)) - observations_converted.append(obs) - rews.append(rew) - dones.append(done) - infos.append(info) - if done: - terminated_environments.append(env) - # TODO: Make sure the last observation for terminated environment is the correct one: - # https://github.com/hill-a/stable-baselines/issues/400 talks about reset observation being wrong, - # use terminated observation instead - # TODO: make sure this does not cause problems, when the when images are transfered it might be slow - # to get all environment observations again - # reset all the terminated environments - [env.reset() for env in terminated_environments] + #################################################################################################### + # Send actions to UNITY (JSON message) and update the envs with the new observation # + # after executing these actions (update the dart kinematic (robotic) chain, agents time-step, etc) # + #################################################################################################### + terminated_environments, rews, infos, observations_converted, dones = 
self._send_actions_and_update(actions) + + # Render: Not advised to set 'enable_dart_viewer': True, during RL-training. Use it only for debugging # + if(self.config["simulation_mode"] == 'train'): + self.render() + + ########################################################################################################## + # TODO: Make sure the last observation for terminated environment is the correct one: # + # https://github.com/hill-a/stable-baselines/issues/400 talks about reset observation being wrong, # + # use terminated observation instead # + # TODO: make sure this does not cause problems, when the when images are transfered it might be slow # + # to get all environment observations again # + ########################################################################################################## + + # Reset all the terminated environments # if len(terminated_environments) > 0: - request = self._create_request("RESET", terminated_environments) - # currently, the simulator returns array of all the environments, not just the terminated ones - observations = self._send_request(request) + + #################################################################################### + # Manual actions at the end of the agent episode. # + # Disabled by default. 
Refer to utils_advanced/ # + # Note: the envs should terminate at the same timestep due to UNITY sychronization # + # for collided envs -> send zero velocities for the remaining steps # + #################################################################################### + if(self.manual == True): + self.manual_func(self, rews, infos, self.manual_rewards) + + # Successful episode: give terminal reward # for env in terminated_environments: - obs, _, _, _ = self.envs[env.id].update(ast.literal_eval(observations[env.id])) - observations_converted[env.id] = obs + if env.get_terminal_reward(): + rews[env.id] += self.reward_dict["reward_terminal"] + infos[env.id]["success"] = True + + ########################################### + # Reset gym envs and UNITY simulator envs # + ########################################### + [env.reset() for env in terminated_environments] + observations_converted = self._send_reset_and_update(terminated_environments, time_step_update=True) + + # Render: Not advised to set 'enable_dart_viewer': True, during RL-training. Use it only for debugging # + if(self.config["simulation_mode"] == 'train'): + self.render() + + ######################################################################################## + # The manipulator resets to non-zero initial joint positions. 
# + # Correct the UNITY observation and update the dart chain due to UNITY synchronization # + # Important: Make sure the agent episodes terminate at the same time-step in this case # + ######################################################################################## + if(self.flag_zero_initial_positions == 1): + observations_converted = self._send_zero_vel_and_update(self.envs, True) + return np.stack(observations_converted), np.stack(rews), np.stack(dones), infos def step_wait(self): # only because VecFrameStack uses step_async to provide the actions, then step_wait to execute a step return self.step(self.actions) - def _create_request(self, command, environments, actions=None): + """ + Create request to send to the UNITY simulator + """ content = '' if command == "ACTION": for act, env in zip(actions, environments): @@ -163,7 +234,6 @@ def _send_request(self, content): reply = self.stub.step(StepRequest(data=content)) return self._parse_result(reply.data) - def _parse_result(self, result): if self.communication_type == 'ROS': return ast.literal_eval(result['value']) @@ -175,14 +245,131 @@ def _parse_result(self, result): def reset(self, should_reset=True): if should_reset: [env.reset() for env in self.envs] - request = self._create_request("RESET", self.envs) + + # Reset UNITY environments and update the agent envs (dart chain) # + self._send_reset_and_update(self.envs, time_step_update=False) + + # Correct dart chain # + observations_converted = self._send_zero_vel_and_update(self.envs, True) + + return np.array(observations_converted) + + ########### + # Helpers # + ########### + def _send_reset_and_update(self, envs, time_step_update=True): + """ + send a reset JSON command and update the envs (time steps, observations, dart chains) + + :param envs: envs + :param time_step_update: whether to update the timestep of the agents + + :return: obs_converted + """ + + # UNITY # + request = self._create_request("RESET", envs) observations = 
self._send_request(request) + + # Agents # + observations_converted, _, _, _ = self._update_envs(observations, time_step_update=time_step_update) + + return observations_converted + + def _send_actions_and_update(self, actions): + """ + send actions to the UNITY envs and update the agents envs with the returned observations (time steps, observations, dart chains) + Note: send zero velocities to collided envs + + :param actions: agent actions (UNITY format) for each env to be send to UNITY simulator + + :return: terminated_environments, rews, infos, obs_converted, dones + """ + + rews = [0] * len(self.envs) + if(self.config['env_key'] == 'iiwa_joint_vel'): + action_dim = self.config['num_joints'] + 1 # For gripper + elif(self.config['robotic_tool'].find("gripper") == -1): + action_dim = 7 + else: + action_dim = 8 + + # For collided envs send zero velocities # + for env in self.envs: + if env.collided_env == 1: + actions[env.id] = np.zeros(action_dim) + + #print("current step:" + str(self.current_step)) + # create request containing all environments with the actions to be executed + + ##################################################################################################### + # Execute a UNITY simulation step for all environments and parse the returned observations (result) # + ##################################################################################################### + request = self._create_request("ACTION", self.envs, actions) + observations = self._send_request(request) + + ########################################################################################## + # Update the envs using the obs returned from UNITY (dart chain of the manipulator, etc) # + ########################################################################################## observations_converted = [] - for env, observation in zip(self.envs, observations): - obs, _, _, _ = env.update(ast.literal_eval(observation)) - observations_converted.append(obs) + 
terminated_environments = [] # Assume done in the same timestep else move it outside the for + dones = [] + infos = [] - return np.array(observations_converted) + observations_converted, rews, dones, infos = self._update_envs(observations, time_step_update=True) + for env, done in zip(self.envs, dones): # Scan for terminated envs + if done is True: + terminated_environments.append(env) + + return terminated_environments, rews, infos, observations_converted, dones + + def _send_zero_vel_and_update(self, envs, time_step_update=True): + """ + send zero velocities to all UNITY envs and update the agent envs + + Note: this function is needed to be called when the manipulator resets to different initial joints + positions than the vertical default position. It corrects the dart chain of the envs due to UNITY synchronization + + :param envs: envs + :param time_step_update: whether to update the timestep of the agents + + :return: obs_converted + """ + # Send zero velocities to the UNITY envs # + if(self.config['env_key'] == 'iiwa_joint_vel'): + action_dim = self.config['num_joints'] + 1 + elif(self.config['robotic_tool'].find("gripper") == -1): + action_dim = 7 + else: + action_dim = 8 + + actions = np.zeros((len(envs), action_dim)) + request = self._create_request("ACTION", envs, actions) + observations = self._send_request(request) + + # Update the agents envs # + observations_converted, _, _, _ = self._update_envs(observations, time_step_update=time_step_update) + + return observations_converted + + def _update_envs(self, observations, time_step_update): + """ + update the agent envs (dart chain, time_step etc.) 
given the new UNITY observations + + :param observations: UNITY format + :param time_step_update: whether to update the timestep of the agents + + :return: terminated_environments, rews, infos, obs_converted, dones + """ + env_stack = [] + for obs, env in zip(observations, self.envs): + env_stack.append(env.update(ast.literal_eval(obs), time_step_update)) + + return [list(param) for param in zip(*env_stack)] + + ############### + # End helpers # + ############### def reset_task(self): pass @@ -193,8 +380,17 @@ def close(self): def __len__(self): return self.nenvs + def render(self, mode=None): + """ + Override default vectorized render behaviour. SB3 renders all envs into a single window. This behaviour is + incompatible with our task_monitor.py implementation. Instead render each env into separate windows + """ + if(self.config["env_key"] != 'iiwa_joint_vel'): + for env in self.envs: + if(env.dart_sim.enable_viewer): # Render is active + env.render() # Calling destructor # def __del__(self): # print("Destructor called") - # self.env_process.terminate() \ No newline at end of file + # self.env_process.terminate() diff --git a/agent/utils/dart_guide.py b/agent/utils/dart_guide.py new file mode 100644 index 0000000..46c5158 --- /dev/null +++ b/agent/utils/dart_guide.py @@ -0,0 +1,108 @@ +""" +Help code for: + 1. how to retrieve transforamtion matrices of chains of rigidbodies in target (dart) + + 2. 
how to generate reachable targets for the end-effector pose + +DART uses the Eigen library - Geometry module documentation: https://eigen.tuxfamily.org/dox/group__Geometry__Module.html + +body_node.getTransform().translation() --> Cartesian position (x,y,z) of the center of that body_node +body_node.getTransform().rotation() --> Orientation of the body_node in a 3*3 rotation matrix +body_node.getTransform().quaternion() --> Orientation of the body_node in a unit quaternion form (w,x,y,z) +dart.math.logMap() --> calculates an angle-axis representation of a rotation matrix + +background details: https://en.wikipedia.org/wiki/Axis%E2%80%93angle_representation +further code details: https://github.com/dartsim/dart/blob/main/dart/math/Geometry.cpp +""" + +import dartpy as dart +import numpy as np + +def get_pos_error(self): + ee = self.dart_sim.chain.getBodyNode('iiwa_link_ee') # The end-effector rigid-body node + target = self.dart_sim.target.getBodyNode(0) # The target rigid-body node + position_error = target.getTransform().translation() - ee.getTransform().translation() + + return position_error + +def get_pos_distance(self): + distance = np.linalg.norm(self.get_pos_error()) + + return distance + +def get_rot_error(self): + ee = self.dart_sim.chain.getBodyNode('iiwa_link_ee') # The end-effector rigid-body node + target = self.dart_sim.target.getBodyNode(0) # The target rigid-body node + quaternion_error = target.getTransform().quaternion().multiply(ee.getTransform().quaternion().inverse()) + orientation_error = dart.math.logMap(quaternion_error.rotation()) # angle-axis x, y, z + + return orientation_error + +def get_rot_distance(self): + distance = np.linalg.norm(self.get_rot_error()) + + return distance + +def get_ee_pos(self): + """ + return the position of the ee in dart cords + + :return: x, y, z position of the ee in dart coords system + """ + x, y, z = self.dart_sim.chain.getBodyNode('iiwa_link_ee').getTransform().translation() + + return x, y, z + +def 
get_ee_orient(self): + """ + return the orientation of the ee in angle-axis + + :return: rx, ry, rz orientation of the ee in angle-axis in dart coords system + """ + rot_mat = self.dart_sim.chain.getBodyNode('iiwa_link_ee').getTransform().rotation() + rx, ry, rz = dart.math.logMap(rot_mat) + + return rx, ry, rz + +def _random_target_gen_joint_level(self): + """ + generate a rechable target in task-space for the manipulator + + :return: rx, ry, rz, x, y, z pose the ee in dart coords system (angle-axis) + """ + + # backing up joint positions of the arm in radians + positions = self.dart_sim.chain.getPositions() + + # If your gym observation state is different than the default, # + # please adapt the self.observation_indices['joint_pos'] # + joint_indices_start = self.observation_indices['joint_pos'] + + while True: + # generating a random valid observation including joint positions + random_state = self.observation_space.sample() + + # selecting the randomly generated joint positions from the agent state # + # Note: the agent state containts additional information other than the joints positions of the robot # + initial_positions = random_state[joint_indices_start:joint_indices_start + self.n_links] + + # computing the forward kinematics to retrieve end-effector pose + self.dart_sim.chain.setPositions(initial_positions) + ee = self.dart_sim.chain.getBodyNode('iiwa_link_ee') + rx, ry, rz = dart.math.logMap(ee.getTransform().rotation()) # SO(3) to so(3): Angle * Axis of Rotation + x, y, z = ee.getTransform().translation() + + # conditions for the generated targets - adapt it for your task + if z > 0.1: + break + + # reverting the joint positions back to the original ones + self.dart_sim.chain.setPositions(positions) + + # resetting the orientation vector if it is not controlled + if not self.dart_sim.orientation_control: + rx, ry, rz = 0.0, 0.0, 0.0 + + # initial_positions vector is a valid IK solution to the [rx, ry, rz, x, y, z] + + return rx, ry, rz, x, y, z diff 
--git a/agent/utils/helpers.py b/agent/utils/helpers.py new file mode 100644 index 0000000..97436d9 --- /dev/null +++ b/agent/utils/helpers.py @@ -0,0 +1,10 @@ +import torch +import random +import numpy as np + +def set_seeds(seed: int) -> int: + torch.manual_seed(seed + 135) + np.random.seed(seed + 235) + random.seed(seed + 335) + + return seed + 435 diff --git a/agent/utils/policy_networks.py b/agent/utils/policy_networks.py new file mode 100644 index 0000000..2dc9184 --- /dev/null +++ b/agent/utils/policy_networks.py @@ -0,0 +1,24 @@ +import torch + +def PolicyNetworkVanillaReLU(): + """ + returns a default policy network from SB3 but with ReLU activation functions + + :return: policy_kwargs required for RL models like PPO + """ + policy_kwargs = dict(activation_fn=torch.nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ) + + return policy_kwargs + +################## +# Other examples # +################## +# Note: when using images as state representation, the default PPO architecture of 64 neurons per layer may be too small to solve the task + +# net_arch=[128, 128, dict(pi=[64,64], vf=[64,64])], + +# policy_kwargs = dict(activation_fn=torch.nn.ReLU, net_arch=[dict(pi=[64, 64], vf=[64, 64])], +# features_extractor_class=your_pytorch_network_class_here, +# features_extractor_kwargs=give_your_kwargs_when_your_pytorch_network_class_is_instantiated) diff --git a/agent/misc/service_pb2.py b/agent/utils/service_pb2.py similarity index 100% rename from agent/misc/service_pb2.py rename to agent/utils/service_pb2.py diff --git a/agent/misc/service_pb2_grpc.py b/agent/utils/service_pb2_grpc.py similarity index 98% rename from agent/misc/service_pb2_grpc.py rename to agent/utils/service_pb2_grpc.py index 4b90ca6..ee3fd6f 100644 --- a/agent/misc/service_pb2_grpc.py +++ b/agent/utils/service_pb2_grpc.py @@ -2,7 +2,7 @@ """Client and server classes corresponding to protobuf-defined services.""" import grpc -from misc import service_pb2 as service__pb2 +from utils 
import service_pb2 as service__pb2 class CommunicationServiceStub(object): diff --git a/agent/utils/simulator_configuration.py b/agent/utils/simulator_configuration.py new file mode 100644 index 0000000..70f89d3 --- /dev/null +++ b/agent/utils/simulator_configuration.py @@ -0,0 +1,230 @@ +import traceback +import xml.etree.ElementTree as ET +import copy + +def update_simulator_configuration(config, xml_file): + """ + Update the UNITY simulator .xml file based on the settings that the user has been provided in a Config() type of class + + Note: to change more options either adapt the UNITY simulator manually or extend this function, and the Config() class + + :param config: configuration object + :param xml_file: path of the .xml file + + :return: bool. Returns False when an error was encountered and the .xml file could not be updated properly + """ + + tree = ET.parse(xml_file) + oldTree = copy.deepcopy(tree) + + root = tree.getroot() + + # Port # + field = root.find('CommunicationType') + oldFieldText = field.text + newFieldText = config.config_dict["communication_type"] + if(newFieldText != "GRPC" and newFieldText != "ROS" and newFieldText != "ZMQ"): + print("Given CommunicationType field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Port # + field = root.find('PortNumber') + oldFieldText = field.text + field.text = config.config_dict["port_number"] + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Control cycles # + field = root.find('TimestepDurationInSeconds') + oldFieldText = field.text + field.text = str(config.dart_dict["control_cycle"]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('PhyscsSimulationIncrementInSeconds') + oldFieldText = field.text + field.text = str(config.dart_dict["unity_cycle"]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Images # + field = root.find('EnableObservationImage') + 
oldFieldText = field.text + newFieldText = str(config.config_dict["use_images"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given EnableObservationImage field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ObservationImageWidth') + oldFieldText = field.text + field.text = str(config.config_dict["image_size"]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ObservationImageHeight') + oldFieldText = field.text + field.text = str(config.config_dict["image_size"]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Camera position # + field = root.find('ObservationCameras')[0][0] + x = field[0] + oldFieldText = x.text + x.text = str(config.config_dict["camera_position"][0]) + if(oldFieldText != x.text): + x.set('updated', 'yes') + + y = field[1] + oldFieldText = y.text + y.text = str(config.config_dict["camera_position"][1]) + if(oldFieldText != y.text): + y.set('updated', 'yes') + + z = field[2] + oldFieldText = z.text + z.text = str(config.config_dict["camera_position"][2]) + if(oldFieldText != z.text): + z.set('updated', 'yes') + + # Camera rotation # + field = root.find('ObservationCameras')[0][1] + x = field[0] + oldFieldText = x.text + x.text = str(config.config_dict["camera_rotation"][0]) + if(oldFieldText != x.text): + x.set('updated', 'yes') + + y = field[1] + oldFieldText = y.text + y.text = str(config.config_dict["camera_rotation"][1]) + if(oldFieldText != y.text): + y.set('updated', 'yes') + + z = field[2] + oldFieldText = z.text + z.text = str(config.config_dict["camera_rotation"][2]) + if(oldFieldText != z.text): + z.set('updated', 'yes') + + # End effector # + field = root.find('EnableEndEffector') + oldFieldText = field.text + newFieldText = str(config.config_dict["enable_end_effector"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given 
EnableEndEffector field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('EndEffectorModel') + oldFieldText = field.text + if config.config_dict["robotic_tool"] == "3_gripper": + model = "ROBOTIQ_3F" + elif config.config_dict["robotic_tool"] == "2_gripper": + model = "ROBOTIQ_2F85" + elif config.config_dict["robotic_tool"] == "calibration_pin": + model = "CALIBRATION_PIN" + else: + model = "ROBOTIQ_3F" + + field.text = model + if(oldFieldText != field.text): + field.set('updated', 'yes') + + # Box # + try: # Might not be availavle depending on the simulator version + field = root.find('ItemSizeX') + oldFieldText = field.text + field.text = str(config.goal_dict["box_dim"][0]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ItemSizeY') + oldFieldText = field.text + field.text = str(config.goal_dict["box_dim"][1]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ItemSizeZ') + oldFieldText = field.text + field.text = str(config.goal_dict["box_dim"][2]) + if(oldFieldText != field.text): + field.set('updated', 'yes') + except: + pass + + # Randomization # + field = root.find('RandomizeLatency') + oldFieldText = field.text + newFieldText = str(config.randomization_dict["randomize_latency"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given RandomizeLatency field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('RandomizeTorque') + oldFieldText = field.text + newFieldText = str(config.randomization_dict["randomize_torque"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given RandomizeTorque field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('RandomizeAppearance') + oldFieldText = 
field.text + newFieldText = str(config.randomization_dict["randomize_appearance"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given RandomizeAppearance field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + try: # Might not be availavle depending on the simulator version + field = root.find('EnableShadows') + oldFieldText = field.text + newFieldText = str(config.randomization_dict["enable_shadows"]).lower() + if(newFieldText != "true" and newFieldText != "false"): + print("Given EnableShadows field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + + field = root.find('ShadowType') + oldFieldText = field.text + newFieldText = config.randomization_dict["shadow_type"] + if(newFieldText != "Soft" and newFieldText != "Hard"): + print("Given ShadowType field is invalid") + else: + field.text = newFieldText + if(oldFieldText != field.text): + field.set('updated', 'yes') + except: + pass + + try: + # Write the updated .xml file + tree.write(xml_file) + + except Exception as e: + print("Warning: the .xml file has not been updated properly. 
An Exception was occurred:") + print(traceback.format_exc()) + + oldTree.write(xml_file) # Revert to the old .xml file + + return False + + return True diff --git a/agent/utils/task_monitor.cpython-37m-x86_64-linux-gnu.so b/agent/utils/task_monitor.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000..5dc889b Binary files /dev/null and b/agent/utils/task_monitor.cpython-37m-x86_64-linux-gnu.so differ diff --git a/agent/utils_advanced/boxes_generator.py b/agent/utils_advanced/boxes_generator.py new file mode 100644 index 0000000..e6d9e23 --- /dev/null +++ b/agent/utils_advanced/boxes_generator.py @@ -0,0 +1,277 @@ +""" +A box generator class to spawn random boxes in the UNITY simulator +It allows to define the range where the boxes will be spawned in the workspace of the robot +""" + +import os + +import scipy +import scipy.spatial + +import numpy as np +import matplotlib.pyplot as plt + +from pathlib import Path + +class RandomBoxesGenerator: + def __init__(self, box_mode="train", box_samples=400, box_split=0.1, box_save_val=False, box_load_val=False, + box_radius_val=0.01, box_min_distance_base=0.475, box_max_distance_base=0.67, box_folder="./agent/logs/dataset/", + box_x_min=-0.67, box_x_max=0.67, box_x_active=True, box_z_min=0.42, box_z_max=0.67, box_z_active=True, + box_ry_min=-np.inf, box_ry_max=np.inf, box_ry_active=False, box_debug=[0.48159, 0.05, 0.4406864, 0.0, 0.0, 0.0]): + + self.box_mode = box_mode # Train, test, debug + self.box_samples = box_samples + self.index_train = 0 # Current index + self.index_val = 0 # Starting index of val boxes + self.val_size = 0 + self.train_size = 0 + self.box_split = box_split # Val split -> 0.1, 0.2, etc + self.box_load_val = box_load_val # Load validation boxes from a saved dataset + self.box_folder = box_folder # Folder dataset path to load (val), and/or save the generated dataset (train, val) + self.box_radius_val = box_radius_val # Exclude spawned train boxes that are close to the val boxes within this 
radius (threshold) in meters. e.g. 0.01 + self.box_min_distance_base = box_min_distance_base + self.box_max_distance_base = box_max_distance_base + self.box_save_val = box_save_val # Save generated dataset + self.box_debug = box_debug + + ############## + # Set ranges # + ############## + self.box_x_min = box_x_min + self.box_x_max = box_x_max + self.box_x_active = box_x_active + self.box_z_min = box_z_min + self.box_z_max = box_z_max + self.box_z_active = box_z_active + self.box_ry_min = box_ry_min + self.box_ry_max = box_ry_max + self.box_ry_active = box_ry_active + + ###################### + # Set-up the Dataset # + ###################### + + # Train split # + self.objects_X, self.objects_Y, self.objects_Z, self.objects_RY = [], [], [], [] + + # Val split # + self.objects_X_val, self.objects_Y_val, self.objects_Z_val, self.objects_RY_val = [], [], [], [] + + # Create the directory to save the dataset # + Path(self.box_folder).mkdir(parents=True, exist_ok=True) + + # Set-up the train and val splits # + if (self.box_mode == "debug"): + self.val_size = 1 + self.train_size = 1 + else: + self.val_size = int(self.box_samples * self.box_split) + self.train_size = self.box_samples - self.val_size + + # Useful struct for computing fast euclidean distances. 
Refer to self._get_samples() # + self.val_set = np.column_stack((self.objects_X_val, self.objects_Z_val)) + + # Generate always a random train set # + for i in range(self.box_samples): + X, Y, Z, _, RY, _ = self._get_samples(self.objects_X_val, self.objects_Z_val) + + self.objects_X.append(X) + self.objects_Y.append(Y) + self.objects_Z.append(Z) + self.objects_RY.append(RY) + + self.objects_X_train = self.objects_X[:self.train_size] + self.objects_Y_train = self.objects_Y[:self.train_size] + self.objects_Z_train = self.objects_Z[:self.train_size] + self.objects_RY_train = self.objects_RY[:self.train_size] + + # Load validation boxes # + if (self.box_load_val == True): + self._load_val() + else: # Do not load - use random (newly generated) + self.objects_X_val = self.objects_X[self.train_size:] + self.objects_Y_val = self.objects_Y[self.train_size:] + self.objects_Z_val = self.objects_Z[self.train_size:] + self.objects_RY_val = self.objects_RY[self.train_size:] + + # Save the box dataset # + if (self.box_save_val == True): + self._save_dataset() + + def __call__(self): + """ + return the next box from the dataset + + :return: x, y, z, 0, ry, 0 box coords - in unity coords system - orientation in degrees + """ + + if (self.box_mode == "train"): + box = [ + self.objects_X_train[self.index_train], self.objects_Y_train[self.index_train], + self.objects_Z_train[self.index_train], 0.0, self.objects_RY_train[self.index_train], 0.0 + ] + + self.index_train += 1 + if (self.index_train == self.train_size): # Important: start from the beginning + self.index_train = 0 + + elif (self.box_mode == "val"): + box = [ + self.objects_X_val[self.index_val], self.objects_Y_val[self.index_val], + self.objects_Z_val[self.index_val], 0.0, self.objects_RY_val[self.index_val], 0.0 + ] + + self.index_val += 1 + if (self.index_val == self.val_size): + self.index_val = 0 + + elif (self.box_mode == "debug"): + box = self.box_debug + + else: + raise Exception("Not valid box mode") + + return box 
+ + def _load_val(self): + """ + load the validation split from a saved dataset file + """ + + val_set_file = np.loadtxt(os.path.join(self.box_folder, "val_data.txt"), delimiter=",") + + for box in val_set_file: + self.objects_X_val.append(box[0]) + self.objects_Y_val.append(box[1]) + self.objects_Z_val.append(box[2]) + self.objects_RY_val.append(box[3]) + + self.objects_X_val = np.asarray(self.objects_X_val) + self.objects_Y_val = np.asarray(self.objects_Y_val) + self.objects_Z_val = np.asarray(self.objects_Z_val) + self.objects_RY_val = np.asarray(self.objects_Z_val) + + self.val_size = len(self.objects_X_val) + + def _get_samples(self, objects_X_val, objects_Z_val): + """ + generate one random box with coordinates that complies with the user-defined constraints + + if 'box_radius_val' is not 0.0: keep spawning until the generated box is far apart from all the val boxes by the defined threshold + + :param objects_X_val: x validation boxes coords + + :return: x, y, z, 0, ry, 0 box coords - in unity coords system - orientation in degrees + """ + while True: + if(self.box_x_active): + X = np.random.uniform(self.box_x_min, self.box_x_max, size=1)[0] + else: + X = 0 + + Y = 0.05 # Hard-coded box height. 
Adapt to your task + + if(self.box_z_active): + Z = np.random.uniform(self.box_z_min, self.box_z_max, size=1)[0] + else: + Z = 0 + + if(self.box_ry_active): + RY = np.random.uniform(self.box_ry_min, self.box_ry_max, size=1)[0] + else: + RY = 0 + + distance_to_base = np.sqrt(X**2 + Z**2) + if (distance_to_base > self.box_min_distance_base and distance_to_base < self.box_max_distance_base): + + # Train boxes must be far apart from the validation boxes # + if(self.box_radius_val >= 0.001): # Threshold is active + point_set = np.asarray([[X, Z]]) # Current generated box + + # Calculate the distances to all other validation boxes from the generated box # + dist_radius = scipy.spatial.distance.cdist(self.val_set, point_set, 'euclidean') + + # Check violation # + violation = False + for val_dist in dist_radius: + if (val_dist <= self.box_radius_val): + violation = True + + if violation: # Box too close to a validation box - spawn another box + continue + + return X, Y, Z, 0.0, RY, 0.0 + + def _save_dataset(self): + """ + save the generated dataset: + - .txt formant: x,y,z,ry + - saves also some .png plots + """ + + # Train split # + x_np_train = np.asarray(self.objects_X_train) + x_np_train = x_np_train.reshape(x_np_train.shape[0], 1) + z_np_train = np.asarray(self.objects_Z_train) + z_np_train = z_np_train.reshape(z_np_train.shape[0], 1) + ry_np_train = np.asarray(self.objects_RY_train) + ry_np_train = ry_np_train.reshape(ry_np_train.shape[0], 1) + y_np_train = np.ones_like(x_np_train) * 0.05 + train_data = np.concatenate((x_np_train, y_np_train, z_np_train, ry_np_train), axis=1) + + # Val split # + x_np_val = np.asarray(self.objects_X_val) + x_np_val = x_np_val.reshape(x_np_val.shape[0], 1) + z_np_val = np.asarray(self.objects_Z_val) + z_np_val = z_np_val.reshape(z_np_val.shape[0], 1) + ry_np_val = np.asarray(self.objects_RY_val) + ry_np_val = ry_np_val.reshape(ry_np_val.shape[0], 1) + y_np_val = np.ones_like(x_np_val) * 0.05 + val_data = np.concatenate((x_np_val, 
y_np_val, z_np_val, ry_np_val), axis=1) + + # Save to .txt files # + np.savetxt(os.path.join(self.box_folder, "train_data.txt"), train_data, delimiter=",") + np.savetxt(os.path.join(self.box_folder, "val_data.txt"), val_data, delimiter=",") + + ######### + # Plots # + ######### + + # x - z coords # + fig = plt.figure() + ax1 = fig.add_subplot(111) + + ax1.scatter(x_np_train, z_np_train, s=10, c='#726bff', marker="s", label='train') + ax1.scatter(x_np_val, z_np_val, s=10, c='#ff6e66', marker="o", label='val') + plt.legend(loc='upper left') + plt.xlabel("x") + plt.ylabel("z") + plt.title("dataset") + plt.savefig(os.path.join(self.box_folder, 'box_dataset_x_z_axis.png')) + + # ry - y coords # + fig = plt.figure() + ax1 = fig.add_subplot(111) + + # Adapt box height to your task # + ax1.scatter(ry_np_train, 0.05 * np.ones_like(ry_np_train), s=10, c='b', marker="s", label='train') + ax1.scatter(ry_np_val, 0.05 * np.ones_like(ry_np_val), s=10, c='r', marker="o", label='val') + plt.legend(loc='upper left') + plt.xlabel("ry (deg)") + plt.ylabel("y") + plt.title("dataset") + plt.savefig(os.path.join(self.box_folder, 'box_dataset_ry_y_axis.png')) + + # x - z - ry coords # + fig = plt.figure() + ax1 = fig.add_subplot(111, projection='3d') + + ax1.scatter(x_np_train, z_np_train, ry_np_train, s=10, c='b', marker="s", label='train') + ax1.scatter(x_np_val, z_np_val, ry_np_val, s=10, c='r', marker="o", label='val') + plt.legend(loc='upper left') + ax1.set_xlabel("x") + ax1.set_ylabel("z") + ax1.set_zlabel("ry (deg)") + + plt.title("dataset") + plt.savefig(os.path.join(self.box_folder, 'box_dataset.png')) diff --git a/agent/utils_advanced/evaluate.py b/agent/utils_advanced/evaluate.py new file mode 100644 index 0000000..a848aaa --- /dev/null +++ b/agent/utils_advanced/evaluate.py @@ -0,0 +1,389 @@ +import os +from pathlib import Path + +import numpy as np +import pandas as pd + +# Import envs # +from simulator_vec_env import SimulatorVecEnv +from envs.iiwa_sample_joint_vel_env 
import IiwaJointVelEnv +from envs.iiwa_sample_env import IiwaSampleEnv + +# Planar grasping +from envs_advanced.iiwa_numerical_planar_grasping_env import IiwaNumericalPlanarGraspingEnv +from envs_advanced.iiwa_end_to_end_planar_grasping_env import IiwaEndToEndPlanarGraspingEnv +from envs_advanced.iiwa_ruckig_planar_grasping_env import IiwaRuckigPlanarGraspingEnv + +# Monitor +from stable_baselines3.common.vec_env import VecMonitor +from torch.utils.tensorboard import SummaryWriter + +# Models # +from stable_baselines3 import PPO +from models.ruckig_planar_model import RuckigPlanarModel + +# Set-up envs +from utils_advanced.helpers import get_env + +def evaluation_mode(config_dict, dart_env_dict, gym_env_dict, hyper_dict, goal_dict, reward_dict, manual_actions_dict=None, randomization_dict=None, randomBoxesGenerator=None): + """ + evaluate a single checkpoint or a whole directory that may include many runs and many checkpoints per run. Save results in .csv file + + each environment will call a dedicated evaluation function - adapt and extend to your task + + :param config_dict: configuration dictionary + :param dart_env_dict: dart dictionary + :param gym_env_dict: env dictionary + :param hyper_dict: hyperparameters dict - models related + :param goal_dict: goal dict - target, or green box + :param reard_dict: reward dict - reward terms values and ranges + :param manual_actions_dict: manual action dict + :param randomization_dict: randomization dict + :param randomBoxesGenerator: boxes generator to spawn boxes for planar grasping + """ + + env_key = config_dict['env_key'] + + # Build env # + env = get_env(config_dict, dart_env_dict, gym_env_dict, config_dict["log_dir"], reward_dict, goal_dict, manual_actions_dict, randomization_dict, randomBoxesGenerator) + + print("Using for evaluation the env: " + config_dict["env_key"]) + + if env_key.find("planar") != -1: # Planar envs + _evaluate_planar_grasping(env, config_dict, dart_env_dict, hyper_dict, randomBoxesGenerator) 
+ elif env_key == "iiwa_sample_dart_unity_env" or env_key == "iiwa_joint_vel": + _evaluate_base_env(env, config_dict, dart_env_dict) + else: + raise Exception("This type of env has no available evaluation method. Create one in the evaluate.py first - aborting") + +def _evaluate_base_env(env, config_dict, dart_env_dict): + """ + evaluate iiwa_sample_joint_vel_env, or iiwa_joint_vel envs + + :param env: gym env + :param config_dict: configuration dict + :param dart_env_dict: dart dictionary + """ + + if(config_dict["simulation_mode"] == "evaluate"): + run_name = config_dict["model_evaluation_type"].split("/") + run_name = run_name[-2] + "_" + run_name[-1] # e.g. run_0/model_3125_0 + + print("===================================================") + print("(RL) Model evaluation: " + str(run_name)) + print("===================================================") + + # Load trained agent # + model = PPO.load(os.path.join(config_dict["model_evaluation_type"])) + model.policy.set_training_mode(False) + + obs = env.reset() + for x in range(1000): # Run some steps for each env + action, _states = model.predict(obs, deterministic=True) # Important: set deterministic to True to use the best learned policy (no exploration) + + obs, rewards, dones, info = env.step(action) + + if dart_env_dict['enable_dart_viewer'] and config_dict['env_key'] != 'iiwa_joint_vel': + env.render() + + elif(config_dict['simulation_mode'] == 'evaluate_model_based' and config_dict['env_key'] != 'iiwa_joint_vel'): + # check model-based controllers (e.g. 
P-controller) # + print("===================================================") + print("Model-based evaluation") + print("===================================================") + + control_kp = 1.0 / env.observation_space.high[0] + + obs = env.reset() + episode_rewards = [] + for _ in range(5): # Play some episodes + cum_reward = 0 + + while True: # Play until we have a successful episode + + if dart_env_dict['use_inverse_kinematics']: # Generate an action for the current observation using a P-controller + action = np.reshape(env.env_method('action_by_p_control', control_kp, 2.0 * control_kp), + (config_dict['num_envs'], env.action_space.shape[0])) + else: # Random action + action = np.reshape(env.env_method('random_action'), + (config_dict['num_envs'], env.action_space.shape[0])) + + obs, rewards, dones, info = env.step(action) # Play this action + cum_reward += rewards + + # Render # + if dart_env_dict['enable_dart_viewer']: + env.render() + + if dones.any(): + episode_rewards.append(cum_reward) + break + + mean_reward = np.mean(episode_rewards) + + print("Mean reward: " + str(mean_reward)) + else: + print("You have set an invalid simulation_mode or some other settings in the config.py are wrong - aborting") + +def _evaluate_planar_grasping(env, config_dict, dart_env_dict, hyper_dict, randomBoxesGenerator): + """ + evaluate a saved model or many runs and many checkpoints per run. Print some stats and save pandas dfs (.csv) in an evaluation folder + + :param env: gym env + :param config_dict: configuration dictionary + :param dart_env_dict: dart dictionary + :param hyper_dict: hyperparameters dict. Models related: RUCKIG, and for PPO model, a checkpoint will be loaded (with saved hyperparams) + :param randomBoxesGenerator: spawn boxes for evaluating planar grasping agents + """ + + ######################################################################## + # Df to save evaluation stats # + # Columns: # + # run: 0, 1, 2, .. 
-> folder with checkpoints # + # timestep: 0, ..., total_timestep -> many checkpoints # + # Note: evaluate the agent in the same boxes for each saved checkpoint # + # e.g. run_0/model_3125_0, run_0/model_6250_1, etc # + ######################################################################## + + # Save all runs and all checkpoints per run # + df = pd.DataFrame(columns=['evaluation_name', 'run', 'timestep', 'reward', 'success_ratio']) + + # Keep a separate file for the best model # + df_best = pd.DataFrame(columns=['evaluation_name', + 'best_succ_ratio_model', 'best_succ_ratio', 'best_succ_ratio_reward', + 'best_reward_model', 'best_reward', 'best_reward_succ_ratio']) + + # Set dataset to test mode # + randomBoxesGenerator.mode = "test" + + ############################################ + # Keep best stats from all runs and models # + ############################################ + best_succ_ratio = 0 + best_succ_ratio_model = None + best_succ_ratio_reward = -np.inf # For best success ratio model save its corresponding reward value + + best_reward = -np.inf + best_reward_model = None + best_reward_succ_ratio = 0 # For best reward model save its corresponding success ratio value + + log_dir = config_dict["log_dir"] + + # Evaluate a single model # + if (config_dict["model_evaluation_type"] != "all" or config_dict["env_key"] == 'iiwa_ruckig_planar_grasping_dart_unity_env'): + + # Create a dir to save the evaluation pandas dfs # + Path(log_dir + "evaluation_single_dfs/").mkdir(parents=True, exist_ok=True) + + # Load model # + if(config_dict["model"] == "PPO"): + model = PPO.load(config_dict["model_evaluation_type"]) + model.policy.set_training_mode(False) + + run_name = config_dict["model_evaluation_type"].split("/") + run_name = run_name[-2] + "_" + run_name[-1] # e.g. 
run_0/model_3125_0 + + elif(config_dict["model"] == "RUCKIG"): + model = RuckigPlanarModel(env, control_cycle=dart_env_dict["control_cycle"], hyper_dict=hyper_dict) + run_name = "ruckig_model" + + print("===================================================") + print("Model evaluation: " + str(run_name)) + print("===================================================") + + # Evaluate single model # + mean_r, succ_ratio = _evaluate_planar_grapsing_single_model(env=env, model=model, log_dir=log_dir, run_name=run_name, model_id=0, + config_dict=config_dict, dart_env_dict=dart_env_dict, episodes_test=randomBoxesGenerator.val_size) + + # One model -> equivalent best stats # + best_reward = mean_r + best_reward_model = run_name + best_reward_succ_ratio = succ_ratio + + best_succ_ratio = succ_ratio + best_succ_ratio_model = run_name + best_succ_ratio_reward = mean_r + ###################################### + + # Save dfs # + df.loc[0] = [config_dict["evaluation_name"], 0, 0, mean_r, succ_ratio] + df.to_csv(log_dir + "evaluation_single_dfs/" + "df_evaluation_" + run_name + ".csv", index=False) + + df_best.loc[0] = [config_dict["evaluation_name"], + best_succ_ratio_model, best_succ_ratio, best_succ_ratio_reward, + best_reward_model, best_reward, best_reward_succ_ratio + ] + + df_best.to_csv(log_dir + "evaluation_single_dfs/" + "df_evaluation_best_" + run_name + ".csv", index=False) + + else: # Evaluate many runs and their saved checkpoints + + # Create a dir to save the evaluation pandas dfs # + Path(log_dir + "evaluation_all_dfs/").mkdir(parents=True, exist_ok=True) + + # Scan logs folder and find how many runs exist # + # logs/ -> run_0/, run_1/ # + log_dirs = [filename for filename in os.listdir(log_dir[:-1]) if filename.startswith("run_")] + log_dirs = sorted(log_dirs, key=lambda x: int(x.split("_")[1])) + + model_i = 0 # For run_0, run_1 + j = 0 # For pandas rows -> one df for all runs + + for log_dir_name in log_dirs: # run_i + print("=================================") + 
print("Model evaluation: " + str(model_i + 1)) + print("=================================") + + # Scan the checkpoints of this runs and sort them by the time step # + models_names = [ + filename for filename in os.listdir(log_dir + log_dir_name + "/") + if filename.startswith("model") and filename.endswith(".zip") + ] + + models_names = sorted(models_names, key=lambda x: int(x.split("_")[1])) + + # First time step 0 - baseline for all runs - set the initial reward value from the config_advanced.py - adapt if the reward denifition or task changes # + df.loc[j] = [config_dict["evaluation_name"], model_i + 1, 0, config_dict["reward_baseline"], 0.0] + j += 1 + + for i, m_name in enumerate(models_names): # Load and evaluate the current checkpoint of the run_i + print("Checkpoint number: " + str(i + 1)) + + # Load saved model # + model = PPO.load(log_dir + log_dir_name + "/" + m_name[:-4]) # Remove .zip + model.policy.set_training_mode(False) + + # Evaluate model # + mean_r, succ_ratio = _evaluate_planar_grapsing_single_model(env=env, model=model, log_dir=log_dir, run_name=log_dir_name, + model_id=m_name.split("_")[1], config_dict=config_dict, + dart_env_dict=dart_env_dict, episodes_test=randomBoxesGenerator.val_size) + # Update best stats # + if (mean_r > best_reward): # reward-based + best_reward = mean_r + best_reward_model = log_dir + log_dir_name + "/" + m_name # which model had the best reward + best_reward_succ_ratio = succ_ratio + + if (succ_ratio > best_succ_ratio): # success ratio-based + best_succ_ratio = succ_ratio + best_succ_ratio_model = log_dir + log_dir_name + "/" + m_name + best_succ_ratio_reward = mean_r + + # Save results for the current checkpoint # + df.loc[j] = [config_dict["evaluation_name"], model_i + 1, m_name.split("_")[1], mean_r, succ_ratio] + j += 1 + + # All checkpoints were evaluated for the run_i. 
Go to the next run + model_i += 1 + + # Save pandas df of all checkpoints and runs # + df.to_csv(log_dir + "evaluation_all_dfs/" + "df_evaluation_all" + ".csv", index=False) + + # Save pandas df of the best stats # + df_best.loc[0] = [ + config_dict["evaluation_name"], + best_succ_ratio_model, best_succ_ratio, best_succ_ratio_reward, + best_reward_model, best_reward, best_reward_succ_ratio + ] + df_best.to_csv(log_dir + "evaluation_all_dfs/" + "df_evaluation_best" + ".csv", index=False) + + # Print the best stats in console # + print("best_reward: " + str(best_reward)) + print("best_reward_model: " + str(best_reward_model)) + print("best_reward_succ_ratio: " + str(best_reward_succ_ratio)) + + print("best_succ_ratio: " + str(best_succ_ratio)) + print("best_succ_ratio_model: " + str(best_succ_ratio_model)) + print("best_succ_ratio_reward: " + str(best_succ_ratio_reward)) + + # Revert dataset to train mode # + randomBoxesGenerator.mode = "train" + +def _evaluate_planar_grapsing_single_model(env, model, log_dir, run_name, model_id, config_dict, dart_env_dict, episodes_test): + """ + evaluate a saved model - grasping planar environment. It writes metrics in a tensorboard summary file + + :param env: gym env + :param model: saved model to evaluate + :param log_dir: log directory to save the tensorboard summary file + :param run_name: name of the run e.g. run_0 + :param model_id: checkpoint id e.g. model_200 -> 200 + :param config_dict: configuration dict + :param dart_env_dict: dart dict + :param episodes_test: how many evaluation targets (boxes) to test + + :return: mean_r, succ_ratio -> mean reward and success ratio for all the episodes (episodes_test) + """ + + # Save results to tensorboard summary file # + writer = SummaryWriter(str(log_dir) + "/evaluation_tb_" + run_name) + + mean_reward = 0 + success_count = 0 # #successfull episodes + total_episodes = 0 # total episodes + + num_envs = config_dict["num_envs"] + + # Save total reward per episode per each env. 
These detailed stats are not printed by default (see below) # + total_rewards = np.zeros((num_envs, 5000)) # Assume max 5000 episodes played + total_success = np.zeros((num_envs, 5000)) # Assume max 5000 episodes played + num_episode = 0 # All envs terminate at the same time step + + ############################################################################################################## + # Calculate how many steps to play for the required "episodes_test" given the number of the envs, # + # and the 'max_time_step defined' per each episode # + # Imporant: make sure your values result in a perfect division, else some target boxes will not be evaluated # + ############################################################################################################## + time_steps = int(episodes_test * dart_env_dict["max_time_step"] / num_envs) + + obs = env.reset() + for _ in range(time_steps): + + action, _states = model.predict(obs, deterministic=True) + obs, rewards, dones, info = env.step(action) + total_rewards[:, num_episode] += rewards + + # Render # + if dart_env_dict['enable_dart_viewer']: + env.render() + + for i in range(num_envs): # Check if done + if dones[i]: + if(i == 0): # Assume envs terminate at the same time step + num_episode += 1 + + mean_reward += total_rewards[i][num_episode - 1] # Append total reward of this env in the cumulative sum, 'mean_rewards' + + if (info[i]["success"] == True): # Successful episode + success_count += 1 + total_success[i][num_episode - 1] = 1 + else: + total_success[i][num_episode - 1] = 0 + + total_episodes += 1 + + # Fix metrics # + succ_ratio = (success_count * 100) / total_episodes + mean_r = mean_reward / float(total_episodes) + + # Log metrics in tb summary file # + writer.add_scalar('success_ratio', succ_ratio, model_id) + writer.add_scalar('total_test_episodes', total_episodes, model_id) + writer.add_scalar('mean_reward', mean_r, model_id) + writer.flush() + + # Print some stats and metrics in the console # + 
print("End of model evaluation with name: " + run_name + " and id: " + str(model_id)) + print("Number of targets that were tested: " + str(total_episodes)) + print("Success ratio: " + str(succ_ratio)) + print("Mean reward: " + str(mean_r)) + + # Uncomment these lines for more detailed results (per episode) # + #print("Total reward for 50 episodes for every env") + #print(total_rewards[:, 0:50]) + #print("Success flag for 50 episodes for every env") + #print(total_success[:, 0:50]) + + print("") + writer.close() + + return [mean_r, succ_ratio] diff --git a/agent/utils_advanced/helpers.py b/agent/utils_advanced/helpers.py new file mode 100644 index 0000000..e169cff --- /dev/null +++ b/agent/utils_advanced/helpers.py @@ -0,0 +1,150 @@ +import numpy as np + +# Import envs # + +# Base envs +from envs.iiwa_sample_joint_vel_env import IiwaJointVelEnv +from envs.iiwa_sample_env import IiwaSampleEnv + +# Planar grasping # +from envs_advanced.iiwa_numerical_planar_grasping_env import IiwaNumericalPlanarGraspingEnv +from envs_advanced.iiwa_end_to_end_planar_grasping_env import IiwaEndToEndPlanarGraspingEnv +from envs_advanced.iiwa_ruckig_planar_grasping_env import IiwaRuckigPlanarGraspingEnv + +# Monitor envs +from stable_baselines3.common.vec_env import VecMonitor +from simulator_vec_env import SimulatorVecEnv + +def get_env(config_dict, dart_env_dict, gym_env_dict, log_dir, reward_dict=None, goal_dict=None, manual_actions_dict=None, randomization_dict=None, randomBoxesGenerator=None): + """ + Set-up the env according to the input dictionary settings + """ + + # Some basic checks # + if(config_dict["simulation_mode"] == "train" and config_dict["env_key"] == "iiwa_ruckig_planar_grasping_dart_unity_env"): + raise Exception("Can not train RUCKIG model - aborting") + + if(config_dict["env_key"] == "iiwa_ruckig_planar_grasping_dart_unity_env" and config_dict["model"] != "RUCKIG"): + raise Exception("For ruckig env use only the RUCKIG model - aborting") + + 
if(config_dict["env_key"] != "iiwa_ruckig_planar_grasping_dart_unity_env" and config_dict["model"] == "RUCKIG"): + raise Exception("For non ruckig env do not use the RUCKIG model - aborting") + + if(config_dict["env_key"] == "iiwa_end_to_end_planar_grasping_dart_unity_env" and config_dict["use_images"] == False): + raise Exception("For end_to_end env enable the images - aborting") + + if(config_dict["env_key"].find("planar") != -1 and ((reward_dict is None) or (goal_dict is None) or (randomization_dict is None) or (randomBoxesGenerator is None))): + raise Exception("Missing dictionaries for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (manual_actions_dict is None or manual_actions_dict["manual"] == False)): + raise Exception("Please enable manual actions for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (manual_actions_dict["manual_behaviour"] != "planar_grasping")): + raise Exception("Please enable manual planar grasing actions for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (config_dict["enable_end_effector"] == False or config_dict["robotic_tool"].find("gripper") == -1)): + raise Exception("Please enable the gripper for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (goal_dict["goal_type"] != "box")): + raise Exception("Please enable boxes as targets for planar envs - aborting") + + if(config_dict["env_key"].find("planar") != -1 and (goal_dict["box_ry_active"] == True and np.isclose(reward_dict["reward_pose_weight"], 0.0))): + raise Exception("Add more weight to the reward pose to activate 3 DoF control for planar envs when the rotation is active in spanwed boxes - aborting") + + if((manual_actions_dict["manual"] == True) and (manual_actions_dict["manual_behaviour"] == "planar_grasping" or manual_actions_dict["manual_behaviour"] == "close_gripper") and (config_dict["enable_end_effector"] == False or 
config_dict["robotic_tool"].find("gripper") == -1)): + raise Exception("Please enable the gripper for planar_grasping and close_gripper manual actions - aborting") + + if((manual_actions_dict["manual"] == True) and (manual_actions_dict["manual_behaviour"] == "planar_grasping" or manual_actions_dict["manual_behaviour"] == "close_gripper") and (config_dict["env_key"] == "iiwa_joint_vel")): + raise Exception("Can not use dart-based manual actions for iiwa_joint_vel env - aborting") + # End checks # + + env_key = config_dict['env_key'] + def create_env(id=0): + + ################################################################################################################################# + # Important: 'dart' substring should always be included in the 'env_key' for dart-based envs. E.g. 'iiwa_sample_dart_unity_env' # + # If 'dart' is not included, IK behaviour can not be used # + ################################################################################################################################# + + # joints control without dart + if env_key == 'iiwa_joint_vel': + env = IiwaJointVelEnv(max_ts=dart_env_dict['max_time_step'], id=id, config=config_dict) + + # Reaching the red target sample env + # task-space with dart or joint joint space control + # model-based control with P-controller available + elif env_key == 'iiwa_sample_dart_unity_env': + env = IiwaSampleEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], + task_monitor=dart_env_dict['task_monitor'], with_objects=dart_env_dict['with_objects'], + target_mode=dart_env_dict['target_mode'], target_path=dart_env_dict['target_path'], + goal_type=goal_dict['goal_type'], joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], 
max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], robotic_tool=config_dict["robotic_tool"], + env_id=id) + + # Planar RL grasping using the true numeric observations from the UNITY simulator # + elif env_key == 'iiwa_numerical_planar_grasping_dart_unity_env': + env = IiwaNumericalPlanarGraspingEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], + task_monitor=dart_env_dict['task_monitor'], with_objects=dart_env_dict['with_objects'], + target_mode=dart_env_dict['target_mode'], goal_type=goal_dict['goal_type'], + randomBoxesGenerator=randomBoxesGenerator, joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], noise_enable_rl_obs=randomization_dict['noise_enable_rl_obs'], + noise_rl_obs_ratio=randomization_dict['noise_rl_obs_ratio'], reward_dict=reward_dict, + agent_kp=gym_env_dict['agent_kp'], agent_kpr=gym_env_dict['agent_kpr'], + robotic_tool=config_dict["robotic_tool"], + env_id=id) + + # Planar RL grasping using image observations as state representation - end-to-end learning # + elif env_key == 'iiwa_end_to_end_planar_grasping_dart_unity_env': + env = 
IiwaEndToEndPlanarGraspingEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], task_monitor=dart_env_dict['task_monitor'], + target_mode=dart_env_dict['target_mode'], goal_type=goal_dict['goal_type'], + randomBoxesGenerator=randomBoxesGenerator, joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], noise_enable_rl_obs=randomization_dict['noise_enable_rl_obs'], + noise_rl_obs_ratio=randomization_dict['noise_rl_obs_ratio'], reward_dict=reward_dict, + agent_kp=gym_env_dict['agent_kp'], agent_kpr=gym_env_dict['agent_kpr'], + image_size=config_dict['image_size'], robotic_tool=config_dict["robotic_tool"], + gripper_type=config_dict["gripper_type"], + env_id=id) + + # Planar grasping using time-optimal trajectory generation method - RUCKIG # + elif env_key == 'iiwa_ruckig_planar_grasping_dart_unity_env': + env = IiwaRuckigPlanarGraspingEnv(max_ts=dart_env_dict['max_time_step'], orientation_control=dart_env_dict['orientation_control'], + use_ik=dart_env_dict['use_inverse_kinematics'], ik_by_sns=dart_env_dict['linear_motion_conservation'], + state_type=config_dict['state'], enable_render=dart_env_dict['enable_dart_viewer'], + task_monitor=dart_env_dict['task_monitor'], with_objects=dart_env_dict['with_objects'], + target_mode=dart_env_dict['target_mode'], goal_type=goal_dict['goal_type'], + randomBoxesGenerator=randomBoxesGenerator, 
joints_safety_limit=config_dict['joints_safety_limit'], + max_joint_vel=config_dict['max_joint_vel'], max_ee_cart_vel=config_dict['max_ee_cart_vel'], + max_ee_cart_acc=config_dict['max_ee_cart_acc'], max_ee_rot_vel=config_dict['max_ee_rot_vel'], + max_ee_rot_acc=config_dict['max_ee_rot_acc'], random_initial_joint_positions=config_dict['random_initial_joint_positions'], + initial_positions=config_dict['initial_positions'], noise_enable_rl_obs=randomization_dict['noise_enable_rl_obs'], + noise_rl_obs_ratio=randomization_dict['noise_rl_obs_ratio'], reward_dict=reward_dict, + agent_kp=gym_env_dict['agent_kp'], agent_kpr=gym_env_dict['agent_kpr'], + threshold_p_model_based=gym_env_dict["threshold_p_model_based"], robotic_tool=config_dict["robotic_tool"], + env_id=id) + + # Set env seed # + env.seed((id * 150) + (id + 11)) + + return env + + num_envs = config_dict['num_envs'] + env = [create_env for i in range(num_envs)] + env = SimulatorVecEnv(env, config_dict, manual_actions_dict, reward_dict) # Set vectorized env + env = VecMonitor(env, log_dir, info_keywords=("success",)) # Monitor envs + + return env diff --git a/agent/utils_advanced/manual_actions.py b/agent/utils_advanced/manual_actions.py new file mode 100644 index 0000000..9ba1f7f --- /dev/null +++ b/agent/utils_advanced/manual_actions.py @@ -0,0 +1,381 @@ +""" +Manual actions functionality used in simulator_vec_env.py - see step() method + +Note: you can also implement your own manual function - in that case create a new function below and call it in the simulator_vec_env.py +""" + +import ast +import numpy as np + +def configure_manual_settings_and_get_manual_function(vec_env, manual_actions_dict): + """ + Depending of the input manual_behaviour (see config_advanced.py) setting, set the class variables of a SimulatorVecEnv class related to the manual actions, + and return a corresponding manual function to excecute after the end of the episode - step() method during reseting in simulator_vec_env.py + + :param 
vec_env: SimulatorVecEnv object + :param manual_actions_dict: manual actions dictionary settings + + :return: manual_func: returns a function to excecute at the end of the episode - step() method during reseting - SimulatorVecEnv class + format: manual_func(vec_env, rews, infos, enable_rewards=True) + where: + vec_env -> SimulatorVecEnv object (contains the envs objects, etc) + rews -> rewards list of all envs + infos -> infos list of dictionaries of all envs + enable_rewards -> whether to give reward/penalties during manual actions + """ + + manual_func = _send_no_manual # Do nothing by default + + # Planar envs: Down -> close -> up # + if(vec_env.manual_behaviour == "planar_grasping"): + # Clip gripper action to this limit # + if(vec_env.robotic_tool.find("3_gripper") != -1): + vec_env.gripper_clip = 90 + elif(vec_env.robotic_tool.find("2_gripper") != -1): + vec_env.gripper_clip = 250 + else: + vec_env.gripper_clip = 90 + + vec_env.manual_rewards = manual_actions_dict["manual_rewards"] # For enable_rewards + vec_env.manual_kp = manual_actions_dict["manual_kp"] # Gain for P-controller, position error + vec_env.manual_kpr = manual_actions_dict["manual_kpr"] + vec_env.manual_tol = manual_actions_dict["manual_tol"] # Tolerance of P-controller + vec_env.manual_down_height = manual_actions_dict["manual_down_height"] + vec_env.manual_up_height = manual_actions_dict["manual_up_height"] + vec_env.manual_close_steps = manual_actions_dict["manual_close_steps"] # How many steps to close the gripper (with 'manual_close_vel' per each step) + vec_env.manual_close_vel = manual_actions_dict["manual_close_vel"] # Velocity value to close the gripper per each step + vec_env.manual_steps_same_state = manual_actions_dict["manual_steps_same_state"] # How many steps the ee is allowed to not moved during the manual actions down/up + vec_env.manual_tol_same_state = manual_actions_dict["manual_tol_same_state"] # If the gripper has not moved more than this distance threshold value within 
'steps_same_state' -> it means collision + + # Controll all DoF with a P-controller during the manual actions # + vec_env.config_p_controller = {"kpr_x": 1, + "kpr_y": 1, + "kpr_z": 1, + "kp_x": 1, + "kp_y": 1, + "kp_z": 1 + } + + + ##################################################################################################### + # prev_dist: needed for 'manual_steps_same_state'. Save the previous distance of the ee to the goal # + # and compare it to the current distance. See _go_down_up_manual function below # + #################################################################################################################################### + # pid_step: counter to reset the p-controllers target pose for each env. During the down/up manual actions, # + # keep in the other DoF the same pose as the pose that the agent had at the end of the RL episode # + # e.g. rotation of the gripper - it means the P-controller does not correct the rotation when approaching the target box # + #################################################################################################################################### + vec_env.prev_dist = [] + vec_env.pid_step = [] + for env_idx in range(len(vec_env.envs)): + vec_env.pid_step.append(0) + vec_env.prev_dist.append(0) + + # Set grasping manual function to call at the end of the episode # + manual_func = _send_manual_actions_planar_grasping + + # Close the gripper only # + elif(vec_env.manual_behaviour == "close_gripper"): + # Clip gripper action to this limit # + if(vec_env.robotic_tool.find("3_gripper") != -1): + vec_env.gripper_clip = 90 + elif(vec_env.robotic_tool.find("2_gripper") != -1): + vec_env.gripper_clip = 250 + else: + vec_env.gripper_clip = 90 + + vec_env.manual_close_steps = manual_actions_dict["manual_close_steps"] + vec_env.manual_close_vel = manual_actions_dict["manual_close_vel"] + + # Set close manual function to call at the end of the episode # + manual_func = _send_manual_actions_close_gripper + + # 
Invalid option do nothing # + else: + manual_func = _send_no_manual + + return manual_func + +################################## +# Manual actions implementations # +################################## + +###################### +# 0. Nothing happens # +###################### +def _send_no_manual(vec_env, rews=None, infos=None, enable_rewards=False): + pass + +###################### +# 1. Planar grasping # +###################### +def _send_manual_actions_planar_grasping(vec_env, rews, infos, enable_rewards=True): + """ + Planar grasping manual actions: + 1. down + 2. close + 3. up + + :param rews: rewards of envs + :param infos: infos dicts of envs + :param enable_rewards: whether to give rewards/penalties during manual actions + """ + envs_active = [] + for env in vec_env.envs: # Send manual actions commands only to valid envs + if env.collided_env != 1: + envs_active.append(env) + + # 1.Down # + envs_active = _go_down_up_manual(vec_env, envs_active, rews, vec_env.manual_down_height, gripper=0.0, enable_rewards=enable_rewards) + + # 2.Close # + envs_active = _close_manual(vec_env, envs_active, rews, steps=vec_env.manual_close_steps, vel=vec_env.manual_close_vel, enable_rewards=enable_rewards) + + # 3.Up # + gripper_pos = np.clip(vec_env.manual_close_steps * vec_env.manual_close_vel, 0, vec_env.gripper_clip) # Gripper is closed - during up movement + envs_active = _go_down_up_manual(vec_env, envs_active, rews, vec_env.manual_up_height, gripper=gripper_pos, enable_rewards=enable_rewards) + +######################## +# 2. 
Close the gripper # +######################## +def _send_manual_actions_close_gripper(vec_env, rews, infos, enable_rewards=True): + """ + Close the gripper at the end of the episode + + :param rews: rewards of envs + :param infos: infos dicts of envs + :param enable_rewards: whether to give rewards/penalties during manual actions + """ + envs_active = [] + for env in vec_env.envs: # Send manual actions commands only to valid envs + if env.collided_env != 1: + envs_active.append(env) + + # 1. Close # + envs_active = _close_manual(vec_env, envs_active, rews, steps=vec_env.manual_close_steps, vel=vec_env.manual_close_vel, enable_rewards=enable_rewards) + +########### +# Helpers # +########### +def _update_envs_manual_and_check_collisions(vec_env, envs_active, rews, observations, enable_rewards=True): + """ + update the active envs (dart chain, etc) during the manual actions and give rewards/collision penalties if enabled + + :param observations: by unity + :param time_step_update: whether to update the timestep of the gym agents + - it is always set to False, as the RL-episode has been finished - else adapt to your task + + :return: terminated_environments, rews, infos, obs_converted, dones - see simulator_vec_env.py step() method for more details + """ + envs_active_new = [] + + for env in envs_active: + observation = observations[env.id] + ob_coll = ast.literal_eval(observation)["Observation"][33] + + ########################################################################## + # During the manual actions, the RL agents do not generate any velocites # + # This line of code is related to the task monitor visualization # + ########################################################################## + env.action_state = np.zeros(env.action_space_dimension) + + # Update only valid envs # + if (float(ob_coll) != 1.0 and env.joints_limits_violation() == False): + env.update(ast.literal_eval(observation), time_step_update=False) + envs_active_new.append(env) + + # Collision - 
add penalty # + else: + if (enable_rewards): + rews[env.id] += vec_env.reward_dict["reward_collision"] + env.collided_env = 1 + env.collision_flag = True # Give penality only + + # Render: Not advised to set 'enable_dart_viewer': True, during RL-training. Use it only for debugging # + vec_env.render() + + return envs_active_new + +def _go_down_up_manual(vec_env, envs_active, rews, height_target, gripper=0.0, enable_rewards=True): + """ + go down or up by using a P-controller + + :param envs_active: active envs + :param rews: rewards of envs + :param height_target: height to move the ee + :param gripper: gipper position to keep during up/down (e.g. close/open) + :param enable_rewards: whether to give penalties during manual actions + + :return: envs_active - which envs did not collided during manual actions + """ + + for env in envs_active: + env.dart_sim.ik_by_sns = True # Use sns during manual actions - adapt if needed + + # Set viewer and the change target during the up movement # + if(gripper != 0.0): + + ############################################################################################################################ + # Note: in the dart viewer the ee at the goal is more up as we assume that we have a gripper (urdf) but the gripper is not # + # yet visualized in the viewer. 
The vec_env.dart_sim.get_pos_distance() returns 0 correctly at the goal # + ############################################################################################################################ + target_object_X, _, target_object_Z = env.init_object_pose_unity[0], env.init_object_pose_unity[1], env.init_object_pose_unity[2] + tool_length = 0.0 # Adapt if the height of the box changes + target_object_RX, target_object_RY, target_object_RZ = env.get_box_rotation_in_target_dart_coords_angle_axis() + + target = [target_object_RX, target_object_RY, target_object_RZ, target_object_Z, -target_object_X, height_target + tool_length] + env.set_target(target) # Set the dart target + + # Unity expects velocities in joint space for each env # + # Send zero velocities to collided envs # + actions = gripper * np.ones((len(vec_env.envs), 8)) + + ################################################################### + # Save the last pose of the ee before the manual actions and keep # + # the DoF that we do not control with the manual actions fixed # + # e.g. 
do not rotate the gripper during the up/down movement # + ################################################################### + target_pos_quat = [None] * len(vec_env.envs) # Target orientation in quat + target_pos_x = np.zeros(len(vec_env.envs)) + target_pos_z = np.zeros(len(vec_env.envs)) + + # Set the target height # + target_pos_y = height_target + + # Wait all envs to finish # + while True: + actions[:, :7] = np.zeros((len(vec_env.envs), 7)) + flag_active = False # At least one envs has not finished with its manual actions when set to True + + # Active envs (non-collided) during this loop # + envs_active_new = [] + for env in envs_active: + + # Position of the ee in Unity # + ee_x, ee_y, ee_z, _, _, _ = env.get_ee_pose_unity() + + # Current orientation of ee in quaternion in Dart # + curr_quat = env.get_rot_ee_quat() + + # Set target pose - 0 timestep # + if (vec_env.pid_step[env.id] == 0): + target_pos_quat[env.id] = curr_quat # Keep fixed orientation + target_pos_x[env.id] = ee_x # Only move down/up keep. 
Keep fixed x, z position (dart) + target_pos_z[env.id] = ee_z + + # Current positional errors in dart # + x_diff = target_pos_z[env.id] - ee_z + y_diff = -target_pos_x[env.id] + ee_x + z_diff = target_pos_y - ee_y + + # Current orientation error in dart # + rx_diff, ry_diff, rz_diff = env.get_rot_error_from_quaternions(target_pos_quat[env.id], curr_quat) + + ############################################################################################# + # Distance from the goal pose # + # If we moved in some directions other than up/down make corrections using the P-controller # + ############################################################################################# + dist = np.linalg.norm(np.array([x_diff, y_diff, z_diff, rx_diff, ry_diff, rz_diff])) + + # Goal reached - zero vel should be send to the Unity simulator # + if (dist < vec_env.manual_tol): + envs_active_new.append(env) + continue + + ########################################################################################### + # Check if we have moved at all within "vec_env.manual_steps_same_state" time # + # e.g. 
case: ee fingers bounce at the top surface of the box when we go down -> collision # + ########################################################################################### + if (vec_env.pid_step[env.id] != 0 and vec_env.pid_step[env.id] % vec_env.manual_steps_same_state == 0): + + # The ee has not moved a lot from the previous time -> collision # + if (abs(vec_env.prev_dist[env.id] - dist) <= vec_env.manual_tol_same_state): + if (enable_rewards): + rews[env.id] += vec_env.reward_dict["reward_collision"] + else: + pass + + env.collided_env = 1 + env.collision_flag = True # Give penality only once + continue + + ###################################################### + # No collision yet and the goal has not been reached # + # Save current distance to the goal from the ee # + ###################################################### + vec_env.prev_dist[env.id] = dist + + ################################################################################## + # Calculate joint velocities. Call the custom P-controller that first calculates # + # task-space velocities and then returns, using IK, joint-space velocities # + ################################################################################## + vel = env.action_by_p_controller_custom(rx_diff, ry_diff, rz_diff, x_diff, y_diff, z_diff, vec_env.manual_kpr, vec_env.manual_kp, config_p_controller=vec_env.config_p_controller) + + actions[env.id][0:7] = vel # Set action for this env + + # Update P-controller step # + vec_env.pid_step[env.id] += 1 + flag_active = True # At lease one env is active - continue with manual actions + envs_active_new.append(env) + + # No more actions to take: all envs have reached their goal, and/or no more active envs # + if (flag_active == False): + break + else: + ############################ + # Send JSON action command # + ############################ + request = vec_env._create_request("ACTION", vec_env.envs, actions) + observations = vec_env._send_request(request) + + 
##################################################### + # Update envs (dart chain) and check for collisions # + ##################################################### + envs_active = _update_envs_manual_and_check_collisions(vec_env, envs_active_new, rews, observations, enable_rewards) + + ############################################################ + # Manual action down/up finished. Set envs_active with # + # the envs that did not collided during this manual action # + ############################################################ + envs_active = envs_active_new + + # Deactivate sns and reset P-controller # + for env in vec_env.envs: + vec_env.pid_step[env.id] = 0 + vec_env.prev_dist[env.id] = 0 + env.dart_sim.ik_by_sns = False + + return envs_active + +def _close_manual(vec_env, envs_active, rews, steps=4, vel=15, enable_rewards=True): + """ + close the gripper + + :param envs_active: active envs + :param rews: rewards of envs + :param steps: how many gripper commands to send + :param vel: velocity per command + :param enable_rewards: whether to give reward/penalties during manual actions + + :return: envs_active - which envs did not collided during manual actions + """ + + actions_close = np.zeros((len(vec_env.envs), 8)) # Do not move the robot: joints - zeros, and gripper position + + for j in range(steps): # Send gripper position for this steps + + for env in envs_active: # For each active env + actions_close[env.id][7] = np.clip((actions_close[env.id][7] + vel), 0.0, vec_env.gripper_clip) + + ############################ + # Send JSON action command # + ############################ + request = vec_env._create_request("ACTION", vec_env.envs, actions_close) + observations = vec_env._send_request(request) + + ##################################################### + # Update envs (dart chain) and check for collisions # + ##################################################### + _update_envs_manual_and_check_collisions(vec_env, envs_active, rews, observations, 
enable_rewards=enable_rewards) + + return envs_active diff --git a/agent/utils_advanced/monitoring_agent.py b/agent/utils_advanced/monitoring_agent.py new file mode 100644 index 0000000..bdc6f6d --- /dev/null +++ b/agent/utils_advanced/monitoring_agent.py @@ -0,0 +1,98 @@ +""" +Monitoring functionality for SB3 gym agents - save checkpoints and additional metrics +""" + +import os +import sys +from pathlib import Path +from tqdm import tqdm + +import numpy as np + +from stable_baselines3.common.results_plotter import load_results +from stable_baselines3.common.callbacks import BaseCallback + +from torch.utils.tensorboard import SummaryWriter + +class SaveOnBestTrainingRewardCallback(BaseCallback): + def __init__(self, check_freq: int, save_model_freq: int, log_dir: str, total_timesteps: int, num_envs: int, best_mean="inf", verbose=1): + + super(SaveOnBestTrainingRewardCallback, self).__init__(verbose) + + self.check_freq = check_freq # Check if there is any improvement from the previous best model - frequency + self.save_model_freq = save_model_freq # Save model checkpoints at this frequency + self.model_id = 0 + self.log_dir = log_dir + self.num_envs = num_envs + self.save_path = os.path.join(log_dir, 'best_model') # One best model per run + self.save_path_model = os.path.join(log_dir, 'model') # Save many model checkpoints + self.best_mean_success = 0.0 + + # Create path to save models if not exists # + # Tensorboard summary writer and custom metrics # + Path(self.log_dir).mkdir(parents=True, exist_ok=True) + + custom_logs_path = log_dir + "custom_log_tb/" + Path(custom_logs_path).mkdir(parents=True, exist_ok=True) + + writer = SummaryWriter(custom_logs_path) + self.writer = writer + + if (best_mean == "inf"): # Training from the beginning + self.best_mean_reward = -np.inf + else: # Best model already exists and the user has defined a 'best_mean_reward' value + self.best_mean_reward = float(best_mean) + + + self.progress_bar = tqdm(total=total_timesteps, 
file=sys.stdout) + self.progress_bar.set_postfix({"Mean Reward": 0.0, "Success ratio": 0.0}) + + def _on_step(self) -> bool: + ''' + Update the best_mean_reward - using the 50 last episodes (adapt if needed)- and save metrics and checkpoints + ''' + if self.n_calls % self.save_model_freq == 0: # Save model checkpoint + print("Saving model number: " + str(self.model_id)) + self.model.save(self.save_path_model + "_" + str(self.num_timesteps) + "_" + str(self.model_id)) + self.model_id += 1 + + if self.n_calls % self.check_freq == 0: # Update best_mean_reward and metrics + + results = load_results(self.log_dir) + + if len(results.index) > 0: + mean_reward = np.mean(results["r"].tail(50)) + mean_success = np.mean(results["success"].tail(50)) + + # Update summary writer # + self.writer.add_scalar('Reward/ep_rew_best', self.best_mean_reward, self.num_timesteps) + self.writer.add_scalar('Reward/ep_reward_mean', mean_reward, self.num_timesteps) + self.writer.add_scalar('Reward/ep_success_mean', mean_success, self.num_timesteps) + self.writer.flush() + + self.progress_bar.update(self.n_calls * self.num_envs - self.progress_bar.n) + self.progress_bar.set_postfix({"Mean Reward": mean_reward, "Success ratio": mean_success}) + + if mean_success > self.best_mean_success: + self.best_mean_success = mean_success + + # New best model: save it - based on the reward # + if mean_reward > self.best_mean_reward: + print(f"Saving new best model to {self.save_path}.zip") + self.model.save(self.save_path) + self.best_mean_reward = mean_reward + + return True + + def _on_training_end(self) -> None: + """ + Print some stats at the end of the agenttraining + """ + + print("Training ended with best mean reward: " + str(self.best_mean_reward)) + self.writer.add_scalar('best_reward', self.best_mean_reward) + self.writer.flush() + + self.progress_bar.close() + self.writer.close() + diff --git a/docker/requirements.txt b/docker/requirements.txt deleted file mode 100644 index d9b6994..0000000 --- 
a/docker/requirements.txt +++ /dev/null @@ -1,81 +0,0 @@ -absl-py==0.7.1 -asn1crypto==0.24.0 -astor==0.8.0 -atari-py==0.2.6 -attrs==20.3.0 -autobahn==20.12.2 -Automat==20.2.0 -Box2D==2.3.10 -ccd-tools==0.8.17 -cffi==1.14.4 -cloudpickle==1.6.0 -cmake==3.18.4.post1 -constantly==15.1.0 -cryptography==3.3.1 -cycler==0.10.0 -Cython==0.29.21 -decorator==4.4.2 -future==0.18.2 -gast==0.2.2 -geccoinv==0.8.6 -glfw==2.0.0 -google-pasta==0.1.7 -grpcio==1.21.1 -gym==0.17.3 -h5py==2.9.0 -hyperlink==20.0.1 -idna==2.6 -imageio==2.9.0 -incremental==17.5.0 -joblib==1.0.0 -Keras-Applications==1.0.8 -Keras-Preprocessing==1.1.0 -keyring==10.6.0 -keyrings.alt==3.0 -kiwisolver==1.3.1 -lockfile==0.12.2 -Markdown==3.1.1 -matplotlib==3.3.3 -networkx==2.5 -numpy==1.16.4 -opencv-python==4.4.0.46 -pandas==1.1.5 -Pillow==8.0.1 -protobuf==3.19.4 -pyaml==20.4.0 -pycparser==2.20 -pycrypto==2.6.1 -pyglet==1.5.0 -#pygobject==3.26.1 -PyHamcrest==2.0.2 -pyparsing==2.4.7 -#python-apt==1.6.4 -python-dateutil==2.8.1 -pytz==2020.4 -PyWavelets==1.1.1 -pyxdg==0.25 -PyYAML==5.3.1 -pyzmq==20.0.0 -roslibpy==1.1.0 -scikit-image==0.17.2 -scikit-learn==0.24.1 -scikit-optimize==0.8.1 -scipy==1.5.4 -SecretStorage==2.3.1 -sip-models==0.1.2 -six==1.13.0 -#stable-baselines==2.10.1 -tensorboard==1.14.0 -tensorflow-estimator==1.14.0 -tensorflow-gpu==1.14.0 -termcolor==1.1.0 -threadpoolctl==2.1.0 -tifffile==2020.9.3 -torch==1.10.2 -Twisted==20.3.0 -txaio==20.4.1 -Werkzeug==0.15.4 -wandb==0.12.11 -wrapt==1.11.2 -zmq==0.0.0 -zope.interface==5.2.0