diff --git a/examples/find_and_avoid_v2_video/controllers/dqn_controller/DQN_testing.py b/examples/find_and_avoid_v2_video/controllers/dqn_controller/DQN_testing.py new file mode 100644 index 00000000..6fe1b4a4 --- /dev/null +++ b/examples/find_and_avoid_v2_video/controllers/dqn_controller/DQN_testing.py @@ -0,0 +1,153 @@ +import numpy as np +import torch +import random +from stable_baselines3 import DQN +from sb3_contrib.common.maskable.evaluation import evaluate_policy + + +def mask_fn(env): + return env.get_action_mask() + + +def run(experiment_name, env, deterministic, use_masking, testing_results_filename=None, tests_per_difficulty=100, + seed=None): + # Difficulty setup + """ + - diff_0: Corridor without obstacles, varied distance, trivial + - diff_1: Corridor map with one row of two obstacles. Forces the agent to move around obstacles. + - diff_2: Corridor map with two rows of four obstacles. Same as before but more difficult. + - diff_3: Corridor map with three rows of six obstacles. Same as before but more difficult. + - diff_4: Corridor map with four of eight obstacles. Same as before but more difficult. + - diff_5: Random map with all available obstacles. Final complex difficulty in a more general environment. + """ + difficulty_dict = {"diff_0": {"type": "corridor", "number_of_obstacles": 0, + "min_target_dist": 1, "max_target_dist": 3}, + "diff_1": {"type": "corridor", "number_of_obstacles": 2, + "min_target_dist": 2, "max_target_dist": 2}, + "diff_2": {"type": "corridor", "number_of_obstacles": 4, + "min_target_dist": 3, "max_target_dist": 3}, + "diff_3": {"type": "corridor", "number_of_obstacles": 6, + "min_target_dist": 4, "max_target_dist": 4}, + "diff_4": {"type": "corridor", "number_of_obstacles": 8, + "min_target_dist": 5, "max_target_dist": 5}, + "diff_5": {"type": "random", "number_of_obstacles": 25, + "min_target_dist": 5, "max_target_dist": 12}} + difficulty_keys = list(difficulty_dict.keys()) + + env.reset_on_collisions = -1 # Never terminate on collisions + env.set_maximum_episode_steps(env.maximum_episode_steps * 2) # Double episode timeout limit + # Set default reward weights to make rewards comparable on testing + env.set_reward_weight_dict(target_distance_weight=0.0, target_angle_weight=0.0, dist_sensors_weight=0.0, + target_reach_weight=1000.0, collision_weight=1.0, + smoothness_weight=0.0, speed_weight=0.0) + experiment_dir = f"./experiments/{experiment_name}" + load_path = experiment_dir + f"/{experiment_name}_diff_0_agent" + + if seed is not None: + random.seed(seed) + torch.manual_seed(seed) + np.random.seed(seed) + + try: + model = DQN.load(load_path) # NOQA + except FileNotFoundError: + load_path += ".zip" + model = DQN.load(load_path) # NOQA + + env.set_difficulty(difficulty_dict["diff_0"], "diff_0") + print("################### SB3 EVALUATION STARTED ###################") + print("Evaluating for 100 episodes in diff_0 (random map).") + print(f"Experiment name: {experiment_name}, deterministic: {deterministic}") + # Evaluate the agent with sb3 + mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=tests_per_difficulty, + deterministic=deterministic, use_masking=use_masking) + print("################### SB3 EVALUATION FINISHED ###################") + print(f"Experiment name: {experiment_name}, deterministic: {deterministic}") + print(f"Mean reward: {mean_reward} \n" + f"STD reward: {std_reward}") + + print("################### CUSTOM EVALUATION STARTED ###################") + print(f"Experiment name: {experiment_name}, deterministic: {deterministic}") + diff_ind = 0 + env.set_difficulty(difficulty_dict[difficulty_keys[diff_ind]], difficulty_keys[diff_ind]) + + import csv + header = [experiment_name] + for i in range(len(difficulty_keys)): + for j in range(tests_per_difficulty): + header.append(f"{difficulty_keys[i]}") + + episode_rewards = ["reward"] + done_reasons = ["done_reason"] + steps_row = ["steps"] + if testing_results_filename is None: + file_name = "/testing_results.csv" if not deterministic else "/testing_results_det.csv" + else: + file_name = f"/{testing_results_filename}.csv" if not deterministic else f"/{testing_results_filename}_det.csv" + with open(experiment_dir + file_name, 'w', encoding='UTF8') as f: + writer = csv.writer(f) + writer.writerow(header) + steps = 0 + cumulative_rew = 0.0 + tests_count = 0 + obs = env.reset() + successes_counter = 0 + while True: + if use_masking: + action_masks = mask_fn(env) + else: + action_masks = None + action, _states = model.predict(obs, deterministic=deterministic)#, action_masks=action_masks) + obs, rewards, done, info = env.step(action) + steps += 1 + cumulative_rew += rewards + if done: + episode_rewards.append(cumulative_rew) + steps_row.append(steps) + if info["done_reason"] == "reached target": + successes_counter += 1 + print(f"{experiment_name} - Episode reward: {cumulative_rew}, steps: {steps}") + full_test_count = (tests_per_difficulty * diff_ind) + (tests_count + 1) + max_test_count = tests_per_difficulty * len(difficulty_keys) + try: + print(f"Reached target percentage: {100 * successes_counter / full_test_count} %") + except ZeroDivisionError: + print(f"Reached target percentage: {100 * successes_counter} %") + print(f"Testing progress: " + f"{round((full_test_count / max_test_count) * 100.0, 2)}" + f"%, {full_test_count} / {max_test_count}") + done_reasons.append(info['done_reason']) + cumulative_rew = 0.0 + steps = 0 + + tests_count += 1 + if tests_count == tests_per_difficulty: + diff_ind += 1 + try: + env.set_difficulty(difficulty_dict[difficulty_keys[diff_ind]], key=difficulty_keys[diff_ind]) + except IndexError: + print("Testing complete.") + break + tests_count = 0 + obs = env.reset() + + writer.writerow(episode_rewards) + writer.writerow(done_reasons) + writer.writerow(steps_row) + writer.writerow(["sb3 rew:", mean_reward, "sb3 std:", std_reward]) + print("################### CUSTOM EVALUATION FINISHED ###################") + # Continue running until user shuts the simulation down + # steps = 0 + # cumulative_rew = 0.0 + # obs = env.reset() + # while True: + # action_masks = mask_fn(env) + # action, _states = model.predict(obs, deterministic=deterministic, action_masks=action_masks) + # obs, rewards, done, info = env.step(action) + # cumulative_rew += rewards + # steps += 1 + # if done: + # print(f"{experiment_name} - Episode reward: {cumulative_rew}, steps: {steps}") + # cumulative_rew = 0.0 + # steps = 0 + # obs = env.reset() diff --git a/examples/find_and_avoid_v2_video/controllers/dqn_controller/DQN_trainer.py b/examples/find_and_avoid_v2_video/controllers/dqn_controller/DQN_trainer.py new file mode 100644 index 00000000..a5086c9a --- /dev/null +++ b/examples/find_and_avoid_v2_video/controllers/dqn_controller/DQN_trainer.py @@ -0,0 +1,308 @@ +import os +import numpy as np +import torch + +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.callbacks import BaseCallback +from stable_baselines3.common.logger import HParam, Video +from sb3_contrib.common.wrappers import ActionMasker +from stable_baselines3 import DQN + +from find_and_avoid_v2_robot_supervisor import FindAndAvoidV2RobotSupervisor + + +class AdditionalInfoCallback(BaseCallback): + """ + A custom callback that derives from ``BaseCallback``. + + :param verbose: (int) Verbosity level 0: not output 1: info 2: debug + """ + + def __init__(self, experiment_name, env, current_difficulty=None, render_interval=100, verbose=1): + super(AdditionalInfoCallback, self).__init__(verbose) + # Those variables will be accessible in the callback + # (they are defined in the base class) + # The RL model + # self.model = None # type: BaseRLModel + # An alias for self.model.get_env(), the environment used for training + # self.training_env = None # type: Union[gym.Env, VecEnv, None] + # Number of time the callback was called + # self.n_calls = 0 # type: int + # self.num_timesteps = 0 # type: int + # local and global variables + # self.locals = None # type: Dict[str, Any] + # self.globals = None # type: Dict[str, Any] + # The logger object, used to report things in the terminal + # self.logger = None # type: logger.Logger + # # Sometimes, for event callback, it is useful + # # to have access to the parent object + # self.parent = None # type: Optional[BaseCallback] + self.experiment_name = experiment_name + self.current_difficulty = current_difficulty + self.env = env + self.frames = [] # List to store frames for GIF creation + self.episode_cnt = 1 + self.record = False + self.render_interval=render_interval + + def _on_training_start(self) -> None: + """ + This method is called before the first rollout starts. + """ + self.logger.record("experiment_name", self.experiment_name) + self.logger.record("difficulty", self.current_difficulty) + hparam_dict = { + "experiment_name": self.experiment_name, + "difficulty": self.current_difficulty, + "algorithm": self.model.__class__.__name__, + "gamma": self.model.gamma, # NOQA + "batch_size": self.model.batch_size, # NOQA + "maximum_episode_steps": self.env.maximum_episode_steps, + "step_window": self.env.step_window, + "seconds_window": self.env.seconds_window, + "add_action_to_obs": self.env.add_action_to_obs, + "reset_on_collisions": self.env.reset_on_collisions, + "on_target_threshold": self.env.on_target_threshold, + "ds_type": self.env.ds_type, + "ds_noise": self.env.ds_noise, + "target_distance_weight": self.env.reward_weight_dict["dist_tar"], + "tar_angle_weight": self.env.reward_weight_dict["ang_tar"], + "dist_sensors_weight": self.env.reward_weight_dict["dist_sensors"], + "tar_reach_weight": self.env.reward_weight_dict["tar_reach"], + "collision_weight": self.env.reward_weight_dict["collision"], + "smoothness_weight": self.env.reward_weight_dict["smoothness_weight"], + "speed_weight": self.env.reward_weight_dict["speed_weight"], + + } + # define the metrics that will appear in the `HPARAMS` Tensorboard tab by referencing their tag + # Tensorboard will find & display metrics from the `SCALARS` tab + metric_dict = { + "rollout/ep_len_mean": 0, + "rollout/ep_rew_mean": 0, + "rollout/collision termination count": 0.0, + "rollout/reach target count": 0.0, + "rollout/timeout count": 0.0, + "rollout/reset count": 0.0, + "rollout/success percentage": 0.0, + "rollout/smoothness": 0.0, + "rollout/min_dist_reached": 0.0, + "learning rate": self.model.learning_rate, + } + self.logger.record( + "hparams", + HParam(hparam_dict, metric_dict), + exclude=("stdout", "log", "json", "csv"), + ) + + def _on_rollout_start(self) -> None: + """ + A rollout is the collection of environment interaction + using the current policy. + This event is triggered before collecting new samples. + """ + + def _on_step(self) -> bool: + """ + This method will be called by the model after each call to `env.step()`. + + For child callback (of an `EventCallback`), this will be called + when the event is triggered. + + :return: (bool) If the callback returns False, training is aborted early. + """ + if self.env.done: + if self.episode_cnt % self.render_interval == 0: + self.env.camera.enable(self.env.timestep * 10) # basic time step = 32 + self.record = True + else: + self.env.camera.disable() + self.record = False + self.episode_cnt+=1 + self.frames = [] + print(f'Starting Episode {self.episode_cnt} ...') + + return True + + def _on_rollout_end(self) -> None: + """ + This event is triggered before updating the policy. + """ + self.logger.record("general/experiment_name", self.experiment_name) + self.logger.record("general/difficulty", self.current_difficulty) + self.logger.record("rollout/reset count", self.env.reset_count) + self.logger.record("rollout/reach target count", self.env.reach_target_count) + self.logger.record("rollout/collision termination count", self.env.collision_termination_count) + self.logger.record("rollout/timeout count", self.env.timeout_count) + if len(self.env.smoothness_list) >= 1: + self.logger.record("rollout/smoothness", float(np.mean(self.env.smoothness_list))) + else: + self.logger.record("rollout/smoothness", -1.0) + if len(self.env.min_dist_reached_list) > 1: + self.logger.record("rollout/min_dist_reached", np.mean(self.env.min_dist_reached_list)) + else: + self.logger.record("rollout/min_dist_reached", 0.0) + if self.env.reach_target_count == 0 or self.env.reset_count == 0: + self.logger.record("rollout/success percentage", 0.0) + else: + self.logger.record("rollout/success percentage", + self.env.reach_target_count / self.env.reset_count) + + if self.record: + # Save the frames to tensorboard + frame = self.env.render(mode='rgb_array') # (c, h, w) + self.frames.append(frame) + video = np.asarray([self.frames]) + self.logger.record("visualization", + Video(torch.from_numpy(video), fps=30), + exclude=("stdout", "log", "json", "csv")) + + def _on_training_end(self) -> None: + """ + This event is triggered before exiting the `learn()` method. + """ + self.env.clear_smoothness_list() + self.env.clear_min_dist_reached_list() + pass + + +def mask_fn(env): + return env.get_action_mask() + + +def run(experiment_name="experiment", + experiment_description="", + manual_control=False, + only_test=False, + maximum_episode_steps=16_384, + step_window=1, + seconds_window=1, + add_action_to_obs=False, + ds_params=None, + reset_on_collisions=4_096, + on_tar_threshold=0.1, + target_dist_weight=1.0, + target_angle_weight=1.0, + dist_sensors_weight=10.0, + target_reach_weight=1000.0, + collision_weight=100.0, + smoothness_weight=0.0, + speed_weight=0.0, + map_w=7, map_h=7, + cell_size=None, + seed=None, + net_arch=None, + gamma=0.999, + lr_rate=None, + difficulty_dict=None, + n_steps=2048, + batch_size=64, + ds_denial_list=None, + tau=1e-3, + num_steps_for_update=4, + exp_frac=0.2, + eps_init=1.0, + eps_final=0.01, + reward_avg_pts=100, + log_interval=4, + render_interval=100 + ): + + experiment_dir = f"./experiments/{experiment_name}" + + if seed is not None: + torch.manual_seed(seed) + np.random.seed(seed) + + if lr_rate is None: + lr_rate = lambda f: f * 3e-4 # NOQA Linear decreasing schedule + + + if ds_params is None: + ds_type = "generic" + ds_n_rays = 1 + ds_aperture = 1.57 + ds_resolution = -1.0 + ds_noise = 0.0 + max_ds_range = 100.0 + else: + ds_type = ds_params["ds_type"] + ds_n_rays = ds_params["ds_n_rays"] + ds_aperture = ds_params["ds_aperture"] + ds_resolution = ds_params["ds_resolution"] + ds_noise = ds_params["ds_noise"] + max_ds_range = ds_params["max_ds_range"] + + env = \ + Monitor( + ActionMasker( + FindAndAvoidV2RobotSupervisor(experiment_description, + maximum_episode_steps, + step_window=step_window, + seconds_window=seconds_window, + add_action_to_obs=add_action_to_obs, + max_ds_range=max_ds_range, + reset_on_collisions=reset_on_collisions, + manual_control=manual_control, + on_target_threshold=on_tar_threshold, + ds_type=ds_type, + ds_n_rays=ds_n_rays, + ds_aperture=ds_aperture, + ds_resolution=ds_resolution, + ds_noise=ds_noise, + ds_denial_list=ds_denial_list, + target_distance_weight=target_dist_weight, + target_angle_weight=target_angle_weight, + dist_sensors_weight=dist_sensors_weight, + target_reach_weight=target_reach_weight, + collision_weight=collision_weight, + smoothness_weight=smoothness_weight, + speed_weight=speed_weight, + map_width=map_w, map_height=map_h, + cell_size=cell_size, seed=seed), + action_mask_fn=mask_fn)) + if not only_test: + if not os.path.exists(experiment_dir): + os.makedirs(experiment_dir) + env.export_parameters(experiment_dir + f"/{experiment_name}_parameters.json", + net_arch, gamma, n_steps, batch_size) + + policy_kwargs = dict(activation_fn=torch.nn.ReLU, net_arch=net_arch) + + # DQN model + model = DQN(policy= 'MlpPolicy', + env=env, + learning_rate=lr_rate, + learning_starts=batch_size, + batch_size=batch_size, + tau=tau, + gamma=gamma, + train_freq=num_steps_for_update, + exploration_fraction=exp_frac, + exploration_initial_eps=eps_init, + exploration_final_eps=eps_final, + stats_window_size=reward_avg_pts, + tensorboard_log=experiment_dir, + policy_kwargs=policy_kwargs) + + model = None + + difficulty_keys = list(difficulty_dict.keys()) + printing_callback = AdditionalInfoCallback(experiment_name, env, verbose=1, + render_interval=render_interval, + current_difficulty=difficulty_keys[0]) + + for diff_key in difficulty_keys: + env.set_difficulty(difficulty_dict[diff_key]) + printing_callback.current_difficulty = diff_key + + model.learn(total_timesteps=difficulty_dict[diff_key]["total_timesteps"], + tb_log_name=diff_key, + reset_num_timesteps=False, + callback=printing_callback, + log_interval=log_interval) + + model.save(experiment_dir + f"/{experiment_name}_{diff_key}_agent") + print("################### TRAINING FINISHED ###################") + else: + print(f"Training skipped, only testing for experiment: {experiment_name}.") + return env diff --git a/examples/find_and_avoid_v2_video/controllers/dqn_controller/dqn_controller.py b/examples/find_and_avoid_v2_video/controllers/dqn_controller/dqn_controller.py new file mode 100644 index 00000000..54c7f9e9 --- /dev/null +++ b/examples/find_and_avoid_v2_video/controllers/dqn_controller/dqn_controller.py @@ -0,0 +1,130 @@ +''' +DQN Controller for the mobile robot +''' +import DQN_trainer +import DQN_testing + +experiment_name = "trained_agent" +experiment_description = """The baseline agent trained on default parameters.""" +only_test = False # If true, the trained agent from "experiment_name" will be loaded and evaluated +test_results_filename = "testing_results" + +# Testing setup +deterministic = False +use_masking = False +manual_control = False + +# Distance sensors setup +ds_params = {"ds_type": "sonar", # generic, sonar + "ds_n_rays": 4, # 1, 4 + "ds_aperture": 0.1, # 1.57, 0.1 + "ds_resolution": -1.0, # -1.0, 1.0 + "ds_noise": 0.025, # 0.0, 0.025 + "max_ds_range": 100 # in cm + } +""" +- diff_0: Random map with a few obstacles, mostly easy clear or almost clear paths to the target for initial learning. +- diff_1: Corridor map with one row of two obstacles. Forces the agent to move around obstacles more. +- diff_2: Corridor map with two rows of four obstacles. Same as before but more difficult. +- diff_3: Corridor map with three rows of six obstacles. Same as before but more difficult. +- diff_4: Corridor map with four or five rows of eight or ten obstacles. Same as before but more difficult. +- diff_5: Random map with all available obstacles. Final complex difficulty in a more general environment. +""" +difficulty_dict = {"diff_0": {"type": "random", "number_of_obstacles": 10, + "min_target_dist": 10, "max_target_dist": 10, "total_timesteps": 262144}, + "diff_1": {"type": "corridor", "number_of_obstacles": 2, + "min_target_dist": 2, "max_target_dist": 2, "total_timesteps": 524288}, + "diff_2": {"type": "corridor", "number_of_obstacles": 4, + "min_target_dist": 3, "max_target_dist": 3, "total_timesteps": 524288}, + "diff_3": {"type": "corridor", "number_of_obstacles": 6, + "min_target_dist": 4, "max_target_dist": 4, "total_timesteps": 524288}, + "diff_4": {"type": "corridor", "number_of_obstacles": 10, + "min_target_dist": 5, "max_target_dist": 6, "total_timesteps": 524288}, + "diff_5": {"type": "random", "number_of_obstacles": 25, + "min_target_dist": 10, "max_target_dist": 12, "total_timesteps": 1048576}} + +# Environment setup +maximum_episode_steps = 4096 # Steps for episode timeout + +step_window = 1 # Latest steps of observations +seconds_window = 1 # How many latest seconds of observations +add_action_to_obs = True +reset_on_collisions = 1 # Reset on number of collisions +on_tar_threshold = 1 # The distance under which the robot is considered "on target" + +# How many test episodes to run on sb3 evaluation and then for each difficulty in the custom +# evaluation, see DQN_testing.py +tests_per_difficulty = 10 + +# Distance sensor denial list +ds_denial_list = [] + +# Reward weights +target_dist_weight = 1.0 +target_angle_weight = 1.0 +dist_sensors_weight = 1.0 +target_reach_weight = 1.0 +collision_weight = 1.0 +smoothness_weight = 0.0 +speed_weight = 0.0 + +# Training setup +batch_size = 64 +gamma = 0.999 +lr_rate = 3e-4 +tau=1e-3 +num_steps_for_update = 4 +exp_frac = 0.2 # fraction of training time for decaying exploration rate +eps_init = 1.0 +eps_final = 0.05 +reward_avg_pts = 100 +log_interval=4 +render_interval=100 # number of episodes before redering video to tensorboard +net_arch = [64, 64] # Quality network architecture (hidden layers) + +# Map setup +map_w, map_h = 7, 7 +cell_size = None + +seed = 1 +env = DQN_trainer.run(experiment_name=experiment_name, + experiment_description=experiment_description, + manual_control=manual_control, + only_test=only_test, + maximum_episode_steps=maximum_episode_steps, + step_window=step_window, + seconds_window=seconds_window, + add_action_to_obs=add_action_to_obs, + ds_params=ds_params, + reset_on_collisions=reset_on_collisions, + on_tar_threshold=on_tar_threshold, + target_dist_weight=target_dist_weight, + target_angle_weight=target_angle_weight, + dist_sensors_weight=dist_sensors_weight, + target_reach_weight=target_reach_weight, + collision_weight=collision_weight, + smoothness_weight=smoothness_weight, + speed_weight=speed_weight, + net_arch=net_arch, + n_steps=None, + batch_size=batch_size, + gamma=gamma, + map_w=map_w, + map_h=map_h, + cell_size=cell_size, + lr_rate=lr_rate, + difficulty_dict=difficulty_dict, + seed=seed, + tau=tau, + num_steps_for_update=num_steps_for_update, + exp_frac=exp_frac, + eps_init=eps_init, + eps_final=eps_final, + reward_avg_pts=reward_avg_pts, + log_interval=log_interval, + render_interval=render_interval + ) +seed = 2 +env.ds_denial_list = ds_denial_list # Distance sensor denial is added only for testing +DQN_testing.run(experiment_name, env, deterministic, use_masking, testing_results_filename=test_results_filename, + tests_per_difficulty=tests_per_difficulty, seed=seed) diff --git a/examples/find_and_avoid_v2_video/controllers/dqn_controller/find_and_avoid_v2_robot_supervisor.py b/examples/find_and_avoid_v2_video/controllers/dqn_controller/find_and_avoid_v2_robot_supervisor.py new file mode 100644 index 00000000..6e79f38a --- /dev/null +++ b/examples/find_and_avoid_v2_video/controllers/dqn_controller/find_and_avoid_v2_robot_supervisor.py @@ -0,0 +1,1424 @@ +import random +from warnings import warn +import numpy as np +from gym.spaces import Box, Discrete +from deepbots.supervisor import RobotSupervisorEnv +from utilities import normalize_to_range, get_distance_from_target, get_angle_from_target +from controller import Supervisor, Keyboard + + +class FindAndAvoidV2RobotSupervisor(RobotSupervisorEnv): + """ + This is the updated and expanded second version of find and avoid. + For this example, a simple custom differential drive robot is used, equipped with + forward-facing distance and touch sensors. The goal is to navigate to a target by modifying + the speeds of the left and right motor, while avoiding obstacles using the distance and touch sensors. + The agent observes its distance to the target as well as its relative facing angle, its motor speeds, the touch + and distance sensor values and its latest action. The observation can also be augmented with observations + from earlier steps. + + Distance sensors scheme + Sensor indices/positions: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Frontal sensors slice: [4: frontal :9] + Left/right sensors slices: [0: left :5] [8: right :13] + + :param description: A description that can be saved in an exported file + :type description: str + :param maximum_episode_steps: The maximum steps per episode before timeout reset + :type maximum_episode_steps: int + :param step_window: How many steps of observations to add in the observation window, defaults to 1 + :type step_window: int, optional + :param seconds_window: How many seconds of observations to add in the observation window, defaults to 1 + :type seconds_window: int, optional + :param add_action_to_obs: Whether to add the latest action one-hot vector to the observation, defaults to True + :type add_action_to_obs: bool, optional + :param reset_on_collisions: On how many steps of collisions to reset, defaults to 0 + :type reset_on_collisions: int, optional + :param manual_control: Whether to override agent actions with user keyboard control, defaults to False + :type manual_control: bool, optional + :param on_target_threshold: The threshold under which the robot is considered on target, defaults to 0.1 + :type on_target_threshold: float, optional + :param max_ds_range: The maximum range of the distance sensors in cm, defaults to 100.0 + :type max_ds_range: float, optional + :param ds_type: The type of distance sensors to use, can be either "generic" or "sonar", defaults to "generic" + :type ds_type: str, optional + :param ds_n_rays: The number of rays per sensor, defaults to 1 + :type ds_n_rays: int, optional + :param ds_aperture: The angle at which the rays of each sensor are spread, defaults to 0.1 radians + :type ds_aperture: float, optional + :param ds_resolution: Minimum resolution of the sensors, the minimum change it can read, defaults to -1 (infinite) + :type ds_resolution: float, optional + :param ds_noise: The percentage of gaussian noise to add to the distance sensors, defaults to 0.0 + :type ds_noise: float, optional + :param ds_denial_list: The list of distance sensor indices to disable, defaults to None + :type ds_denial_list: list, optional + :param target_distance_weight: The target distance reward weight, defaults to 1.0 + :type target_distance_weight: float, optional + :param target_angle_weight: The target angle reward weight, defaults to 1.0 + :type target_angle_weight: float, optional + :param dist_sensors_weight: The distance sensors reward weight, defaults to 1.0 + :type dist_sensors_weight: float, optional + :param smoothness_weight: The smoothness reward weight, defaults to 1.0 + :type smoothness_weight: float, optional + :param speed_weight: The speed reward weight, defaults to 1.0 + :type speed_weight: float, optional + :param target_reach_weight: The target reach reward weight, defaults to 1.0 + :type target_reach_weight: float, optional + :param collision_weight: The collision reward weight, defaults to 1.0 + :type collision_weight: float, optional + :param map_width: The map width, defaults to 7 + :type map_width: int, optional + :param map_height: The map height, defaults to 7 + :type map_height: int, optional + :param cell_size: The cell size, defaults to None, [0.5, 0.5] + :type cell_size: list, optional + :param seed: The random seed, defaults to None + :type seed: int, optional + """ + + def __init__(self, description, maximum_episode_steps, step_window=1, seconds_window=0, add_action_to_obs=True, + reset_on_collisions=0, manual_control=False, on_target_threshold=0.1, + max_ds_range=100.0, ds_type="generic", ds_n_rays=1, ds_aperture=0.1, + ds_resolution=-1, ds_noise=0.0, ds_denial_list=None, + target_distance_weight=1.0, target_angle_weight=1.0, dist_sensors_weight=1.0, + target_reach_weight=1.0, collision_weight=1.0, smoothness_weight=1.0, speed_weight=1.0, + map_width=7, map_height=7, cell_size=None, seed=None): + super().__init__() + + ################################################################################################################ + # General + self.seed = seed + if seed is not None: + random.seed(seed) + self.experiment_desc = description + self.manual_control = manual_control + + # Viewpoint stuff used to reset camera position + self.viewpoint = self.getFromDef("VIEWPOINT") + self.viewpoint_position = self.viewpoint.getField("position").getSFVec3f() + self.viewpoint_orientation = self.viewpoint.getField("orientation").getSFRotation() + + # Keyboard control + self.keyboard = Keyboard() + self.keyboard.enable(self.timestep) + + if ds_denial_list is None: + self.ds_denial_list = [] + else: + self.ds_denial_list = ds_denial_list + + # Camera setup + # Add a camera device to capture frames + self.camera = self.getDevice("camera") # Ensure you have a camera device named "camera" in your Webots world + ################################################################################################################ + # Robot setup + self.done = False + # Set up various robot components + self.robot = self.getSelf() + self.number_of_distance_sensors = 13 # Fixed according to ds that exist on robot + + # Set up gym observation and action spaces + # The action mapping is as follows: + # - 0: Increase both motor speeds, forward action + # - 1: Decrease both motor speeds, backward action + # - 2: Increase right motor speed, decrease left motor speed, turn left + # - 3: Increase left motor speed, decrease right motor speed, turn right + # - 4: No change in motor speeds, no action + self.action_space = Discrete(5) + + self.add_action_to_obs = add_action_to_obs + self.step_window = step_window + self.seconds_window = seconds_window + self.obs_list = [] + # Set up observation low values + # Distance to target, angle to target, motor speed left, motor speed right, touch left, touch right + single_obs_low = [0.0, -1.0, -1.0, -1.0, 0.0, 0.0] + # Add action one-hot vector + if self.add_action_to_obs: + single_obs_low.extend([0.0 for _ in range(self.action_space.n)]) + # Append distance sensor values + single_obs_low.extend([0.0 for _ in range(self.number_of_distance_sensors)]) + + # Set up corresponding observation high values + single_obs_high = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + if self.add_action_to_obs: + single_obs_high.extend([1.0 for _ in range(self.action_space.n)]) + single_obs_high.extend([1.0 for _ in range(self.number_of_distance_sensors)]) + + # Expand sizes depending on step window and seconds window + self.single_obs_size = len(single_obs_low) + obs_low = [] + obs_high = [] + for _ in range(self.step_window + self.seconds_window): + obs_low.extend(single_obs_low) + obs_high.extend(single_obs_high) + self.obs_list.extend([0.0 for _ in range(self.single_obs_size)]) + # Memory is used for creating the windows in get_observation() + self.obs_memory = [[0.0 for _ in range(self.single_obs_size)] + for _ in range((self.seconds_window * int(np.ceil(1000 / self.timestep))) + + self.step_window)] + self.observation_counter_limit = int(np.ceil(1000 / self.timestep)) + self.observation_counter = self.observation_counter_limit + + # Finally initialize space + self.observation_space = Box(low=np.array(obs_low), + high=np.array(obs_high), + dtype=np.float64) + + # Set up sensors + self.distance_sensors = [] + self.ds_max = [] + self.ds_type = ds_type + self.ds_n_rays = ds_n_rays + self.ds_aperture = ds_aperture + self.ds_resolution = ds_resolution + self.ds_noise = ds_noise + # The minimum distance sensor thresholds, under which there is an obstacle obstructing forward movement + # Note that these values are highly dependent on how the sensors are placed on the robot + self.ds_thresholds = [8.0, 8.0, 8.0, 10.15, 14.7, 13.15, + 12.7, + 13.15, 14.7, 10.15, 8.0, 8.0, 8.0] + # Loop through the ds_group node to set max sensor values, initialize the devices, set the type, etc. + robot_children = self.robot.getField("children") + for childNodeIndex in range(robot_children.getCount()): + robot_child = robot_children.getMFNode(childNodeIndex) # NOQA + if robot_child.getTypeName() == "Group": + ds_group = robot_child.getField("children") + for i in range(self.number_of_distance_sensors): + self.distance_sensors.append(self.getDevice(f"distance sensor({str(i)})")) + self.distance_sensors[-1].enable(self.timestep) # NOQA + ds_node = ds_group.getMFNode(i) + ds_node.getField("lookupTable").setMFVec3f(4, [max_ds_range / 100.0, max_ds_range]) + ds_node.getField("lookupTable").setMFVec3f(3, [0.75 * max_ds_range / 100.0, 0.75 * max_ds_range, + self.ds_noise]) + ds_node.getField("lookupTable").setMFVec3f(2, [0.5 * max_ds_range / 100.0, 0.5 * max_ds_range, + self.ds_noise]) + ds_node.getField("lookupTable").setMFVec3f(1, [0.25 * max_ds_range / 100.0, 0.25 * max_ds_range, + self.ds_noise]) + ds_node.getField("type").setSFString(self.ds_type) + ds_node.getField("numberOfRays").setSFInt32(self.ds_n_rays) + ds_node.getField("aperture").setSFFloat(self.ds_aperture) + ds_node.getField("resolution").setSFFloat(self.ds_resolution) + self.ds_max.append(max_ds_range) # NOQA + + # Touch sensors are used to determine when the robot collides with an obstacle + self.touch_sensor_left = self.getDevice("touch sensor left") + self.touch_sensor_left.enable(self.timestep) # NOQA + self.touch_sensor_right = self.getDevice("touch sensor right") + self.touch_sensor_right.enable(self.timestep) # NOQA + + # Set up motors + self.left_motor = self.getDevice("left_wheel") + self.right_motor = self.getDevice("right_wheel") + self.motor_speeds = [0.0, 0.0] + self.set_velocity(self.motor_speeds[0], self.motor_speeds[1]) + + # Grab target node + self.target = self.getFromDef("TARGET") + self.target.getField("rotation").setSFRotation([0.0, 0.0, 1.0, 0.0]) + + ################################################################################################################ + # Set up miscellaneous + # Various robot and target metrics, for the current step and the previous step + self.on_target_threshold = on_target_threshold # Threshold that defines whether robot is considered "on target" + self.initial_target_distance = 0.0 + self.initial_target_angle = 0.0 + self.current_tar_d = 0.0 # Distance to target + self.previous_tar_d = 0.0 + self.current_tar_a = 0.0 # Angle to target in respect to the facing angle of the robot + self.previous_tar_a = 0.0 + self.current_dist_sensors = [0.0 for _ in range(len(self.distance_sensors))] # Latest distance sensor values + self.previous_dist_sensors = [0.0 for _ in range(len(self.distance_sensors))] + self.current_touch_sensors = [0.0, 0.0] + self.current_position = [0, 0] # World position + self.previous_position = [0, 0] + self.current_rotation = 0.0 # Facing angle + self.previous_rotation = 0.0 + self.current_rotation_change = 0.0 # Latest facing angle change + self.previous_rotation_change = 0.0 + + # Various episode/training metrics, etc. + self.current_timestep = 0 + self.collisions_counter = 0 # Counter of collisions during the episode + self.reset_on_collisions = reset_on_collisions # Upper limit of number of collisions before reset + self.maximum_episode_steps = maximum_episode_steps # Steps before timeout + self.done_reason = "" # Used to terminate episode and print the reason the episode is done + self.reset_count = -1 # How many resets of the env overall, -1 to disregard the very first reset + self.reach_target_count = 0 # How many times the target was reached + self.collision_termination_count = 0 # How many times an episode was terminated due to collisions + self.timeout_count = 0 # How many times an episode timed out + self.min_distance_reached = float("inf") # The current episode minimum distance to target reached + self.min_dist_reached_list = [] # Used to store latest minimum distances reached, used as training metric + self.smoothness_list = [] # Used to store the episode smoothness rewards, used as training metric + self.episode_accumulated_reward = 0.0 # The reward accumulated in the current episode + self.touched_obstacle_left = False + self.touched_obstacle_right = False + self.mask = [True for _ in range(self.action_space.n)] # The action mask + self.trigger_done = False # Used to trigger the done condition + self.just_reset = True # Whether the episode was just reset + + # Dictionary holding the weights for the various reward components + self.reward_weight_dict = {"dist_tar": target_distance_weight, "ang_tar": target_angle_weight, + "dist_sensors": dist_sensors_weight, "tar_reach": target_reach_weight, + "collision": collision_weight, "smoothness_weight": smoothness_weight, + "speed_weight": speed_weight} + + ################################################################################################################ + # Map stuff + self.map_width, self.map_height = map_width, map_height + if cell_size is None: + self.cell_size = [0.5, 0.5] + # Center map to (0, 0) + origin = [-(self.map_width // 2) * self.cell_size[0], (self.map_height // 2) * self.cell_size[1]] + self.map = Grid(self.map_width, self.map_height, origin, self.cell_size) + + # Obstacle references and starting positions used to reset them + self.all_obstacles = [] + self.all_obstacles_starting_positions = [] + for childNodeIndex in range(self.getFromDef("OBSTACLES").getField("children").getCount()): + child = self.getFromDef("OBSTACLES").getField("children").getMFNode(childNodeIndex) # NOQA + self.all_obstacles.append(child) + self.all_obstacles_starting_positions.append(child.getField("translation").getSFVec3f()) + + # Wall references + self.walls = [self.getFromDef("WALL_1"), self.getFromDef("WALL_2")] + self.walls_starting_positions = [self.getFromDef("WALL_1").getField("translation").getSFVec3f(), + self.getFromDef("WALL_2").getField("translation").getSFVec3f()] + + # Path node references and starting positions used to reset them + self.all_path_nodes = [] + self.all_path_nodes_starting_positions = [] + for childNodeIndex in range(self.getFromDef("PATH").getField("children").getCount()): + child = self.getFromDef("PATH").getField("children").getMFNode(childNodeIndex) # NOQA + self.all_path_nodes.append(child) + self.all_path_nodes_starting_positions.append(child.getField("translation").getSFVec3f()) + + self.current_difficulty = {} + self.number_of_obstacles = 0 # The number of obstacles to use, set from set_difficulty method + if self.number_of_obstacles > len(self.all_obstacles): + warn(f"\n \nNumber of obstacles set is greater than the number of obstacles that exist in the " + f"world ({self.number_of_obstacles} > {len(self.all_obstacles)}).\n" + f"Number of obstacles is set to {len(self.all_obstacles)}.\n ") + self.number_of_obstacles = len(self.all_obstacles) + + self.path_to_target = [] # The map cells of the path + # The min and max (manhattan) distances of the target length allowed, set from set_difficulty method + self.min_target_dist = 1 + self.max_target_dist = 1 + + def set_reward_weight_dict(self, target_distance_weight, target_angle_weight, dist_sensors_weight, + target_reach_weight, collision_weight, smoothness_weight, speed_weight): + """ + Utility function to help change the reward weight dictionary on runtime. + """ + self.reward_weight_dict = {"dist_tar": target_distance_weight, "ang_tar": target_angle_weight, + "dist_sensors": dist_sensors_weight, "tar_reach": target_reach_weight, + "collision": collision_weight, "smoothness_weight": smoothness_weight, + "speed_weight": speed_weight} + + def set_maximum_episode_steps(self, new_value): + """ + This is required to be able to change the value of timeout steps for sb3 to register it. + """ + self.maximum_episode_steps = new_value + + def set_difficulty(self, difficulty_dict, key=None): + """ + Sets the difficulty and corresponding variables with a difficulty dictionary provided. + + :param difficulty_dict: A dictionary containing the difficulty information, e.g. + {"type": "t", "number_of_obstacles": X, "min_target_dist": Y, "max_target_dist": Z}, where type can be + "random" or "corridor". + :type difficulty_dict: dict + :param key: The key of the difficulty dictionary, if provided, the method prints it along with the + difficulty dict, defaults to None + :type key: str, optional + """ + self.current_difficulty = difficulty_dict + self.number_of_obstacles = difficulty_dict["number_of_obstacles"] + self.min_target_dist = difficulty_dict["min_target_dist"] + self.max_target_dist = difficulty_dict["max_target_dist"] + if key is not None: + print(f"Changed difficulty to: {key}, {difficulty_dict}") + else: + print("Changed difficulty to:", difficulty_dict) + + def get_action_mask(self): + """ + Returns the mask for the current state. The mask is a list of bools where each element corresponds to an + action, and if the bool is False the corresponding action is masked, i.e. disallowed. + Action masking allows the agent to perform certain actions under certain conditions, disallowing illogical + decisions. + + Mask is modified first by the touch sensors, and if no collisions are detected, secondly by + the distance sensors. + + The action mapping is as follows: + - 0: Increase both motor speeds, forward action + - 1: Decrease both motor speeds, backward action + - 2: Increase right motor speed, decrease left motor speed, turn left + - 3: Increase left motor speed, decrease right motor speed, turn right + - 4: No change in motor speeds, no action + + :return: The action mask list of bools + :rtype: list of booleans + """ + self.mask = [True for _ in range(self.action_space.n)] + # Mask backward action that will cause the agent to move backwards by default + if self.motor_speeds[0] <= 0.0 and self.motor_speeds[1] <= 0.0: + self.mask[1] = False + + # Create various flag lists for the distance sensors + # Whether any sensor is reading under its minimum threshold, and calculate and store how much + reading_under_threshold = [0.0 for _ in range(self.number_of_distance_sensors)] + # Whether there is any obstacle under half the max range of the distance sensors + detecting_obstacle = [False for _ in range(self.number_of_distance_sensors)] + # Whether there is an obstacle really close, i.e. under half the minimum threshold, in front + front_under_half_threshold = False + for i in range(len(self.current_dist_sensors)): + if self.current_dist_sensors[i] <= self.ds_max[i] / 2: + detecting_obstacle[i] = True + # Sensor is reading under threshold, store how much under threshold it reads + if self.current_dist_sensors[i] < self.ds_thresholds[i]: + reading_under_threshold[i] = self.ds_thresholds[i] - self.current_dist_sensors[i] + # Frontal sensor (index 4 to 8) is reading under half threshold + if i in [4, 5, 6, 7, 8] and self.current_dist_sensors[i] < (self.ds_thresholds[i] / 2): + front_under_half_threshold = True + # Split left and right slices to use later + reading_under_threshold_left = reading_under_threshold[0:5] + reading_under_threshold_right = reading_under_threshold[8:13] + + # First modify mask using the touch sensors as they are more important than distance sensors + # Unmask backward and mask forward if a touch sensor is detecting collision + if any(self.current_touch_sensors): + self.mask[0] = False + self.mask[1] = True + # Set flags to keep masking/unmasking until robot is clear of obstacles + # Distinguish between left and right touch + if self.current_touch_sensors[0]: + self.touched_obstacle_left = True + if self.current_touch_sensors[1]: + self.touched_obstacle_right = True + # Not touching obstacles and can't detect obstacles with distance sensors, + # can stop masking forward and unmasking backwards + elif not any(reading_under_threshold): + self.touched_obstacle_left = False + self.touched_obstacle_right = False + + # Keep masking forward and unmasking backwards as long as a touched_obstacle flag is True + if self.touched_obstacle_left or self.touched_obstacle_right: + self.mask[0] = False + self.mask[1] = True + + if self.touched_obstacle_left and not self.touched_obstacle_right: + # Touched on left, mask left action, unmask right action + self.mask[2] = False + self.mask[3] = True + if self.touched_obstacle_right and not self.touched_obstacle_left: + # Touched on right, mask right action, unmask left action + self.mask[3] = False + self.mask[2] = True + # If there are no touching obstacles, modify mask by distance sensors + else: + # Obstacles very close in front + if front_under_half_threshold: + # Mask forward + self.mask[0] = False + + # Target is straight ahead, no obstacles close-by + if not any(detecting_obstacle) and abs(self.current_tar_a) < 0.1: + # Mask left and right turning + self.mask[2] = self.mask[3] = False + + angle_threshold = 0.1 + # No obstacles on the right and target is on the right or + # no obstacles on the right and obstacles on the left regardless of target direction + if not any(reading_under_threshold_right): + if self.current_tar_a <= - angle_threshold or any(reading_under_threshold_left): + self.mask[2] = False # Mask left + + # No obstacles on the left and target is on the left or + # no obstacles on the left and obstacles on the right regardless of target direction + if not any(reading_under_threshold_left): + if self.current_tar_a >= angle_threshold or any(reading_under_threshold_right): + self.mask[3] = False # Mask right + + # Both left and right sensors are reading under threshold + if any(reading_under_threshold_left) and any(reading_under_threshold_right): + # Calculate the sum of how much each sensor's threshold is surpassed + sum_left = sum(reading_under_threshold_left) + sum_right = sum(reading_under_threshold_right) + # If left side has obstacles closer than right + if sum_left - sum_right < -5.0: + self.mask[2] = True # Unmask left + # If right side has obstacles closer than left + elif sum_left - sum_right > 5.0: + self.mask[3] = True # Unmask right + # If left and right side have obstacles on roughly equal distances + else: + # Enable touched condition + self.touched_obstacle_right = self.touched_obstacle_left = True + return self.mask + + def get_observations(self, action=None): + """ + This method returns the observation list of the agent. + A single observation consists of the distance and angle to the target, the current motor speeds, + the touch sensor values, the latest action represented by a one-hot vector, + and finally the distance sensor values. + + All values are normalized in their respective ranges, where appropriate: + - Distance is normalized to [0.0, 1.0] + - Angle is normalized to [-1.0, 1.0] + - Motor speeds are already constrained within [-1.0, 1.0] + - Touch sensor values can only been 0 or 1 + - Distance sensor values are normalized to [1.0, 0.0] + This is done so the input gets a large activation value when the sensor returns + small values, i.e. an obstacle is close. + + All observations are held in a memory (self.obs_memory) and the current observation is augmented with + self.step_window steps of the latest single observations and with self.seconds_window seconds of + observations. This means that for self.step_window=2 and self.seconds_window=2, the observation + is the latest two single observations plus an observation from 1 second in the past and an observation + from 2 seconds in the past. + + :param action: The latest action, defaults to None to match signature of parent method + :type action: int, optional + :return: Observation list + :rtype: list + """ + if self.just_reset: + self.previous_tar_d = self.current_tar_d + self.previous_tar_a = self.current_tar_a + # Add distance, angle, motor speeds + obs = [normalize_to_range(self.current_tar_d, 0.0, self.initial_target_distance, 0.0, 1.0, clip=True), + normalize_to_range(self.current_tar_a, -np.pi, np.pi, -1.0, 1.0, clip=True), + self.motor_speeds[0], self.motor_speeds[1]] + # Add touch sensor values + obs.extend(self.current_touch_sensors) + + if self.add_action_to_obs: + # Add action one-hot + action_one_hot = [0.0 for _ in range(self.action_space.n)] + try: + action_one_hot[action] = 1.0 + except IndexError: + pass + obs.extend(action_one_hot) + + # Add distance sensor values + ds_values = [] + for i in range(len(self.distance_sensors)): + ds_values.append(normalize_to_range(self.current_dist_sensors[i], 0, self.ds_max[i], 1.0, 0.0)) + obs.extend(ds_values) + + self.obs_memory = self.obs_memory[1:] # Drop oldest + self.obs_memory.append(obs) # Add the latest observation + + # Add the latest observations based on self.step_window + dense_obs = ([self.obs_memory[i] for i in range(len(self.obs_memory) - 1, + len(self.obs_memory) - 1 - self.step_window, -1)]) + + diluted_obs = [] + counter = 0 + for j in range(len(self.obs_memory) - 1 - self.step_window, 0, -1): + counter += 1 + if counter >= self.observation_counter_limit - 1: + diluted_obs.append(self.obs_memory[j]) + counter = 0 + self.obs_list = [] + for single_obs in diluted_obs: + for item in single_obs: + self.obs_list.append(item) + for single_obs in dense_obs: + for item in single_obs: + self.obs_list.append(item) + + return self.obs_list + + def get_reward(self, action): + """ + This method calculates the reward. The reward consists of various components that get weighted and added into + the final reward that gets returned. + + - The distance to target reward is firstly the negative normalized current to target creating a continuous + reward based on how far the agent is from the target. Bonus reward is also added everytime a new minimum + distance is achieved in the episode, giving an additional incentive moving along a path that closes the + distance. This bonus reward is similar to the pit escape problem reward. + - Reach target reward is 0.0, unless the current distance to target is under the on_target_threshold when it + becomes 1.0 - 0.5 * (current_timestep/max_timesteps), and reset is triggered (done). This way when the target + is reached, half the reward is scaled based on the time it took for the robot to reach the target. + - If the current angle to target is over a certain threshold, the angle to target reward is calculated based + on the latest change of the angle to target, if positive and over a small threshold, the reward is 1.0, + if negative and under a small threshold, the reward is -1.0. If the absolute change is under the threshold, the + reward is 0.0. If the current angle to target is under a certain threshold (i.e. facing the target), + the agent is reward with 1.0. + The threshold is pi/4 scaled by the normalized distance to target, i.e. the closer the robot is to the target, + the more it is rewarded for turning towards it and the stricter is the threshold under which the robot is + considered facing the target. + - The distance sensor reward is calculated by first calculating a sum. For each sensor reading over the minimum + ds threshold, +1 is added to the sum, otherwise -1 is added. Then the average is taken and normalized + to [-1.0, 0.0], with different initial ranges depending on what type of sensor is used (generic or sonar). + The final reward is scaled by the normalized distance to target. The closer the robot is to the target the less + important this reward is. + - Collision reward is 0.0, unless a or both the touch sensors detects a collision and it becomes -1.0. + It also counts the number of collisions and triggers a reset (done) when the set limit + reset_on_collisions is reached. + - Smoothness reward is calculated based on the rotational speed of the robot. The higher the latest absolute + facing angle change the closer the reward is to -1.0. The closer the value is to 0.0 (no turning at all), the + closer the reward is to 1.0. The final reward is scaled by the normalized distance to target. + The closer the robot is to the target the less important this reward is. Note the custom value for maximum + rotational speed used for normalizing. + - Speed reward is calculated based on the translational speed of the robot. The higher the latest distance moved + is, the closer the reward is to 1.0. The closer the value is to 0.0 (no distance moved), the closer the reward + is to -1.0. The final reward is scaled by the normalized distance to target. The closer the robot is to the + target the less important this reward is. Note the custom value for maximum distance moved used for normalizing. + + Finally, the angle to target reward is zeroed out if there is a non-negative distance sensor reward or + non-negative collision reward, to allow the agent to turn and move around obstacles without penalizing. + + All these rewards are multiplied with their corresponding weights taken from reward_weight_dict and summed into + the final reward. + + :param action: The latest action + :type action: int + :return: The total step reward + :rtype: float + """ + # If episode was just reset, set previous and current target distance and angle + if self.just_reset: + self.previous_tar_d = self.current_tar_d = self.initial_target_distance + self.previous_tar_a = self.current_tar_a = self.initial_target_angle + ################################################################################################################ + # Distance to target rewards + # Reward for decreasing distance to the target + + # The normalized current distance to target is used to scale rewards + normalized_current_tar_d = normalize_to_range(self.current_tar_d, + 0.0, self.initial_target_distance, 0.0, 1.0, clip=True) + + # Initial distance reward is minus the normalized distance to the target + dist_tar_reward = -normalized_current_tar_d + # If min distance is decreased, add +1 reward + if round(self.current_tar_d, 4) - round(self.min_distance_reached, 4) < 0.0: + dist_tar_reward += 1.0 + self.min_distance_reached = self.current_tar_d + + # Reward for reaching the target, i.e. decreasing the real distance under the threshold + # Final reward is modified by the time it took to reach the target + reach_tar_reward = 0.0 + if self.current_tar_d < self.on_target_threshold: + reach_tar_reward = 1.0 - 0.5 * self.current_timestep / self.maximum_episode_steps + self.done_reason = "reached target" # This triggers termination of episode + + ################################################################################################################ + # Angle to target reward + # Reward for decreasing angle to the target + # If turning towards the target apply +1.0, if turning away apply -1.0 + if abs(self.current_tar_a) > (np.pi / 4) * normalized_current_tar_d: + if round(abs(self.previous_tar_a) - abs(self.current_tar_a), 3) > 0.001: + ang_tar_reward = 1.0 + elif round(abs(self.previous_tar_a) - abs(self.current_tar_a), 3) < -0.001: + ang_tar_reward = -1.0 + else: + ang_tar_reward = 0.0 + else: + ang_tar_reward = 1.0 + ################################################################################################################ + # Obstacle avoidance rewards + # Reward for distance sensors values + dist_sensors_reward = 0 + for i in range(len(self.distance_sensors)): + if self.current_dist_sensors[i] < self.ds_thresholds[i]: + # If any sensor is under threshold add penalty + dist_sensors_reward -= 1.0 + else: + # Otherwise add reward + dist_sensors_reward += 1.0 + dist_sensors_reward /= self.number_of_distance_sensors + # Realistically not all sonar sensors can read under the thresholds set, if robot is boxed in + # with a wall in front, wall on the left and right, only the two left and two right-most sensors as + # well as the three frontal ones will read low values, resulting in an average reward of -0.077, + # which is normalized to -1.0. + if self.ds_type == "sonar": + dist_sensors_reward = round(normalize_to_range(dist_sensors_reward, -0.077, 1.0, -1.0, 0.0, clip=True), 4) + elif self.ds_type == "generic": + dist_sensors_reward = round(normalize_to_range(dist_sensors_reward, -1.0, 1.0, -1.0, 0.0, clip=True), 4) + # Final value is multiplied by current distance, so this reward gets less important, the closer + # the robot is to the target. + dist_sensors_reward *= normalized_current_tar_d + + # Penalty for collisions + # Check if the robot has collided with anything, assign negative reward + collision_reward = 0.0 + if any(self.current_touch_sensors): + self.collisions_counter += 1 + if self.collisions_counter >= self.reset_on_collisions - 1 and self.reset_on_collisions != -1: + self.done_reason = "collision" # This triggers termination of episode + collision_reward = -1.0 + + ################################################################################################################ + # Rewards for driving smoothly and at speed + # Smoothness reward based on angular velocity, -1.0 for fast turning, 1.0 for no turning + # Multiplied by the current distance to target, means that the farther away the robot is the more important + # it is to move smoothly. Near the target, violent turning maneuvers might be needed. + smoothness_reward = round( + -abs(normalize_to_range(self.current_rotation_change, -0.0183, 0.0183, -1.0, 1.0, clip=True)), 2) + if not self.just_reset: + self.smoothness_list.append(smoothness_reward) # To use as metric + smoothness_reward *= normalized_current_tar_d + + # Speed reward based on distance moved on last step. Obviously, straight movement produces better reward. + # This also means that neutral turns are also penalized because the position is not changing. + # Similar to smoothness reward, moving at speed is less important near the target. + dist_moved = np.linalg.norm([self.current_position[0] - self.previous_position[0], + self.current_position[1] - self.previous_position[1]]) + speed_reward = normalize_to_range(dist_moved, 0.0, 0.0012798, -1.0, 1.0) + speed_reward *= normalized_current_tar_d + + ################################################################################################################ + # Reward modification based on whether there are obstacles detected nearby + if dist_sensors_reward != 0.0 or any(self.current_touch_sensors): + ang_tar_reward = 0.0 + + ################################################################################################################ + # Total reward calculation + weighted_dist_tar_reward = self.reward_weight_dict["dist_tar"] * dist_tar_reward + weighted_ang_tar_reward = self.reward_weight_dict["ang_tar"] * ang_tar_reward + weighted_dist_sensors_reward = self.reward_weight_dict["dist_sensors"] * dist_sensors_reward + weighted_reach_tar_reward = self.reward_weight_dict["tar_reach"] * reach_tar_reward + weighted_collision_reward = self.reward_weight_dict["collision"] * collision_reward + weighted_smoothness_reward = self.reward_weight_dict["smoothness_weight"] * smoothness_reward + weighted_speed_reward = self.reward_weight_dict["speed_weight"] * speed_reward + + # Add various weighted rewards together + reward = (weighted_dist_tar_reward + weighted_ang_tar_reward + weighted_dist_sensors_reward + + weighted_collision_reward + weighted_reach_tar_reward + weighted_smoothness_reward + + weighted_speed_reward) + + self.episode_accumulated_reward += reward + + if self.just_reset: + # For the first step in each episode (just reset), return 0.0 reward. + return 0.0 + else: + return reward + + def is_done(self): + """ + Episode done triggers when the done_reason string is set which happens in the reward function, when the maximum + number of collisions is reached or the target is reached. This method handles the episode termination on + timeout and sets the done_reason string appropriately. + + :return: Whether the episode is done + :rtype: bool + """ + if self.done_reason != "": + return True + # Timeout + if self.current_timestep >= self.maximum_episode_steps: + self.done_reason = "timeout" + return True + return False + + def reset(self): + """ + Resets the simulation physics and objects and re-initializes robot and target positions, + along any other variables that need to be reset to their original values. + + The new map is created depending on difficulty, and viewpoint is reset. + """ + self.simulationResetPhysics() + super(Supervisor, self).step(int(self.getBasicTimeStep())) # NOQA + self.obs_memory = [[0.0 for _ in range(self.single_obs_size)] + for _ in range((self.seconds_window * int(np.ceil(1000 / self.timestep))) + + self.step_window)] + self.observation_counter = self.observation_counter_limit + # Reset path and various values + self.trigger_done = False + self.path_to_target = None + self.motor_speeds = [0.0, 0.0] + self.set_velocity(self.motor_speeds[0], self.motor_speeds[1]) + self.collisions_counter = 0 + + # Set robot random rotation + self.robot.getField("rotation").setSFRotation([0.0, 0.0, 1.0, random.uniform(-np.pi, np.pi)]) + + # Randomize obstacles and target + if self.current_difficulty["type"] == "random": + while True: + # Randomize robot and obstacle positions + self.randomize_map("random") + self.simulationResetPhysics() + # Set the target in a valid position and find a path to it + # and repeat until a reachable position has been found for the target + self.path_to_target = self.get_random_path(add_target=True) + if self.path_to_target is not None: + self.path_to_target = self.path_to_target[1:] # Remove starting node + break + elif self.current_difficulty["type"] == "corridor": + while True: + max_distance_allowed = 1 + # Randomize robot and obstacle positions + self.randomize_map("corridor") + self.simulationResetPhysics() + # Set the target in a valid position and find a path to it + # and repeat until a reachable position has been found for the target + self.path_to_target = self.get_random_path(add_target=False) + if self.path_to_target is not None: + self.path_to_target = self.path_to_target[1:] # Remove starting node + break + max_distance_allowed += 1 + self.place_path(self.path_to_target) + self.just_reset = True + + # Reset viewpoint so it plays nice + self.viewpoint.getField("position").setSFVec3f(self.viewpoint_position) + self.viewpoint.getField("orientation").setSFRotation(self.viewpoint_orientation) + + # Finally, reset any other values and count any metrics + self.reset_count += 1 + if self.done_reason != "": + print(f"Reward: {self.episode_accumulated_reward}, steps: {self.current_timestep}, " + f"done reason:{self.done_reason}") + if self.done_reason == "collision": + self.collision_termination_count += 1 + elif self.done_reason == "reached target": + self.reach_target_count += 1 + elif self.done_reason == "timeout": + self.timeout_count += 1 + self.done_reason = "" + self.current_timestep = 0 + self.initial_target_distance = get_distance_from_target(self.robot, self.target) + self.initial_target_angle = get_angle_from_target(self.robot, self.target) + self.min_dist_reached_list.append(self.min_distance_reached) + self.min_distance_reached = self.initial_target_distance - 0.01 + self.episode_accumulated_reward = 0.0 + self.current_dist_sensors = [self.ds_max[i] for i in range(len(self.distance_sensors))] + self.previous_dist_sensors = [self.ds_max[i] for i in range(len(self.distance_sensors))] + self.current_touch_sensors = [0.0, 0.0] + self.current_position = list(self.robot.getPosition()[:2]) + self.previous_position = list(self.robot.getPosition()[:2]) + self.current_rotation = self.get_robot_rotation() + self.previous_rotation = self.get_robot_rotation() + self.current_rotation_change = 0.0 + self.previous_rotation_change = 0.0 + self.current_tar_d = 0.0 + self.previous_tar_d = 0.0 + self.current_tar_a = 0.0 + self.previous_tar_a = 0.0 + self.touched_obstacle_left = False + self.touched_obstacle_right = False + self.mask = [True for _ in range(self.action_space.n)] + return self.get_default_observation() + + def clear_smoothness_list(self): + """ + Method used to trigger the cleaning of the smoothness_list on demand. + """ + self.smoothness_list = [] + + def clear_min_dist_reached_list(self): + """ + Method used to trigger the cleaning of the min_dist_reached_list on demand. + """ + self.min_dist_reached_list = [] + + def get_default_observation(self): + """ + Basic get_default_observation implementation that returns a zero vector + in the shape of the observation space. + :return: A list of zeros in shape of the observation space + :rtype: list + """ + return [0.0 for _ in range(self.observation_space.shape[0])] + + def get_robot_rotation(self): + # Fix rotation vector, because Webots randomly flips Z + temp_rot = self.robot.getField("rotation").getSFRotation() + if temp_rot[2] < 0.0: + return -temp_rot[3] + else: + return temp_rot[3] + + def update_current_metrics(self): + """ + Updates any metric that needs to be updated in each step. It serves as a unified place for updating metrics + used in various methods. This runs after each simulation step. + """ + # Save previous values + self.previous_tar_d = self.current_tar_d + self.previous_tar_a = self.current_tar_a + self.previous_dist_sensors = self.current_dist_sensors + self.previous_position = self.current_position + self.previous_rotation = self.current_rotation + self.previous_rotation_change = self.current_rotation_change + + # Target distance and angle + self.current_tar_d = get_distance_from_target(self.robot, self.target) + self.current_tar_a = get_angle_from_target(self.robot, self.target) + + # Get current position + self.current_position = list(self.robot.getPosition()[:2]) + + # Get current rotation + self.current_rotation = self.get_robot_rotation() + # To get rotation change we need to make sure there's not a big change from -pi to pi + if self.current_rotation * self.previous_rotation < 0.0: + self.current_rotation_change = self.previous_rotation_change + else: + self.current_rotation_change = self.current_rotation - self.previous_rotation + + # Get all distance sensor values + self.current_dist_sensors = [] # Values are in range [0, self.ds_max] + for ds in self.distance_sensors: + self.current_dist_sensors.append(ds.getValue()) # NOQA + + # Deprive robot of distance sensors + # Distance sensors whose index is in the denial list get their value overwritten with the max value + for i in self.ds_denial_list: + self.current_dist_sensors[i] = self.ds_max[i] + + # Get both touch sensor values + self.current_touch_sensors = [self.touch_sensor_left.getValue(), self.touch_sensor_right.getValue()] # NOQA + + def step(self, action): + """ + Step override method which slightly modifies the parent. + It applies the previous action, steps the simulation, updates the metrics with new values and then + gets the new observation, reward, done flag and info and returns them. + + :param action: The action to perform + :type action: int + :return: new observation, reward, done flag, info + :rtype: tuple + """ + action = self.apply_action(action) + + if super(Supervisor, self).step(self.timestep) == -1: # NOQA + exit() + + self.update_current_metrics() + self.current_timestep += 1 + + obs = self.get_observations(action) + rew = self.get_reward(action) + done = self.is_done() + info = self.get_info() + self.done = done + if self.just_reset: + self.just_reset = False + + return ( + obs, + rew, + done, + info + ) + + def apply_action(self, action): + """ + This method gets an integer action value [0, 1, ...] where each value corresponds to an action. + + The integer-action mapping is as follows: + - 0: Increase both motor speeds, forward action + - 1: Decrease both motor speeds, backward action + - 2: Increase right motor speed, decrease left motor speed, turn left + - 3: Increase left motor speed, decrease right motor speed, turn right + - 4: No change in motor speeds, no action + + This method also incorporates the keyboard control and if the user presses any of the + control buttons that correspond to the aforementioned actions. + + The key-action mapping is as follows: + - W: Increase both motor speeds, forward action + - S: Decrease both motor speeds, backward action + - A: Increase right motor speed, decrease left motor speed, turn left + - D: Increase left motor speed, decrease right motor speed, turn right + - X: Set motor speeds to zero, stop + + More keys are used to print helpful information: + - O: Print latest observation + - R: Print latest reward + - M: Print latest action mask and current motor speeds + + Finally, the motor speeds are clipped to [-1, 1] and applied to the motors. + + :param action: The action to execute + :type action: int + :return: The action executed + :rtype: int + """ + key = self.keyboard.getKey() + if key == ord("O"): # Print latest observation + print(self.obs_memory[-1]) + if key == ord("R"): # Print latest reward + print(self.get_reward(action)) + if key == ord("M"): # Print current mask + names = ["Forward", "Backward", "Left", "Right", "No action"] + print([names[i] for i in range(len(self.mask)) if self.mask[i]]) + print(self.motor_speeds) + + if self.manual_control: + action = 4 + if key == ord("W") and self.mask[0]: # Increase both motor speeds + action = 0 + if key == ord("S") and self.mask[1]: # Decrease both motor speeds + action = 1 + if key == ord("A") and self.mask[2]: # Increase right motor speed, decrease left motor speed, turn left + action = 2 + if key == ord("D") and self.mask[3]: # Decrease right motor speed, increase left motor speed, turn right + action = 3 + if key == ord("X"): # No action + action = 4 + self.motor_speeds = [0.0, 0.0] + + if action == 0: # Increase both motor speeds + if self.motor_speeds[0] < 1.0: + self.motor_speeds[0] += 0.25 + if self.motor_speeds[1] < 1.0: + self.motor_speeds[1] += 0.25 + elif action == 1: # Decrease both motor speeds + if self.motor_speeds[0] > -1.0: + self.motor_speeds[0] -= 0.25 + if self.motor_speeds[1] > -1.0: + self.motor_speeds[1] -= 0.25 + elif action == 2: # Increase right motor speed, decrease left motor speed, turn left + if self.motor_speeds[0] > -1.0: + self.motor_speeds[0] -= 0.25 + if self.motor_speeds[1] < 1.0: + self.motor_speeds[1] += 0.25 + elif action == 3: # Decrease right motor speed, increase left motor speed, turn right + if self.motor_speeds[0] < 1.0: + self.motor_speeds[0] += 0.25 + if self.motor_speeds[1] > -1.0: + self.motor_speeds[1] -= 0.25 + elif action == 4: # No action + pass + + self.motor_speeds = np.clip(self.motor_speeds, -1.0, 1.0) + # Apply motor speeds + self.set_velocity(self.motor_speeds[0], self.motor_speeds[1]) + return action + + def set_velocity(self, v_left, v_right): + """ + Sets the two motor velocities. + :param v_left: velocity value for left motor + :type v_left: float + :param v_right: velocity value for right motor + :type v_right: float + """ + self.left_motor.setPosition(float('inf')) # NOQA + self.right_motor.setPosition(float('inf')) # NOQA + self.left_motor.setVelocity(v_left) # NOQA + self.right_motor.setVelocity(v_right) # NOQA + + def get_info(self): + """ + Returns the reason the episode is done when the episode terminates. + + :return: Dictionary containing information + :rtype: dict + """ + if self.done_reason != "": + return {"done_reason": self.done_reason} + else: + return {} + + def render(self, mode='rgb_array'): + ''' + We expect `render()` to return a uint8 array with values in [0, 255] or a float array + with values in [0, 1], also the shape of image must be (channel, hight, width) + ''' + if mode == 'rgb_array': + # Capture the current frame from the camera + frame_str = self.camera.getImage() # Do NOT use getImageArray() + H, W = self.camera.getHeight(), self.camera.getWidth() + frame = np.zeros(shape=(3, H, W), dtype=np.uint8) + if frame_str is not None: + for x in range(W): + for y in range(H): + frame[0][y][x] = self.camera.imageGetRed(frame_str, W, x, y) + frame[1][y][x] = self.camera.imageGetGreen(frame_str, W, x, y) + frame[2][y][x] = self.camera.imageGetBlue(frame_str, W, x, y) + return frame + + elif mode == 'human': + # Display the frame in a window (optional, requires OpenCV or similar) + frame = self.render(mode='rgb_array') + if frame is not None: + import cv2 + cv2.imshow("Webots Simulation", frame) + cv2.waitKey(1) + else: + raise NotImplementedError(f"Render mode '{mode}' is not supported.") + + def export_parameters(self, path, + net_arch, gamma, n_steps, batch_size): + """ + Exports all parameters that define the environment/experiment setup. + + :param path: The path to save the export + :type path: str + :param net_arch: The network architectures, e.g. dict(pi=[1024, 512, 256], vf=[2048, 1024, 512]) + :type net_arch: dict with two lists + :param gamma: The gamma value + :type gamma: float + :param n_steps: Number of steps between each training session for sb3 + :type n_steps: int + :param batch_size: The batch size used during training + :type batch_size: int + """ + import json + param_dict = {"experiment_description": self.experiment_desc, + "seed": self.seed, + "n_steps:": n_steps, + "batch_size": batch_size, + "maximum_episode_steps": self.maximum_episode_steps, + "add_action_to_obs": self.add_action_to_obs, + "step_window": self.step_window, + "seconds_window": self.seconds_window, + "ds_params": { + "max range": self.ds_max, + "type": self.ds_type, + "rays": self.ds_n_rays, + "aperture": self.ds_aperture, + "resolution": self.ds_resolution, + "noise": self.ds_noise, + "minimum thresholds": self.ds_thresholds}, + "rewards_weights": self.reward_weight_dict, + "map_width": self.map_width, "map_height": self.map_height, "cell_size": self.cell_size, + "difficulty": self.current_difficulty, + "dqn_params": { + "net_arch": net_arch, + "gamma": gamma, + } + } + with open(path, 'w') as fp: + json.dump(param_dict, fp, indent=4) + + #################################################################################################################### + # Map-related methods below + + def remove_objects(self): + """ + Removes objects from arena, by setting their original translations and rotations. + """ + for object_node, starting_pos in zip(self.all_obstacles, self.all_obstacles_starting_positions): + object_node.getField("translation").setSFVec3f(starting_pos) + object_node.getField("rotation").setSFRotation([0, 0, 1, 0]) + for path_node, starting_pos in zip(self.all_path_nodes, self.all_path_nodes_starting_positions): + path_node.getField("translation").setSFVec3f(starting_pos) + path_node.getField("rotation").setSFRotation([0, 0, 1, 0]) + for wall_node, starting_pos in zip(self.walls, self.walls_starting_positions): + wall_node.getField("translation").setSFVec3f(starting_pos) + wall_node.getField("rotation").setSFRotation([0, 0, 1, -1.5708]) + + def randomize_map(self, type_="random"): + """ + Randomizes the obstacles on the map, by first removing all the objects and emptying the grid map. + Then, based on the type_ argument provided, places the set number of obstacles in various random configurations. + + - "random": places the number_of_obstacles in free positions on the map and randomizes their rotation + - "corridor": creates a corridor placing the robot at the start and the target at a distance along the corridor. + It then places the obstacles on each row along the corridor between the target and the robot, making sure + there is a valid path, i.e. consecutive rows should have free cells either diagonally or in the same column + + :param type_: The type of randomization, either "random" or "corridor", defaults to "random" + :type type_: str, optional + """ + self.remove_objects() + self.map.empty() + robot_z = 0.0399261 # Custom z value for the robot to avoid physics issues + + if type_ == "random": + self.map.add_random(self.robot, robot_z) # Add robot in a random position + for obs_node in random.sample(self.all_obstacles, self.number_of_obstacles): + self.map.add_random(obs_node) + obs_node.getField("rotation").setSFRotation([0.0, 0.0, 1.0, random.uniform(-np.pi, np.pi)]) + elif type_ == "corridor": + # Add robot to starting position + self.map.add_cell((self.map_width - 1) // 2, self.map_height - 1, self.robot, robot_z) + robot_coordinates = [(self.map_width - 1) // 2, self.map_height - 1] + # Limit the provided min, max target distances + if self.max_target_dist > self.map_height - 1: + print(f"max_target_dist set out of range, setting to: {min(self.max_target_dist, self.map_height - 1)}") + if self.min_target_dist > self.map_height - 1: + print(f"min_target_dist set out of range, setting to: {min(self.min_target_dist, self.map_height - 1)}") + # Get true min max target positions + min_target_pos = self.map_height - 1 - min(self.max_target_dist, self.map_height - 1) + max_target_pos = self.map_height - 1 - min(self.min_target_dist, self.map_height - 1) + if min_target_pos == max_target_pos: + target_y = min_target_pos + else: + target_y = random.randint(min_target_pos, max_target_pos) + # Finally add target + self.map.add_cell(robot_coordinates[0], target_y, self.target) + + # If there is space between target and robot, add obstacles + if abs(robot_coordinates[1] - target_y) > 1: + # We add two obstacles on each row between the target and robot so there is one free cell for the path + # To generate the obstacle placements within the corridor, we need to make sure that there is + # a free path within the corridor that leads from one row to the next. + # This means we need to avoid the case where there's a free place in the first column and on the next + # row the free place is in the third row. + def add_two_obstacles(): + col_choices = [robot_coordinates[0] + i for i in range(-1, 2, 1)] + random_col_1_ = random.choice(col_choices) + col_choices.remove(random_col_1_) + random_col_2_ = random.choice(col_choices) + col_choices.remove(random_col_2_) + return col_choices[0], random_col_1_, random_col_2_ + + max_obstacles = (abs(robot_coordinates[1] - target_y) - 1) * 2 + random_sample = random.sample(self.all_obstacles, min(max_obstacles, self.number_of_obstacles)) + prev_free_col = 0 + for row_coord, obs_node_index in \ + zip(range(target_y + 1, robot_coordinates[1]), range(0, len(random_sample), 2)): + # For each row between the robot and the target, add 2 obstacles + if prev_free_col == 0: + # If previous free column is the center one, any positions for the new row are valid + prev_free_col, random_col_1, random_col_2 = add_two_obstacles() + else: + # If previous free column is not the center one, then the new free one cannot be + # on the other side + current_free_col, random_col_1, random_col_2 = add_two_obstacles() + while abs(prev_free_col - current_free_col) == 2: + current_free_col, random_col_1, random_col_2 = add_two_obstacles() + prev_free_col = current_free_col + self.map.add_cell(random_col_1, row_coord, random_sample[obs_node_index]) + random_sample[obs_node_index].getField("rotation"). \ + setSFRotation([0.0, 0.0, 1.0, random.uniform(-np.pi, np.pi)]) + self.map.add_cell(random_col_2, row_coord, random_sample[obs_node_index + 1]) + random_sample[obs_node_index + 1].getField("rotation"). \ + setSFRotation([0.0, 0.0, 1.0, random.uniform(-np.pi, np.pi)]) + + # Abuse the grid map and add wall objects as placeholder to limit path finding within the corridor + for row_coord in range(target_y + 1, robot_coordinates[1]): + self.map.add_cell(robot_coordinates[0] - 2, row_coord, self.walls[0]) + self.map.add_cell(robot_coordinates[0] + 2, row_coord, self.walls[1]) + new_position = [-0.75, + self.walls_starting_positions[0][1], + self.walls_starting_positions[0][2]] + self.walls[0].getField("translation").setSFVec3f(new_position) + new_position = [0.75, + self.walls_starting_positions[1][1], + self.walls_starting_positions[1][2]] + self.walls[1].getField("translation").setSFVec3f(new_position) + + def get_random_path(self, add_target=True): + """ + Returns a path to the target or None if path is not found. Based on the add_target flag it also places the + target randomly at a certain manhattan min/max distance to the robot. + + :param add_target: Whether to also add the target before returning the path, defaults to True + :type add_target: bool, optional + """ + robot_coordinates = self.map.find_by_name("robot") + if add_target: + if not self.map.add_near(robot_coordinates[0], robot_coordinates[1], + self.target, + min_distance=self.min_target_dist, max_distance=self.max_target_dist): + return None # Need to re-randomize obstacles as add_near failed + return self.map.bfs_path(robot_coordinates, self.map.find_by_name("target")) + + def place_path(self, path): + """ + Place the path nodes (the small deepbots logos) on their proper places depending on the path generated. + + :param path: The path list + :type path: list + """ + for p, l in zip(path, self.all_path_nodes): + self.map.add_cell(p[0], p[1], l) + + def find_dist_to_path(self): + """ + This method is not currently used. It calculates the closest point and distance to the path, + returning both. + + :return: The minimum distance to the path and the corresponding closest point on the path + :rtype: tuple + """ + + def dist_to_line_segm(p, l1, l2): + v = l2 - l1 + w = p - l1 + c1 = np.dot(w, v) + if c1 <= 0: + return np.linalg.norm(p - l1), l1 + c2 = np.dot(v, v) + if c2 <= c1: + return np.linalg.norm(p - l2), l2 + b = c1 / c2 + pb = l1 + b * v + return np.linalg.norm(p - pb), pb + + np_path = np.array([self.map.get_world_coordinates(self.path_to_target[i][0], self.path_to_target[i][1]) + for i in range(len(self.path_to_target))]) + robot_pos = np.array(self.robot.getPosition()[:2]) + + if len(np_path) == 1: + return np.linalg.norm(np_path[0] - robot_pos), np_path[0] + + min_distance = float('inf') + closest_point = None + for i in range(np_path.shape[0] - 1): + edge = np.array([np_path[i], np_path[i + 1]]) + distance, point_on_line = dist_to_line_segm(robot_pos, edge[0], edge[1]) + min_distance = min(min_distance, distance) + closest_point = point_on_line + return min_distance, closest_point + + +#################################################################################################################### +# Grid class used to create the random obstacle map + + +class Grid: + """ + The grid map used to place all objects in the arena and find the paths. + + Partially coded by OpenAI's ChatGPT. + """ + + def __init__(self, width, height, origin, cell_size): + self.grid = [[None for _ in range(width)] for _ in range(height)] + self.origin = origin + self.cell_size = cell_size + + def size(self): + return len(self.grid[0]), len(self.grid) + + def add_cell(self, x, y, node, z=None): + if self.grid[y][x] is None and self.is_in_range(x, y): + self.grid[y][x] = node + if z is None: + node.getField("translation").setSFVec3f( + [self.get_world_coordinates(x, y)[0], self.get_world_coordinates(x, y)[1], node.getPosition()[2]]) + else: + node.getField("translation").setSFVec3f( + [self.get_world_coordinates(x, y)[0], self.get_world_coordinates(x, y)[1], z]) + return True + return False + + def remove_cell(self, x, y): + if self.is_in_range(x, y): + self.grid[y][x] = None + else: + warn("Can't remove cell outside grid range.") + + def get_cell(self, x, y): + if self.is_in_range(x, y): + return self.grid[y][x] + else: + warn("Can't return cell outside grid range.") + return None + + def get_neighbourhood(self, x, y): + if self.is_in_range(x, y): + neighbourhood_coords = [(x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1), + (x + 1, y + 1), (x - 1, y - 1), + (x - 1, y + 1), (x + 1, y - 1)] + neighbourhood_nodes = [] + for nc in neighbourhood_coords: + if self.is_in_range(nc[0], nc[1]): + neighbourhood_nodes.append(self.get_cell(nc[0], nc[1])) + return neighbourhood_nodes + else: + warn("Can't get neighbourhood of cell outside grid range.") + return None + + def is_empty(self, x, y): + if self.is_in_range(x, y): + if self.grid[y][x]: + return False + else: + return True + else: + warn("Coordinates provided are outside grid range.") + return None + + def empty(self): + self.grid = [[None for _ in range(len(self.grid[0]))] for _ in range(len(self.grid))] + + def add_random(self, node, z=None): + x = random.randint(0, len(self.grid[0]) - 1) + y = random.randint(0, len(self.grid) - 1) + if self.grid[y][x] is None: + return self.add_cell(x, y, node, z=z) + else: + self.add_random(node, z=z) + + def add_near(self, x, y, node, min_distance=1, max_distance=1): + # Make sure the randomly selected cell is not occupied + for tries in range(self.size()[0] * self.size()[1]): + coords = self.get_random_neighbor(x, y, min_distance, max_distance) + if coords and self.add_cell(coords[0], coords[1], node): + return True # Return success, the node was added + return False # Failed to insert near + + def get_random_neighbor(self, x, y, d_min, d_max): + neighbors = [] + rows = self.size()[0] + cols = self.size()[1] + for i in range(-d_max, d_max + 1): + for j in range(-d_max, d_max + 1): + if i == 0 and j == 0: + continue + if 0 <= x + i < rows and 0 <= y + j < cols: + distance = abs(x + i - x) + abs(y + j - y) + if d_min <= distance <= d_max: + neighbors.append((x + i, y + j)) + if len(neighbors) == 0: + return None + return random.choice(neighbors) + + def get_world_coordinates(self, x, y): + if self.is_in_range(x, y): + world_x = self.origin[0] + x * self.cell_size[0] + world_y = self.origin[1] - y * self.cell_size[1] + return world_x, world_y + else: + return None, None + + def get_grid_coordinates(self, world_x, world_y): + x = round((world_x - self.origin[0]) / self.cell_size[0]) + y = -round((world_y - self.origin[1]) / self.cell_size[1]) + if self.is_in_range(x, y): + return x, y + else: + return None, None + + def find_by_name(self, name): + for y in range(len(self.grid)): + for x in range(len(self.grid[0])): + if self.grid[y][x] and self.grid[y][x].getField("name").getSFString() == name: # NOQA + return x, y + return None + + def is_in_range(self, x, y): + if (0 <= x < len(self.grid[0])) and (0 <= y < len(self.grid)): + return True + return False + + def bfs_path(self, start, goal): + start = tuple(start) + goal = tuple(goal) + queue = [(start, [start])] # (coordinates, path to coordinates) + visited = set() + visited.add(start) + while queue: + coords, path = queue.pop(0) + for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1), + (1, 1), (-1, -1), (1, -1), (-1, 1)]: # neighbors + x, y = coords + x2, y2 = x + dx, y + dy + if self.is_in_range(x2, y2) and (x2, y2) not in visited: + if self.grid[y2][x2] is not None and (x2, y2) == goal: + return path + [(x2, y2)] + elif self.grid[y2][x2] is None: + visited.add((x2, y2)) + queue.append(((x2, y2), path + [(x2, y2)])) + return None diff --git a/examples/find_and_avoid_v2_video/controllers/dqn_controller/readme.md b/examples/find_and_avoid_v2_video/controllers/dqn_controller/readme.md new file mode 100644 index 00000000..916c2ec4 --- /dev/null +++ b/examples/find_and_avoid_v2_video/controllers/dqn_controller/readme.md @@ -0,0 +1,46 @@ +# Todo: +1- Import Video from logger: +in training script: +```python +from stable_baselines3.common.logger import HParam, Video +``` +2- Add necessary variables to AdditionalInfoCallback class: +add in the constructor: +```python + self.frames = [] # List to store frames for GIF creation + self.episode_cnt = 1 + self.record = False + self.render_interval=render_interval +``` +3- Implement the on_step() event handler to record frames periodically: +```python +if self.env.done: + if self.episode_cnt % self.render_interval == 0: + self.env.camera.enable(self.env.timestep * 10) # basic time step = 32 + self.record = True + else: + self.env.camera.disable() + self.record = False + self.episode_cnt+=1 + self.frames = [] + print(f'Starting Episode {self.episode_cnt} ...') +``` +4- Add video creation and logging to tensorboard to on_rollout_end() event: +if self.record: +```python + # Save the frames to tensorboard + frame = self.env.render(mode='rgb_array') # (c, h, w) + self.frames.append(frame) + video = np.asarray([self.frames]) + self.logger.record("visualization", + Video(torch.from_numpy(video), fps=30), + exclude=("stdout", "log", "json", "csv")) +``` +5- Add render parameters to run() function: +```python +run(... , + log_interval=4, + render_interval=100) +``` +# Results: +"Visualization" tab is added to Tensorboard monitoring & is updated after a predefined sequence of episodes, hence more ability to track the agent performance across different difficulty levels. \ No newline at end of file diff --git a/examples/find_and_avoid_v2_video/controllers/dqn_controller/utilities.py b/examples/find_and_avoid_v2_video/controllers/dqn_controller/utilities.py new file mode 100644 index 00000000..ae131353 --- /dev/null +++ b/examples/find_and_avoid_v2_video/controllers/dqn_controller/utilities.py @@ -0,0 +1,88 @@ +import math +import numpy as np + + +def normalize_to_range(value, min_val, max_val, new_min, new_max, clip=False): + """ + Normalizes value to a specified new range by supplying the current range. + + :param value: value to be normalized + :type value: float + :param min_val: value's min value, value ∈ [min_val, max_val] + :type min_val: float + :param max_val: value's max value, value ∈ [min_val, max_val] + :type max_val: float + :param new_min: normalized range min value + :type new_min: float + :param new_max: normalized range max value + :type new_max: float + :param clip: whether to clip normalized value to new range or not, defaults to False + :type clip: bool, optional + :return: normalized value ∈ [new_min, new_max] + :rtype: float + """ + value = float(value) + min_val = float(min_val) + max_val = float(max_val) + new_min = float(new_min) + new_max = float(new_max) + + if clip: + return np.clip((new_max - new_min) / (max_val - min_val) * (value - max_val) + new_max, new_min, new_max) + else: + return (new_max - new_min) / (max_val - min_val) * (value - max_val) + new_max + + +def get_distance_from_target(robot_node, target_node): + robot_coordinates = robot_node.getField('translation').getSFVec3f() + target_coordinate = target_node.getField('translation').getSFVec3f() + + dx = robot_coordinates[0] - target_coordinate[0] + dy = robot_coordinates[1] - target_coordinate[1] + distance_from_target = math.sqrt(dx * dx + dy * dy) + return distance_from_target + + +def get_angle_from_target(robot_node, target, node_mode=True, is_abs=False): + """ + Returns the angle between the facing vector of the robot and the target position. + Explanation can be found here https://math.stackexchange.com/a/14180. + :param robot_node: The robot Webots node + :type robot_node: controller.node.Node + :param target: The target Webots node or position + :type target: controller.node.Node or [x, y] + :param node_mode: Whether the target is given as a Webots node + :type node_mode: bool + :param is_abs: Whether to return the absolute value of the angle. When True, + eliminates clockwise, anti-clockwise direction and returns [0, π] + :type is_abs: bool + :return: The angle between the facing vector of the robot and the target position + :rtype: float, [-π, π] + """ + # The sign of the z-axis is needed to flip the rotation sign, because Webots seems to randomly + # switch between positive and negative z-axis as the robot rotates. + robot_angle = robot_node.getField('rotation').getSFRotation()[3] * \ + np.sign(robot_node.getField('rotation').getSFRotation()[2]) + + robot_coordinates = robot_node.getField('translation').getSFVec3f() + if node_mode: + target_coordinate = target.getField('translation').getSFVec3f() + else: + target_coordinate = target + + x_r = (target_coordinate[0] - robot_coordinates[0]) + y_r = (target_coordinate[1] - robot_coordinates[1]) + if x_r == 0 and y_r == 0: + return 0.0 + + angle_dif = math.atan2(y_r, x_r) + angle_dif = angle_dif - robot_angle + if angle_dif > np.pi: + angle_dif = angle_dif - (2 * np.pi) + if angle_dif < -np.pi: + angle_dif = angle_dif + (2 * np.pi) + + if is_abs: + angle_dif = abs(angle_dif) + + return angle_dif diff --git a/examples/find_and_avoid_v2_video/doc/find_avoid_v2_trained.gif b/examples/find_and_avoid_v2_video/doc/find_avoid_v2_trained.gif new file mode 100644 index 00000000..202c63bb Binary files /dev/null and b/examples/find_and_avoid_v2_video/doc/find_avoid_v2_trained.gif differ diff --git a/examples/find_and_avoid_v2_video/readme.md b/examples/find_and_avoid_v2_video/readme.md new file mode 100644 index 00000000..916c2ec4 --- /dev/null +++ b/examples/find_and_avoid_v2_video/readme.md @@ -0,0 +1,46 @@ +# Todo: +1- Import Video from logger: +in training script: +```python +from stable_baselines3.common.logger import HParam, Video +``` +2- Add necessary variables to AdditionalInfoCallback class: +add in the constructor: +```python + self.frames = [] # List to store frames for GIF creation + self.episode_cnt = 1 + self.record = False + self.render_interval=render_interval +``` +3- Implement the on_step() event handler to record frames periodically: +```python +if self.env.done: + if self.episode_cnt % self.render_interval == 0: + self.env.camera.enable(self.env.timestep * 10) # basic time step = 32 + self.record = True + else: + self.env.camera.disable() + self.record = False + self.episode_cnt+=1 + self.frames = [] + print(f'Starting Episode {self.episode_cnt} ...') +``` +4- Add video creation and logging to tensorboard to on_rollout_end() event: +if self.record: +```python + # Save the frames to tensorboard + frame = self.env.render(mode='rgb_array') # (c, h, w) + self.frames.append(frame) + video = np.asarray([self.frames]) + self.logger.record("visualization", + Video(torch.from_numpy(video), fps=30), + exclude=("stdout", "log", "json", "csv")) +``` +5- Add render parameters to run() function: +```python +run(... , + log_interval=4, + render_interval=100) +``` +# Results: +"Visualization" tab is added to Tensorboard monitoring & is updated after a predefined sequence of episodes, hence more ability to track the agent performance across different difficulty levels. \ No newline at end of file diff --git a/examples/find_and_avoid_v2_video/requirements/requirements.txt b/examples/find_and_avoid_v2_video/requirements/requirements.txt new file mode 100644 index 00000000..a0baff3b --- /dev/null +++ b/examples/find_and_avoid_v2_video/requirements/requirements.txt @@ -0,0 +1,7 @@ +deepbots>=0.1.3.dev4 +sb3-contrib +stable-baselines3<=1.8.0 +numpy +torch +tensorboard +moviepy \ No newline at end of file diff --git a/examples/find_and_avoid_v2_video/worlds/.find_avoid_v2_world.wbproj b/examples/find_and_avoid_v2_video/worlds/.find_avoid_v2_world.wbproj new file mode 100644 index 00000000..962de758 --- /dev/null +++ b/examples/find_and_avoid_v2_video/worlds/.find_avoid_v2_world.wbproj @@ -0,0 +1,10 @@ +Webots Project File version R2023a +perspectives: 000000ff00000000fd0000000200000001000001bb0000017afc0200000001fb0000001400540065007800740045006400690074006f007201000000130000017a0000003f00ffffff000000030000035500000039fc0100000001fb0000001a0043006f006e0073006f006c00650041006c006c0041006c006c0100000000000003550000006900ffffff000001980000017a00000001000000020000000100000008fc00000000 +simulationViewPerspectives: 000000ff000000010000000200000101000003380100000002010000000100 +sceneTreePerspectives: 000000ff00000001000000030000001c00000139000000fa0100000002010000000200 +maximizedDockId: -1 +centralWidgetVisible: 1 +orthographicViewHeight: 1 +textFiles: -1 +consoles: Console:All:All +renderingDevicePerspectives: robot:camera;1;0.229687;0;0 diff --git a/examples/find_and_avoid_v2_video/worlds/find_avoid_v2_world.wbt b/examples/find_and_avoid_v2_video/worlds/find_avoid_v2_world.wbt new file mode 100644 index 00000000..9e80e168 --- /dev/null +++ b/examples/find_and_avoid_v2_video/worlds/find_avoid_v2_world.wbt @@ -0,0 +1,940 @@ +#VRML_SIM R2023a utf8 + +EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2022b/projects/objects/backgrounds/protos/TexturedBackground.proto" +EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2022b/projects/objects/backgrounds/protos/TexturedBackgroundLight.proto" +EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2022b/projects/objects/floors/protos/RectangleArena.proto" +EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2022b/projects/appearances/protos/Parquetry.proto" +EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2023a/projects/objects/factory/containers/protos/CardboardBox.proto" +EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2023a/projects/objects/chairs/protos/WoodenChair.proto" +EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2023a/projects/objects/kitchen/breakfast/protos/JamJar.proto" + +WorldInfo { +} +DEF VIEWPOINT Viewpoint { + orientation -0.3833461039047917 0.37464636620598124 0.8442072404982502 1.7619347030701011 + position -0.9605156484435913 -3.921647450771648 4.478294738183916 + follow "robot" + followType "None" +} +TexturedBackground { +} +TexturedBackgroundLight { +} +RectangleArena { + rotation 0 1 0 0 + floorSize 3.6 3.6 + floorTileSize 1 1 + floorAppearance Parquetry { + } +} +DEF ROBOT Robot { + translation 1.4630326780287426 -1.5182156603577182 0.03978789437407481 + rotation -0.0011680995457829634 -6.797439205859879e-05 0.9999993154612321 -3.018952332792635 + children [ + Camera { + translation 0.1 0 0 + fieldOfView 1.0472 + width 640 + height 480 + near 0.1 + far 10 + } + DEF CASTOR_WHEEL_FRONT Solid { + translation 0.04 0 -0.03 + children [ + DEF CASTOR Shape { + appearance PBRAppearance { + baseColor 0.12549 0.290196 0.529412 + metalness 0 + } + geometry Sphere { + radius 0.01 + } + } + ] + name "CASTOR_WHEEL" + boundingObject USE CASTOR + physics Physics { + } + } + DEF CASTOR_WHEEL_BACK Solid { + translation -0.04 0 -0.03 + children [ + DEF CASTOR Shape { + appearance PBRAppearance { + baseColor 0.12549 0.290196 0.529412 + metalness 0 + } + geometry Sphere { + radius 0.01 + } + } + ] + name "CASTOR_WHEEL(1)" + boundingObject USE CASTOR + physics Physics { + } + } + DEF BODY Shape { + appearance PBRAppearance { + baseColor 0.917647 0.145098 0.145098 + roughness 1 + metalness 0 + } + geometry Box { + size 0.1 0.1 0.05 + } + } + DEF WHEEL1 HingeJoint { + jointParameters HingeJointParameters { + position 2.255958866588215 + axis 0 1 0 + anchor 0 0.06 0 + } + device [ + RotationalMotor { + name "left_wheel" + } + ] + endPoint Solid { + translation 5.890755460649622e-07 0.06000001867654499 -1.9765027438409503e-06 + rotation 0.3179303859527065 0.6704166495246852 -0.6704191119949211 2.5259458824157353 + children [ + DEF WHEEL Shape { + appearance PBRAppearance { + baseColor 0.305882 0.898039 0.25098 + roughness 1 + metalness 0 + } + geometry Cylinder { + height 0.02 + radius 0.04 + subdivision 24 + } + } + ] + boundingObject USE WHEEL + physics Physics { + } + linearVelocity -0.021480940182653056 -0.0027396623607232372 3.10139292398708e-06 + angularVelocity 0.06173352528753441 -0.4956208867668231 -0.398072572430894 + } + } + DEF WHEEL2 HingeJoint { + jointParameters HingeJointParameters { + position -0.13597212836894682 + axis 0 1 0 + anchor 0 -0.06 0 + } + device [ + RotationalMotor { + name "right_wheel" + } + ] + endPoint Solid { + translation 1.3554858461798663e-06 -0.06000000980942243 -1.4706876214762954e-06 + rotation -0.9953956286091749 0.06777724162761838 -0.0677774893537454 4.707770319200099 + children [ + USE WHEEL + ] + name "solid(1)" + boundingObject USE WHEEL + physics Physics { + } + linearVelocity 0.025853682213104814 0.0032707732520232723 -2.8175839891879903e-06 + angularVelocity -0.09508755031867233 0.7445598436474758 -0.3981521741661517 + } + } + DEF DS Group { + children [ + DistanceSensor { + translation 0.053 -4.28515e-18 0.01 + rotation 0 0 1 1.570796327 + name "distance sensor(0)" + lookupTable [ + 0.015 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 1.308996939 + name "distance sensor(1)" + lookupTable [ + 0.013 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 1.047197551 + name "distance sensor(2)" + lookupTable [ + 0.012 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 0.785398163 + name "distance sensor(3)" + lookupTable [ + 0.008 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 0.523598776 + name "distance sensor(4)" + lookupTable [ + 0.007 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 0.2617993877991494 + name "distance sensor(5)" + lookupTable [ + 0.006 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + name "distance sensor(6)" + lookupTable [ + 0.006 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 -0.261799 + name "distance sensor(7)" + lookupTable [ + 0.006 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 -0.523598776 + name "distance sensor(8)" + lookupTable [ + 0.007 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 -0.785398163 + name "distance sensor(9)" + lookupTable [ + 0.008 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 -1.047197551 + name "distance sensor(10)" + lookupTable [ + 0.012 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.051 0 0.01 + rotation 0 0 1 -1.309 + name "distance sensor(11)" + lookupTable [ + 0.013 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + DistanceSensor { + translation 0.053 0 0.01 + rotation 0 0 1 -1.570796327 + name "distance sensor(12)" + lookupTable [ + 0.015 0 0 + 0.25 25 0.025 + 0.5 50 0.025 + 0.75 75 0.025 + 1 100 0 + ] + type "sonar" + numberOfRays 4 + aperture 0.1 + } + ] + } + DEF DS_BODY Solid { + translation 0.05 0 0.01 + children [ + Shape { + appearance PBRAppearance { + } + geometry Box { + size 0.01 0.01 0.01 + } + } + ] + name "DS_BODY" + } + TouchSensor { + translation 0.047524636585188906 0.031018466410938084 -0.0100030558070138 + rotation 1 -8.229684459317903e-12 -8.268201391817455e-12 1.5708000000000244 + children [ + DEF TOUCH_BODY_LEFT Shape { + appearance PBRAppearance { + baseColor 0.203922 0.396078 0.643137 + metalness 0 + } + geometry Capsule { + height 0.05 + radius 0.015 + } + } + ] + name "touch sensor left" + boundingObject USE TOUCH_BODY_LEFT + physics Physics { + } + linearVelocity -0.012435393057909241 0.017439833745988204 2.9303335762355528e-05 + angularVelocity -0.00013840093801268776 0.0005684110191763519 -0.39741140628886346 + } + TouchSensor { + translation 0.04752681641212475 -0.031015123496495267 -0.01000303762210282 + rotation 1 -8.229664679869155e-12 -8.268201390020631e-12 1.5708000000000246 + children [ + DEF TOUCH_BODY_RIGHT Shape { + appearance PBRAppearance { + baseColor 0.203922 0.396078 0.643137 + metalness 0 + } + geometry Capsule { + height 0.05 + radius 0.015 + } + } + ] + name "touch sensor right" + boundingObject USE TOUCH_BODY_RIGHT + physics Physics { + } + linearVelocity 0.012022857241512846 0.020545304660051857 2.5513623130054728e-05 + angularVelocity -0.0001384009380111389 0.0005684110191793801 -0.3974114062888629 + } + ] + boundingObject USE BODY + physics Physics { + } + controller "dqn_controller" + supervisor TRUE + linearVelocity 0.002177259762583937 0.00026459673480673605 -2.1463650872757584e-07 + angularVelocity -0.0001384009381892966 0.0005684110191874754 -0.39741140524773766 +} +DEF TARGET Solid { + translation -1 1 0.001 + children [ + Shape { + appearance Appearance { + material Material { + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.24 0.24 + } + } + ] + name "target" +} +DEF OBSTACLES Group { + children [ + DEF OBSTACLE_CBL1 CardboardBox { + translation 5.5 0 0.051 + name "cardboard box long 1" + size 0.1 0.4 0.1 + } + DEF OBSTACLE_CBL2 CardboardBox { + translation 5.62 0 0.051 + name "cardboard box long 2" + size 0.1 0.4 0.1 + } + DEF OBSTACLE_CBL3 CardboardBox { + translation 0 0 0.051 + rotation 0 0 1 0.6750528396923623 + name "cardboard box long 3" + size 0.1 0.4 0.1 + } + DEF OBSTACLE_CBL4 CardboardBox { + translation -0.5 -1 0.051 + rotation 0 0 1 1.8964309944316993 + name "cardboard box long 4" + size 0.1 0.4 0.1 + } + DEF OBSTACLE_CBL5 CardboardBox { + translation 5.86 0 0.051 + name "cardboard box long 5" + size 0.1 0.4 0.1 + } + DEF OBSTACLE_CB1 CardboardBox { + translation 5.5 0.29 0.051 + name "cardboard box 1" + size 0.1 0.1 0.1 + } + DEF OBSTACLE_CB2 CardboardBox { + translation 0.5 1 0.051 + rotation 0 0 1 -0.9694928872058699 + name "cardboard box 2" + size 0.1 0.1 0.1 + } + DEF OBSTACLE_CB3 CardboardBox { + translation 5.74 0.29 0.051 + name "cardboard box 3" + size 0.1 0.1 0.1 + } + DEF OBSTACLE_CB4 CardboardBox { + translation 1.5 1.5 0.051 + rotation 0 0 1 1.230454191142342 + name "cardboard box 4" + size 0.1 0.1 0.1 + } + DEF OBSTACLE_CB5 CardboardBox { + translation 5.98 0.29 0.051 + name "cardboard box 5" + size 0.1 0.1 0.1 + } + DEF OBSTACLE_CBLA1 CardboardBox { + translation 5.86 0.68 0.051 + name "cardboard box large 1" + size 0.4 0.4 0.1 + } + DEF OBSTACLE_CBLA2 CardboardBox { + translation 5.86 1.13 0.051 + name "cardboard box large 2" + size 0.4 0.4 0.1 + } + DEF OBSTACLE_CBLA3 CardboardBox { + translation -1.5 -0.5 0.051 + rotation 0 0 1 -1.7486628333388992 + name "cardboard box large 3" + size 0.4 0.4 0.1 + } + DEF OBSTACLE_CBLA4 CardboardBox { + translation 5.86 2.06 0.051 + name "cardboard box large 4" + size 0.4 0.4 0.1 + } + DEF OBSTACLE_CBLA5 CardboardBox { + translation -1.5 1.5 0.051 + rotation 0 0 1 -2.981711593667146 + name "cardboard box large 5" + size 0.4 0.4 0.1 + } + DEF OBSTACLE_C1 WoodenChair { + translation 0.5 1.5 0 + rotation 0 0 1 2.5222431035629294 + name "wooden chair 1" + } + DEF OBSTACLE_C2 WoodenChair { + translation 6.46 0.46 0 + name "wooden chair 2" + } + DEF OBSTACLE_C3 WoodenChair { + translation 6.46 0.91 0 + name "wooden chair 3" + } + DEF OBSTACLE_C4 WoodenChair { + translation 6.46 1.38 0 + name "wooden chair 4" + } + DEF OBSTACLE_C5 WoodenChair { + translation 6.46 1.84 0 + name "wooden chair 5" + } + DEF OBSTACLE_JJ1 JamJar { + translation 0.00010682609903209674 -0.999982958600103 -0.0006972498946076489 + rotation 1.192413342257399e-07 -6.863281809713801e-08 -0.9999999999999906 1.780661740822881 + name "jam jar 1" + } + DEF OBSTACLE_JJ2 JamJar { + translation 1.000127339555708 0.999968656116071 -0.0006972498630802987 + rotation -1.5503788880464504e-07 -8.664954728444458e-08 0.9999999999999841 1.6395903398123983 + name "jam jar 2" + } + DEF OBSTACLE_JJ3 JamJar { + translation 5.31 0.060000000000000005 -0.000696200001152357 + rotation 0.8058748299851758 0.514588092757442 -0.29285636955446664 9.867770818618206e-17 + name "jam jar 3" + } + DEF OBSTACLE_JJ4 JamJar { + translation 5.31 0.16 -0.000696200001152357 + rotation -0.10984613843259676 0.993679563759594 0.02312467162987777 8.04376441069383e-17 + name "jam jar 4" + } + DEF OBSTACLE_JJ5 JamJar { + translation 0.00010387640615806782 1.499975735539507 -0.0006972498964337784 + rotation -1.1104234354127807e-07 -1.1777814114456478e-08 0.9999999999999938 2.4712884716297205 + name "jam jar 5" + } + ] +} +DEF WALLS Group { + children [ + DEF WALL_1 Solid { + translation 4.94 5.96488e-06 0.05 + rotation 0 0 1 -1.5708 + children [ + DEF BODY Shape { + appearance PBRAppearance { + roughness 1 + metalness 0.4 + } + geometry Box { + size 3.6 0.025 0.1 + } + } + ] + name "wall 1" + boundingObject USE BODY + } + DEF WALL_2 Solid { + translation 4.99 6.01603e-06 0.05 + rotation 0 0 1 -1.5708 + children [ + DEF BODY Shape { + appearance PBRAppearance { + roughness 1 + metalness 0.4 + } + geometry Box { + size 3.6 0.025 0.1 + } + } + ] + name "wall 2" + boundingObject USE BODY + } + ] +} +DEF PATH Group { + children [ + DEF p0 Solid { + translation 1 -1.5 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p" + } + DEF p1 Solid { + translation 0.5 -1 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(1)" + } + DEF p2 Solid { + translation 0 -0.5 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(2)" + } + DEF p3 Solid { + translation -0.5 0 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(3)" + } + DEF p4 Solid { + translation -0.5 0.5 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(4)" + } + DEF p5 Solid { + translation 5.13 0.610001 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(5)" + } + DEF p6 Solid { + translation 5.13 0.730001 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(6)" + } + DEF p7 Solid { + translation 5.13 0.850001 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(7)" + } + DEF p8 Solid { + translation 5.13 0.970001 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(8)" + } + DEF p9 Solid { + translation 5.13 1.1 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(9)" + } + DEF p10 Solid { + translation 5.13 1.23 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(10)" + } + DEF p11 Solid { + translation 5.13 1.36 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(11)" + } + DEF p12 Solid { + translation 5.13 1.49 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(12)" + } + DEF p13 Solid { + translation 5.13 1.61 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(13)" + } + DEF p14 Solid { + translation 5.13 1.74 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(14)" + } + DEF p15 Solid { + translation 5.13 1.86 0.001 + children [ + Shape { + appearance Appearance { + material Material { + transparency 0.75 + } + texture ImageTexture { + url [ + "https://avatars.githubusercontent.com/u/57842071?s=256" + ] + } + } + geometry Plane { + size 0.12 0.12 + } + } + ] + name "p(15)" + } + ] +}