-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathDiscreteHighwayEnvironment.m
More file actions
284 lines (255 loc) · 11.6 KB
/
DiscreteHighwayEnvironment.m
File metadata and controls
284 lines (255 loc) · 11.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
classdef DiscreteHighwayEnvironment < rl.env.MATLABEnvironment
    %DISCRETEHIGHWAYENVIRONMENT Discrete-action highway driving environment.
    %   rl.env.MATLABEnvironment subclass that couples a MATLAB driving
    %   scenario with a SUMO traffic co-simulation via the Deep Traffic Lab
    %   TrafficEnvironment class.
    %   Observations: {100x100 occupancy grid, 5x1 ego state
    %   [x; y; dx; dy; theta], scalar current lane}.
    %   Actions: integers 1..5 (discrete vehicle meta-actions).

    %% Properties (set properties' attributes accordingly)
    properties
        % Sample time [s]
        Ts = 0.1
        % Episode maximum run time (0.01*2000 = 20 s)
        StopTime = 0.01*2000
        % Visualization parameters
        SumoVisualization = true
        % NOTE(review): property name keeps its historical misspelling
        % ('Visialization') because it is public and callers may depend on it.
        DSDVisialization = true
        % Ego vehicle name, used to key into the traffic environment
        EgoID = 'ego01'
        % Reward gains
        CollisionReward = -1  % reward received when colliding with a vehicle
        RightLaneReward = 0.1 % reward received when driving on the right-most
                              % lane (applied as a binary bonus in getReward)
        HighSpeedReward = 0.4 % reward received when driving at full speed,
                              % linearly mapped to zero for lower speeds
                              % according to RewardSpeedRange
        RewardSpeedRange = [10, 40];
        % Number of lanes available
        NumLanes = 4;
    end

    properties (SetAccess = protected, GetAccess = public)
        % Vehicle scenario object parsed at construction
        Scenario
        % Number of traffic vehicles created at the last reset
        NumTrafficActors = 0;
    end

    properties
        % System state [x, y, dx, dy, theta]' — assembled by set.States
        % from a struct with Position, Velocity and Heading fields
        States = [0 0 0 0 0]';
        % True once the ego has collided during the current episode
        HasCollided = false;
        % Lane the ego currently occupies
        CurrentLane = [];
        % Lane the ego occupied on the previous step
        PreviousLane = [];
    end

    properties(Access = protected)
        % Internal flag to indicate episode termination
        IsDone = false
        % Traffic environment class (SUMO co-simulation wrapper)
        TrafficEnv
        % Ego vehicle holder (single ego, first element of the egos list)
        Ego = [];
    end

    properties(Access = private)
        % True once the chase-camera plot has been created, so reset()
        % only creates it a single time
        HasPlot = false;
    end

    %% Necessary Methods
    methods
        function this = DiscreteHighwayEnvironment(Scenario, SumoConfigfile, ...
                EgoConfigFile, TrafficConfigFile, visualization)
            %DISCRETEHIGHWAYENVIRONMENT Constructor.
            %   Parses the driving scenario together with the SUMO, ego and
            %   traffic configuration files, defines the observation and
            %   action specifications, and creates the TrafficEnvironment
            %   that manages the SUMO connection and the traffic actors.
            %
            %   visualization (logical, default false) enables the chase
            %   plot that reset() creates on its first call.
            arguments
                Scenario
                SumoConfigfile (1,:) char
                EgoConfigFile (1,:) char
                TrafficConfigFile (1,:) char
                visualization (1,1) logical = false;
            end
            % Observation specifications (see rlNumericSpec docs):
            %   1) 100x100x1 occupancy grid with values in [0,1]
            %   2) 5x1 ego state vector [x; y; dx; dy; theta]
            %   3) scalar current lane index
            ObservationInfo(1) = rlNumericSpec([100,100,1],'LowerLimit',0,'UpperLimit',1);
            ObservationInfo(1).Name = 'OccupancyGrid';
            ObservationInfo(1).Description = 'x*y occupancy grid matrix';
            ObservationInfo(2) = rlNumericSpec([5 1]);
            ObservationInfo(2).Name = 'VehicleStates';
            ObservationInfo(2).Description = 'x, y, dx, dy, theta';
            ObservationInfo(3) = rlNumericSpec([1 1]);
            ObservationInfo(3).Name = 'CurrentLane';
            ObservationInfo(3).Description = 'Vehicle Current Lane';
            % Action specification: five discrete meta-actions
            ActionInfo = rlFiniteSetSpec(1:5);
            ActionInfo.Name = 'VehicleDiscreteMetaAction';
            % The following line implements built-in functions of RL env
            this = this@rl.env.MATLABEnvironment(ObservationInfo,ActionInfo);
            % Initialize scenario
            this.Scenario = Scenario;
            % Initialize the traffic environment (SUMO/traci co-simulation)
            this.TrafficEnv = TrafficEnvironment(Scenario, ...
                SumoConfigfile, ...
                EgoConfigFile,...
                TrafficConfigFile,...
                this.StopTime,...
                'SampleTime', this.Ts,...
                'SumoVisualization', this.SumoVisualization,...
                'LaneChangeType','Resolution');
            % Propagate option for visualization
            this.DSDVisialization = visualization;
        end

        function [Observation,Reward,IsDone,LoggedSignals] = step(this,Action)
            %STEP Apply one discrete action and advance the simulation.
            %   Validates the action, commands the ego vehicle, advances
            %   the co-simulation by one sample, refreshes the cached
            %   state/lane, and returns the observation cell array, the
            %   reward, and the terminal flag.
            LoggedSignals = [];
            % Validate and fetch the action (scalar in 1..5)
            command = this.getAction(Action);
            % Command action on the ego vehicle
            this.Ego.step(command);
            % Advance the co-simulation; TrafficEnv.step returns false
            % when the simulation has stopped
            simulationHasStopped = ~this.TrafficEnv.step;
            % Check if the ego has collided
            collisionContainer = this.TrafficEnv.check_ego_collisions;
            this.HasCollided = collisionContainer(this.EgoID);
            hasCollided = this.HasCollided;
            % Check if the ego has arrived
            hasArrived = this.Ego.HasArrived;
            % Terminal condition: simulation end, collision, or arrival
            IsDone = simulationHasStopped || hasCollided || hasArrived;
            % Update system states and lane bookkeeping
            this.States = this.Ego.states;
            this.PreviousLane = this.CurrentLane;
            this.CurrentLane = this.Ego.CurrentLane;
            % Build the observation
            occupancyGrid = double(this.Ego.EgoMap.getOccupancy);
            states = this.States;
            currentLane = this.Ego.CurrentLane;
            % If curves are too pronounced, CurrentLane can come back
            % empty (known bug in the lane lookup); fall back to 0
            if isempty(currentLane)
                currentLane = 0;
            end
            % Construct observation list
            Observation = {occupancyGrid,states,currentLane};
            % Get reward (guard against an empty reward)
            Reward = getReward(this);
            if isempty(Reward)
                Reward = 0;
            end
            % Make sure dimensions are correct
            assert(all(size(occupancyGrid) == [100,100]),'error size mismatch occupancy grid')
            assert(all(size(states) == [5,1]),'error size mismatch states')
            assert(~isempty(currentLane),'error current lane is empty')
            assert(all(size(currentLane) == [1,1]),'error size mismatch currentLane')
            assert(all(size(IsDone) == [1,1]),'error size mismatch isDone')
            assert(~isempty(IsDone),'error empty isDone')
            % FIX(review): message previously said 'isDone' for the Reward check
            assert(all(size(Reward) == [1,1]),'error size mismatch Reward')
            assert(~isempty(Reward),'error empty Reward')
            % Signal that the environment has been updated (e.g. to
            % refresh any visualization)
            notifyEnvUpdated(this);
        end

        function InitialObservation = reset(this)
            %RESET Reset the environment and return the initial observation.
            %   Re-deploys traffic and the ego vehicle, clears the collision
            %   flag, refreshes the state/lane caches, and (once) creates
            %   the chase visualization when enabled.
            % Deploy traffic and populate ego vehicle
            [hasBeenCreated, this.NumTrafficActors, egos] = ...
                this.TrafficEnv.deploy_traffic();
            % Check that environment has been created
            % FIX(review): corrected 'Enviroonment' typo in the message
            assert(hasBeenCreated,'Environment could not be created, please check configurations')
            % Decompose Ego since there is only one ego
            this.Ego = egos{1};
            % Reset collision flag just in case
            this.HasCollided = false;
            % Propagate states
            this.States = egos{1}.states;
            this.CurrentLane = this.Ego.CurrentLane;
            this.PreviousLane = this.CurrentLane;
            % Create initial observations
            InitialOccupancyGrid = double(this.Ego.EgoMap.getOccupancy);
            InitialStatesObs = this.States;
            InitialLane = this.PreviousLane;
            % Construct observation list
            InitialObservation = {InitialOccupancyGrid,InitialStatesObs,InitialLane};
            % Signal that the environment has been updated
            notifyEnvUpdated(this);
            % If the visualization option was set, generate the chase
            % plot here exactly once
            if this.DSDVisialization && ~this.HasPlot
                this.TrafficEnv.create_chase_visualization(this.EgoID)
                % Update the visualization
                envUpdatedCallback(this)
                this.HasPlot = true;
            end
        end
    end

    %% Optional Methods (set methods' attributes accordingly)
    methods
        function reward = getReward(this)
            %GETREWARD Compute the normalized reward for the current state.
            %   reward = CollisionReward*collided
            %          + RightLaneReward*(on right-most lane)
            %          + HighSpeedReward*scaledSpeed,
            %   then linearly normalized from
            %   [CollisionReward, RightLaneReward+HighSpeedReward] to [0,1].
            % Determine whether the vehicle is on the right-most lane
            [cl,numlanes] = currentLane(this.Ego.Vehicle);
            if isempty(numlanes) || isempty(cl)
                % FIX(review): also guard an empty lane index so the reward
                % expression never evaluates to empty; the sentinel values
                % simply disable the right-lane bonus
                cl = -1;
                numlanes = 0;
            end
            % Scale longitudinal speed (States(3) = dx) into [0,1]
            scaledSpeed = this.linearMap(this.States(3),...
                this.RewardSpeedRange,[0,1]);
            % Compute reward
            reward = ...
                this.CollisionReward*this.HasCollided + ...
                this.RightLaneReward*(cl==numlanes)+...
                this.HighSpeedReward*scaledSpeed;
            % Normalize reward to a value between 0 and 1
            reward = this.linearMap(reward,[this.CollisionReward, ...
                this.RightLaneReward+this.HighSpeedReward], [0,1]);
        end

        function plot(this)
            %PLOT Initiate the scenario visualization.
            plot(this.Scenario)
            % Update the visualization
            envUpdatedCallback(this)
        end

        function set.States(this,state)
            %SET.STATES Assemble the 5x1 state vector from a state struct.
            %   Expects a struct with Position, Velocity and Heading fields;
            %   assumes 2-element Position/Velocity and scalar Heading so
            %   the result is [x; y; dx; dy; theta] — TODO confirm with the
            %   ego vehicle's states property.
            validateattributes(state,{'struct'},{});
            position = double(state.Position(:));
            velocity = double(state.Velocity(:));
            heading = double(state.Heading(:));
            this.States = [position; velocity; heading];
            notifyEnvUpdated(this);
        end
    end

    methods (Access = protected)
        function envUpdatedCallback(this)
            %ENVUPDATEDCALLBACK Hook run whenever notifyEnvUpdated fires.
            %   Intentionally a no-op; override/extend to refresh plots.
        end
    end

    methods (Static)
        function command = getAction(action)
            %GETACTION Validate a discrete action.
            %   Accepts a numeric scalar in [1,5] and returns it unchanged.
            %   (Note: non-integer values within the range also pass
            %   validateattributes here.)
            validateattributes(action,{'numeric'},{'scalar','>=',1,'<=',5})
            command = action;
        end

        function value = linearMap(value, xInterval, yInterval)
            %LINEARMAP Linearly map value from xInterval to yInterval,
            %   saturating the result at the yInterval bounds.
            %   NOTE(review): the clamping assumes yInterval is increasing
            %   (yInterval(1) < yInterval(2)) — confirm for new call sites.
            value = yInterval(1)+(value-xInterval(1))*...
                (yInterval(2)-yInterval(1))/(xInterval(2)-xInterval(1));
            if value > yInterval(2)
                value = yInterval(2);
            elseif value < yInterval(1)
                value = yInterval(1);
            end
        end
    end
end