-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathDiscreteHighwayEnvironment.m
More file actions
284 lines (255 loc) · 11.6 KB
/
DiscreteHighwayEnvironment.m
File metadata and controls
284 lines (255 loc) · 11.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
classdef DiscreteHighwayEnvironment < rl.env.MATLABEnvironment
    %DISCRETEHIGHWAYENVIRONMENT Discrete-action highway driving environment.
    %   rl.env.MATLABEnvironment subclass that couples a MATLAB driving
    %   scenario with a SUMO traffic co-simulation via the Deep Traffic Lab
    %   TrafficEnvironment class.
    %   Observations: {100x100 occupancy grid, 5x1 ego state
    %   [x; y; dx; dy; theta], scalar current lane}.
    %   Actions: integers 1..5 (discrete vehicle meta-actions).

    %% Properties (set properties' attributes accordingly)
    properties
        % Sample time [s]
        Ts = 0.1
        % Episode maximum run time (0.01*2000 = 20 s)
        StopTime = 0.01*2000
        % Visualization parameters
        SumoVisualization = true
        % NOTE(review): property name keeps its historical misspelling
        % ('Visialization') because it is public and callers may depend on it.
        DSDVisialization = true
        % Ego vehicle name, used to key into the traffic environment
        EgoID = 'ego01'
        % Reward gains
        CollisionReward = -1  % reward received when colliding with a vehicle
        RightLaneReward = 0.1 % reward received when driving on the right-most
                              % lane (applied as a binary bonus in getReward)
        HighSpeedReward = 0.4 % reward received when driving at full speed,
                              % linearly mapped to zero for lower speeds
                              % according to RewardSpeedRange
        RewardSpeedRange = [10, 40];
        % Number of lanes available
        NumLanes = 4;
    end

    properties (SetAccess = protected, GetAccess = public)
        % Vehicle scenario object parsed at construction
        Scenario
        % Number of traffic vehicles created at the last reset
        NumTrafficActors = 0;
    end

    properties
        % System state [x, y, dx, dy, theta]' — assembled by set.States
        % from a struct with Position, Velocity and Heading fields
        States = [0 0 0 0 0]';
        % True once the ego has collided during the current episode
        HasCollided = false;
        % Lane the ego currently occupies
        CurrentLane = [];
        % Lane the ego occupied on the previous step
        PreviousLane = [];
    end

    properties(Access = protected)
        % Internal flag to indicate episode termination
        IsDone = false
        % Traffic environment class (SUMO co-simulation wrapper)
        TrafficEnv
        % Ego vehicle holder (single ego, first element of the egos list)
        Ego = [];
    end

    properties(Access = private)
        % True once the chase-camera plot has been created, so reset()
        % only creates it a single time
        HasPlot = false;
    end

    %% Necessary Methods
    methods
        function this = DiscreteHighwayEnvironment(Scenario, SumoConfigfile, ...
                EgoConfigFile, TrafficConfigFile, visualization)
            %DISCRETEHIGHWAYENVIRONMENT Constructor.
            %   Parses the driving scenario together with the SUMO, ego and
            %   traffic configuration files, defines the observation and
            %   action specifications, and creates the TrafficEnvironment
            %   that manages the SUMO connection and the traffic actors.
            %
            %   visualization (logical, default false) enables the chase
            %   plot that reset() creates on its first call.
            arguments
                Scenario
                SumoConfigfile (1,:) char
                EgoConfigFile (1,:) char
                TrafficConfigFile (1,:) char
                visualization (1,1) logical = false;
            end
            % Observation specifications (see rlNumericSpec docs):
            %   1) 100x100x1 occupancy grid with values in [0,1]
            %   2) 5x1 ego state vector [x; y; dx; dy; theta]
            %   3) scalar current lane index
            ObservationInfo(1) = rlNumericSpec([100,100,1],'LowerLimit',0,'UpperLimit',1);
            ObservationInfo(1).Name = 'OccupancyGrid';
            ObservationInfo(1).Description = 'x*y occupancy grid matrix';
            ObservationInfo(2) = rlNumericSpec([5 1]);
            ObservationInfo(2).Name = 'VehicleStates';
            ObservationInfo(2).Description = 'x, y, dx, dy, theta';
            ObservationInfo(3) = rlNumericSpec([1 1]);
            ObservationInfo(3).Name = 'CurrentLane';
            ObservationInfo(3).Description = 'Vehicle Current Lane';
            % Action specification: five discrete meta-actions
            ActionInfo = rlFiniteSetSpec(1:5);
            ActionInfo.Name = 'VehicleDiscreteMetaAction';
            % The following line implements built-in functions of RL env
            this = this@rl.env.MATLABEnvironment(ObservationInfo,ActionInfo);
            % Initialize scenario
            this.Scenario = Scenario;
            % Initialize the traffic environment (SUMO/traci co-simulation)
            this.TrafficEnv = TrafficEnvironment(Scenario, ...
                SumoConfigfile, ...
                EgoConfigFile,...
                TrafficConfigFile,...
                this.StopTime,...
                'SampleTime', this.Ts,...
                'SumoVisualization', this.SumoVisualization,...
                'LaneChangeType','Resolution');
            % Propagate option for visualization
            this.DSDVisialization = visualization;
        end

        function [Observation,Reward,IsDone,LoggedSignals] = step(this,Action)
            %STEP Apply one discrete action and advance the simulation.
            %   Validates the action, commands the ego vehicle, advances
            %   the co-simulation by one sample, refreshes the cached
            %   state/lane, and returns the observation cell array, the
            %   reward, and the terminal flag.
            LoggedSignals = [];
            % Validate and fetch the action (scalar in 1..5)
            command = this.getAction(Action);
            % Command action on the ego vehicle
            this.Ego.step(command);
            % Advance the co-simulation; TrafficEnv.step returns false
            % when the simulation has stopped
            simulationHasStopped = ~this.TrafficEnv.step;
            % Check if the ego has collided
            collisionContainer = this.TrafficEnv.check_ego_collisions;
            this.HasCollided = collisionContainer(this.EgoID);
            hasCollided = this.HasCollided;
            % Check if the ego has arrived
            hasArrived = this.Ego.HasArrived;
            % Terminal condition: simulation end, collision, or arrival
            IsDone = simulationHasStopped || hasCollided || hasArrived;
            % Update system states and lane bookkeeping
            this.States = this.Ego.states;
            this.PreviousLane = this.CurrentLane;
            this.CurrentLane = this.Ego.CurrentLane;
            % Build the observation
            occupancyGrid = double(this.Ego.EgoMap.getOccupancy);
            states = this.States;
            currentLane = this.Ego.CurrentLane;
            % If curves are too pronounced, CurrentLane can come back
            % empty (known bug in the lane lookup); fall back to 0
            if isempty(currentLane)
                currentLane = 0;
            end
            % Construct observation list
            Observation = {occupancyGrid,states,currentLane};
            % Get reward (guard against an empty reward)
            Reward = getReward(this);
            if isempty(Reward)
                Reward = 0;
            end
            % Make sure dimensions are correct
            assert(all(size(occupancyGrid) == [100,100]),'error size mismatch occupancy grid')
            assert(all(size(states) == [5,1]),'error size mismatch states')
            assert(~isempty(currentLane),'error current lane is empty')
            assert(all(size(currentLane) == [1,1]),'error size mismatch currentLane')
            assert(all(size(IsDone) == [1,1]),'error size mismatch isDone')
            assert(~isempty(IsDone),'error empty isDone')
            % FIX(review): message previously said 'isDone' for the Reward check
            assert(all(size(Reward) == [1,1]),'error size mismatch Reward')
            assert(~isempty(Reward),'error empty Reward')
            % Signal that the environment has been updated (e.g. to
            % refresh any visualization)
            notifyEnvUpdated(this);
        end

        function InitialObservation = reset(this)
            %RESET Reset the environment and return the initial observation.
            %   Re-deploys traffic and the ego vehicle, clears the collision
            %   flag, refreshes the state/lane caches, and (once) creates
            %   the chase visualization when enabled.
            % Deploy traffic and populate ego vehicle
            [hasBeenCreated, this.NumTrafficActors, egos] = ...
                this.TrafficEnv.deploy_traffic();
            % Check that environment has been created
            % FIX(review): corrected 'Enviroonment' typo in the message
            assert(hasBeenCreated,'Environment could not be created, please check configurations')
            % Decompose Ego since there is only one ego
            this.Ego = egos{1};
            % Reset collision flag just in case
            this.HasCollided = false;
            % Propagate states
            this.States = egos{1}.states;
            this.CurrentLane = this.Ego.CurrentLane;
            this.PreviousLane = this.CurrentLane;
            % Create initial observations
            InitialOccupancyGrid = double(this.Ego.EgoMap.getOccupancy);
            InitialStatesObs = this.States;
            InitialLane = this.PreviousLane;
            % Construct observation list
            InitialObservation = {InitialOccupancyGrid,InitialStatesObs,InitialLane};
            % Signal that the environment has been updated
            notifyEnvUpdated(this);
            % If the visualization option was set, generate the chase
            % plot here exactly once
            if this.DSDVisialization && ~this.HasPlot
                this.TrafficEnv.create_chase_visualization(this.EgoID)
                % Update the visualization
                envUpdatedCallback(this)
                this.HasPlot = true;
            end
        end
    end

    %% Optional Methods (set methods' attributes accordingly)
    methods
        function reward = getReward(this)
            %GETREWARD Compute the normalized reward for the current state.
            %   reward = CollisionReward*collided
            %          + RightLaneReward*(on right-most lane)
            %          + HighSpeedReward*scaledSpeed,
            %   then linearly normalized from
            %   [CollisionReward, RightLaneReward+HighSpeedReward] to [0,1].
            % Determine whether the vehicle is on the right-most lane
            [cl,numlanes] = currentLane(this.Ego.Vehicle);
            if isempty(numlanes) || isempty(cl)
                % FIX(review): also guard an empty lane index so the reward
                % expression never evaluates to empty; the sentinel values
                % simply disable the right-lane bonus
                cl = -1;
                numlanes = 0;
            end
            % Scale longitudinal speed (States(3) = dx) into [0,1]
            scaledSpeed = this.linearMap(this.States(3),...
                this.RewardSpeedRange,[0,1]);
            % Compute reward
            reward = ...
                this.CollisionReward*this.HasCollided + ...
                this.RightLaneReward*(cl==numlanes)+...
                this.HighSpeedReward*scaledSpeed;
            % Normalize reward to a value between 0 and 1
            reward = this.linearMap(reward,[this.CollisionReward, ...
                this.RightLaneReward+this.HighSpeedReward], [0,1]);
        end

        function plot(this)
            %PLOT Initiate the scenario visualization.
            plot(this.Scenario)
            % Update the visualization
            envUpdatedCallback(this)
        end

        function set.States(this,state)
            %SET.STATES Assemble the 5x1 state vector from a state struct.
            %   Expects a struct with Position, Velocity and Heading fields;
            %   assumes 2-element Position/Velocity and scalar Heading so
            %   the result is [x; y; dx; dy; theta] — TODO confirm with the
            %   ego vehicle's states property.
            validateattributes(state,{'struct'},{});
            position = double(state.Position(:));
            velocity = double(state.Velocity(:));
            heading = double(state.Heading(:));
            this.States = [position; velocity; heading];
            notifyEnvUpdated(this);
        end
    end

    methods (Access = protected)
        function envUpdatedCallback(this)
            %ENVUPDATEDCALLBACK Hook run whenever notifyEnvUpdated fires.
            %   Intentionally a no-op; override/extend to refresh plots.
        end
    end

    methods (Static)
        function command = getAction(action)
            %GETACTION Validate a discrete action.
            %   Accepts a numeric scalar in [1,5] and returns it unchanged.
            %   (Note: non-integer values within the range also pass
            %   validateattributes here.)
            validateattributes(action,{'numeric'},{'scalar','>=',1,'<=',5})
            command = action;
        end

        function value = linearMap(value, xInterval, yInterval)
            %LINEARMAP Linearly map value from xInterval to yInterval,
            %   saturating the result at the yInterval bounds.
            %   NOTE(review): the clamping assumes yInterval is increasing
            %   (yInterval(1) < yInterval(2)) — confirm for new call sites.
            value = yInterval(1)+(value-xInterval(1))*...
                (yInterval(2)-yInterval(1))/(xInterval(2)-xInterval(1));
            if value > yInterval(2)
                value = yInterval(2);
            elseif value < yInterval(1)
                value = yInterval(1);
            end
        end
    end
end