Skip to content

Commit e28252a

Browse files
authored
feat: production hardening — input validation, anti-cheat, reconnection, zone backup (#57)
* feat: input validation wrapper to prevent WS handler crashes - Wrap all handle_message dispatches in safe_handle_message try/catch - Catches badmatch, badkey, function_clause, case_clause errors - Returns error reply to client instead of crashing the WS process - Add chat content length limit (2000 bytes) - Log unexpected crashes via logger:warning Previously, missing payload fields (e.g. chat.send without content) would crash the WS handler, killing the session and disconnecting the player. Now returns {error, invalid_payload} gracefully. * feat: anti-cheat basics — tick in zone_state, anticheat telemetry - Pass tick number into ZoneState before calling GameMod:zone_tick - Add anticheat_violation/3 telemetry event for logging suspicious inputs - Game modules can now track tick count for cooldown enforcement * feat: reconnection grace period for world server - Add reconnect_state to world server init (from config reconnect key) - Monitor player session PIDs on join, detect disconnect via DOWN - Grace period via asobi_reconnect module (already existed, now wired) - On disconnect: start grace timer, keep entity alive in zones - On reconnect: re-subscribe to zones, re-monitor new session - On grace expiry: remove player from world and zones - ETS asobi_player_worlds table for PlayerId→WorldPid lookup - ETS asobi_world_state table for zone crash recovery (Phase 4) - WS handler checks for pending reconnect on session.connect - Backwards compatible: games without reconnect config unchanged * feat: zone entity state backup and crash recovery - Backup zone entity state to ETS every 20 ticks (~1 second) - On zone restart, recover entities from ETS backup - Normal/graceful shutdown clears backup - Abnormal termination preserves backup for recovery - Uses asobi_world_state ETS table (created in asobi_world_sup) * chore: remove unrelated observability/rating files from PR * fix: dialyzer warnings — unmatched expressions
1 parent 16b83c1 commit e28252a

5 files changed

Lines changed: 272 additions & 11 deletions

File tree

src/asobi_telemetry.erl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
-export([matchmaker_queued/2, matchmaker_removed/2, matchmaker_formed/3]).
88
-export([session_connected/1, session_disconnected/2]).
99
-export([ws_connected/0, ws_disconnected/0, ws_message_in/1, ws_message_out/1]).
10+
-export([anticheat_violation/3]).
1011
-export([economy_transaction/4, store_purchase/3]).
1112
-export([chat_message_sent/2]).
1213
-export([vote_started/2, vote_cast/2, vote_resolved/3]).
@@ -157,6 +158,14 @@ ws_connected() ->
157158
ws_disconnected() ->
158159
telemetry:execute([asobi, ws, disconnected], #{count => 1}, #{}).
159160

161+
-spec anticheat_violation(binary(), atom(), map()) -> ok.
162+
anticheat_violation(PlayerId, Type, Details) ->
163+
telemetry:execute(
164+
[asobi, anticheat, violation],
165+
#{count => 1},
166+
#{player_id => PlayerId, type => Type, details => Details}
167+
).
168+
160169
-spec ws_message_in(binary()) -> ok.
161170
ws_message_in(Type) ->
162171
telemetry:execute([asobi, ws, message_in], #{count => 1}, #{type => Type}).

src/world/asobi_world_server.erl

Lines changed: 150 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
-behaviour(gen_statem).
33

44
-export([start_link/1, join/2, leave/2, move_player/3, post_tick/2, get_info/1, cancel/1]).
5+
-export([reconnect/2]).
56
-export([start_vote/2, cast_vote/4, use_veto/3]).
67
-export([whereis/1]).
78
-export([callback_mode/0, init/1, terminate/3]).
@@ -40,6 +41,10 @@ post_tick(Pid, TickN) ->
4041
get_info(Pid) ->
4142
gen_statem:call(Pid, get_info).
4243

44+
-spec reconnect(pid(), binary()) -> ok | {error, term()}.
45+
reconnect(Pid, PlayerId) ->
46+
gen_statem:call(Pid, {reconnect, PlayerId}).
47+
4348
-spec cancel(pid()) -> ok.
4449
cancel(Pid) ->
4550
gen_statem:cast(Pid, cancel).
@@ -119,6 +124,7 @@ init(Config) ->
119124
frustration_bonus => FrustrationBonus,
120125
active_votes => #{},
121126
persistent => Persistent,
127+
reconnect_state => init_reconnect(Config),
122128
chat_state => ChatState,
123129
phase_state => PhaseState
124130
},
@@ -178,7 +184,8 @@ running(
178184
) ->
179185
case Mod:post_tick(TickN, GS) of
180186
{ok, GS1} ->
181-
State1 = tick_phases(TickRate, State#{game_state => GS1}),
187+
State0 = tick_reconnect(State#{game_state => GS1}),
188+
State1 = tick_phases(TickRate, State0),
182189
case maps:get(phase_state, State1) of
183190
PS when is_map(PS) ->
184191
case asobi_phase:info(PS) of
@@ -215,7 +222,65 @@ running(info, {vote_vetoed, VoteId, _Template}, State) ->
215222
{keep_state, State#{active_votes => Active}};
216223
running(info, {asobi_message, _}, _State) ->
217224
%% Zone snapshots/deltas forwarded here in tests — ignore
218-
keep_state_and_data.
225+
keep_state_and_data;
226+
running(
227+
info, {'DOWN', _MonRef, process, DownPid, _Reason}, #{reconnect_state := undefined} = State
228+
) ->
229+
%% No reconnect policy — no action (entity stays, player can rejoin)
230+
case find_player_by_pid(DownPid, State) of
231+
{ok, _PlayerId} -> keep_state_and_data;
232+
none -> keep_state_and_data
233+
end;
234+
running(info, {'DOWN', _MonRef, process, DownPid, _Reason}, #{reconnect_state := RS} = State) ->
235+
case find_player_by_pid(DownPid, State) of
236+
{ok, PlayerId} ->
237+
Now = erlang:system_time(millisecond),
238+
{Events, RS1} = asobi_reconnect:disconnect(PlayerId, Now, RS),
239+
State1 = handle_reconnect_events(Events, State#{reconnect_state => RS1}),
240+
{keep_state, State1};
241+
none ->
242+
keep_state_and_data
243+
end;
244+
running({call, From}, {reconnect, PlayerId}, State) ->
245+
#{
246+
reconnect_state := RS,
247+
player_zones := PZ,
248+
zone_pids := ZP,
249+
view_radius := _ViewRadius,
250+
grid_size := _GridSize
251+
} = State,
252+
case {RS, maps:get(PlayerId, PZ, undefined)} of
253+
{undefined, _} ->
254+
{keep_state_and_data, [{reply, From, {error, no_reconnect_policy}}]};
255+
{_, undefined} ->
256+
{keep_state_and_data, [{reply, From, {error, not_in_world}}]};
257+
{_, #{zone := ZoneCoords, interest := InterestZones}} ->
258+
{_Events, RS1} = asobi_reconnect:reconnect(PlayerId, RS),
259+
PlayerPid = find_player_pid(PlayerId),
260+
MonRef = erlang:monitor(process, PlayerPid),
261+
%% Re-subscribe to all interest zones
262+
lists:foreach(
263+
fun(Coords) ->
264+
case maps:get(Coords, ZP, undefined) of
265+
undefined -> ok;
266+
ZPid -> asobi_zone:subscribe(ZPid, {PlayerId, PlayerPid})
267+
end
268+
end,
269+
InterestZones
270+
),
271+
%% Notify session of world/zone
272+
ZonePid = maps:get(ZoneCoords, ZP, undefined),
273+
asobi_presence:send(PlayerId, {world_joined, self(), ZonePid}),
274+
%% Update player entry with new session
275+
Players = maps:get(players, State),
276+
PlayerMeta = maps:get(PlayerId, Players, #{}),
277+
Players1 = Players#{
278+
PlayerId => PlayerMeta#{
279+
session_pid => PlayerPid, monitor_ref => MonRef
280+
}
281+
},
282+
{keep_state, State#{reconnect_state => RS1, players => Players1}, [{reply, From, ok}]}
283+
end.
219284

220285
%% --- finished state ---
221286

@@ -307,12 +372,22 @@ handle_join(
307372
{ok, SpawnPos} = Mod:spawn_position(PlayerId, GS1),
308373
State1 = State#{game_state => GS1},
309374
State2 = place_player(PlayerId, SpawnPos, State1),
375+
%% Monitor player session for reconnection handling
376+
PlayerPid = find_player_pid(PlayerId),
377+
MonRef = erlang:monitor(process, PlayerPid),
310378
Players1 = Players#{
311379
PlayerId => #{
312380
joined_at => erlang:system_time(millisecond),
313-
position => SpawnPos
381+
position => SpawnPos,
382+
session_pid => PlayerPid,
383+
monitor_ref => MonRef
314384
}
315385
},
386+
%% Track player→world for reconnection lookup
387+
case ets:info(asobi_player_worlds) of
388+
undefined -> ok;
389+
_ -> ets:insert(asobi_player_worlds, {PlayerId, self()})
390+
end,
316391
VetoTokens = maps:get(veto_tokens, State2),
317392
VetoCount = maps:get(veto_tokens_per_player, State2),
318393
State3 = State2#{
@@ -665,7 +740,13 @@ persist_result(#{world_id := WorldId, players := Players} = State) ->
665740
notify_players(Event, #{players := Players, world_id := WorldId} = State) ->
666741
Payload =
667742
case Event of
668-
finished -> #{world_id => WorldId, result => maps:get(result, State, #{})}
743+
finished ->
744+
#{world_id => WorldId, result => maps:get(result, State, #{})};
745+
phase_changed ->
746+
case maps:get(phase_state, State, undefined) of
747+
undefined -> #{world_id => WorldId};
748+
PS -> maps:merge(#{world_id => WorldId}, asobi_phase:info(PS))
749+
end
669750
end,
670751
maps:foreach(
671752
fun(PlayerId, _Meta) ->
@@ -690,6 +771,55 @@ world_info(Status, #{world_id := WorldId, players := Players} = State) ->
690771
PS -> Base#{phase => asobi_phase:info(PS)}
691772
end.
692773

774+
%% --- Internal: Reconnection ---
775+
776+
init_reconnect(Config) ->
777+
case maps:get(reconnect, Config, undefined) of
778+
undefined -> undefined;
779+
Policy when is_map(Policy) -> asobi_reconnect:new(Policy)
780+
end.
781+
782+
tick_reconnect(#{reconnect_state := undefined} = State) ->
783+
State;
784+
tick_reconnect(#{reconnect_state := RS, tick_rate := TickRate} = State) ->
785+
{Events, RS1} = asobi_reconnect:tick(TickRate, RS),
786+
State1 = State#{reconnect_state => RS1},
787+
handle_reconnect_events(Events, State1).
788+
789+
handle_reconnect_events([], State) ->
790+
State;
791+
handle_reconnect_events([{grace_expired, PlayerId, Action} | Rest], State) ->
792+
State1 =
793+
case Action of
794+
remove ->
795+
#{world_id := WorldId} = State,
796+
case ets:info(asobi_player_worlds) of
797+
undefined -> ok;
798+
_ -> ets:delete(asobi_player_worlds, PlayerId)
799+
end,
800+
asobi_telemetry:world_player_left(WorldId, PlayerId),
801+
State2 = remove_player_from_zones(PlayerId, State),
802+
Players = maps:remove(PlayerId, maps:get(players, State2)),
803+
State2#{players => Players};
804+
_ ->
805+
State
806+
end,
807+
handle_reconnect_events(Rest, State1);
808+
handle_reconnect_events([_ | Rest], State) ->
809+
handle_reconnect_events(Rest, State).
810+
811+
find_player_by_pid(Pid, #{players := Players}) ->
812+
maps:fold(
813+
fun
814+
(PlayerId, #{session_pid := SPid}, none) when SPid =:= Pid ->
815+
{ok, PlayerId};
816+
(_, _, Acc) ->
817+
Acc
818+
end,
819+
none,
820+
Players
821+
).
822+
693823
resolve_siblings(#{instance_sup := InstanceSup}) ->
694824
ZoneSupPid = asobi_world_instance:get_child(InstanceSup, asobi_zone_sup),
695825
TickerPid = asobi_world_instance:get_child(InstanceSup, asobi_world_ticker),
@@ -723,10 +853,25 @@ tick_phases(
723853
undefined -> ~"complete";
724854
P -> P
725855
end
726-
);
856+
),
857+
%% Broadcast phase change to all players
858+
notify_players(phase_changed, State#{phase_state => PS1});
727859
false ->
728860
ok
729861
end,
862+
%% Periodically send phase info (every ~50 ticks = ~2.5s)
863+
case erlang:system_time(second) rem 3 of
864+
0 ->
865+
PhaseInfo2 = asobi_phase:info(PS1),
866+
maps:foreach(
867+
fun(PlayerId, _) ->
868+
asobi_presence:send(PlayerId, {world_event, phase_changed, PhaseInfo2})
869+
end,
870+
maps:get(players, State, #{})
871+
);
872+
_ ->
873+
ok
874+
end,
730875
GS1 = handle_phase_events(Events, Mod, GS),
731876
State#{phase_state => PS1, game_state => GS1}.
732877

src/world/asobi_world_sup.erl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,21 @@ start_link() ->
1010

1111
-spec init([]) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}.
1212
init([]) ->
13+
%% ETS tables for world state backup and player-world mapping
14+
_ =
15+
case ets:info(asobi_world_state) of
16+
undefined ->
17+
ets:new(asobi_world_state, [named_table, public, set, {read_concurrency, true}]);
18+
_ ->
19+
ok
20+
end,
21+
_ =
22+
case ets:info(asobi_player_worlds) of
23+
undefined ->
24+
ets:new(asobi_player_worlds, [named_table, public, set, {read_concurrency, true}]);
25+
_ ->
26+
ok
27+
end,
1328
SupFlags = #{
1429
strategy => one_for_one,
1530
intensity => 10,

src/world/asobi_zone.erl

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,14 @@ init(Config) ->
6666
GameModule = maps:get(game_module, Config),
6767
ZoneState = maps:get(zone_state, Config, #{}),
6868
pg:join(?PG_SCOPE, {asobi_zone, WorldId, Coords}, self()),
69+
%% Recover entity state from ETS backup if available (zone crash recovery)
70+
RecoveredEntities = recover_zone_state(WorldId, Coords),
6971
{ok, #{
7072
world_id => WorldId,
7173
coords => Coords,
7274
ticker_pid => TickerPid,
7375
game_module => GameModule,
74-
entities => #{},
76+
entities => RecoveredEntities,
7577
prev_entities => #{},
7678
broadcast_entities => #{},
7779
broadcast_interval => maps:get(broadcast_interval, Config, 3),
@@ -138,7 +140,17 @@ handle_info(_Info, State) ->
138140
{noreply, State}.
139141

140142
-spec terminate(term(), map()) -> ok.
141-
terminate(_Reason, #{world_id := WorldId, coords := Coords}) ->
143+
terminate(normal, #{world_id := WorldId, coords := Coords}) ->
144+
clear_zone_backup(WorldId, Coords),
145+
pg:leave(?PG_SCOPE, {asobi_zone, WorldId, Coords}, self()),
146+
ok;
147+
terminate({shutdown, _}, #{world_id := WorldId, coords := Coords}) ->
148+
clear_zone_backup(WorldId, Coords),
149+
pg:leave(?PG_SCOPE, {asobi_zone, WorldId, Coords}, self()),
150+
ok;
151+
terminate(_Reason, #{world_id := WorldId, coords := Coords, entities := Entities}) ->
152+
%% Abnormal termination — save state for recovery
153+
backup_zone_state(WorldId, Coords, Entities),
142154
pg:leave(?PG_SCOPE, {asobi_zone, WorldId, Coords}, self()),
143155
ok.
144156

@@ -147,6 +159,8 @@ terminate(_Reason, #{world_id := WorldId, coords := Coords}) ->
147159
do_tick(
148160
TickN,
149161
#{
162+
world_id := WorldId,
163+
coords := Coords,
150164
game_module := GameMod,
151165
entities := Entities,
152166
prev_entities := _PrevEntities,
@@ -160,7 +174,8 @@ do_tick(
160174
} = State
161175
) ->
162176
Entities1 = apply_inputs(GameMod, Queue, Entities),
163-
{Entities2, ZoneState1} = GameMod:zone_tick(Entities1, ZoneState),
177+
ZoneStateWithTick = ZoneState#{tick => TickN},
178+
{Entities2, ZoneState1} = GameMod:zone_tick(Entities1, ZoneStateWithTick),
164179
Now = erlang:system_time(millisecond),
165180
{TimerEvents, ET1} = asobi_entity_timer:tick(Now, ET),
166181
Entities3 = apply_timer_events(TimerEvents, Entities2),
@@ -175,6 +190,11 @@ do_tick(
175190
State
176191
end,
177192
asobi_world_ticker:tick_done(TickerPid, self(), TickN),
193+
%% Periodic backup for crash recovery (every 20 ticks ≈ 1 second)
194+
case TickN rem 20 of
195+
0 -> backup_zone_state(WorldId, Coords, Entities3);
196+
_ -> ok
197+
end,
178198
State1#{
179199
entities => Entities3,
180200
prev_entities => Entities3,
@@ -259,3 +279,31 @@ encode_delta({added, Id, FullState}) ->
259279
FullState#{~"op" => ~"a", ~"id" => Id};
260280
encode_delta({removed, Id}) ->
261281
#{~"op" => ~"r", ~"id" => Id}.
282+
283+
%% --- Zone State Backup/Recovery ---
284+
285+
backup_zone_state(WorldId, Coords, Entities) ->
286+
case ets:info(asobi_world_state) of
287+
undefined -> ok;
288+
_ -> ets:insert(asobi_world_state, {{WorldId, Coords}, Entities})
289+
end.
290+
291+
recover_zone_state(WorldId, Coords) ->
292+
case ets:info(asobi_world_state) of
293+
undefined ->
294+
#{};
295+
_ ->
296+
case ets:lookup(asobi_world_state, {WorldId, Coords}) of
297+
[{{WorldId, Coords}, Entities}] ->
298+
ets:delete(asobi_world_state, {WorldId, Coords}),
299+
Entities;
300+
[] ->
301+
#{}
302+
end
303+
end.
304+
305+
clear_zone_backup(WorldId, Coords) ->
306+
case ets:info(asobi_world_state) of
307+
undefined -> ok;
308+
_ -> ets:delete(asobi_world_state, {WorldId, Coords})
309+
end.

0 commit comments

Comments
 (0)