Skip to content

Commit e14f14c

Browse files
authored
Merge pull request #120 from smart-mcp-proxy/feature/docker-recovery
docs: Docker recovery critical analysis and improvement roadmap
2 parents 0108379 + 19d30aa commit e14f14c

23 files changed

Lines changed: 2342 additions & 122 deletions

cmd/mcpproxy-tray/internal/api/client.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,25 @@ func (c *Client) RestartServer(serverName string) error {
541541
return nil
542542
}
543543

544+
// ForceReconnectAllServers triggers reconnection attempts for all upstream servers
545+
func (c *Client) ForceReconnectAllServers(reason string) error {
546+
endpoint := "/api/v1/servers/reconnect"
547+
if reason != "" {
548+
endpoint = endpoint + "?reason=" + url.QueryEscape(reason)
549+
}
550+
551+
resp, err := c.makeRequest("POST", endpoint, nil)
552+
if err != nil {
553+
return err
554+
}
555+
556+
if !resp.Success {
557+
return fmt.Errorf("API error: %s", resp.Error)
558+
}
559+
560+
return nil
561+
}
562+
544563
// TriggerOAuthLogin triggers OAuth login for a server
545564
func (c *Client) TriggerOAuthLogin(serverName string) error {
546565
endpoint := fmt.Sprintf("/api/v1/servers/%s/login", serverName)

cmd/mcpproxy-tray/internal/state/machine.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ func (m *Machine) determineNewState(currentState State, event Event) State {
205205
return StateCoreErrorPortConflict
206206
case EventDBLocked:
207207
return StateCoreErrorDBLocked
208+
case EventDockerUnavailable:
209+
return StateCoreErrorDocker
208210
case EventConfigError:
209211
return StateCoreErrorConfig
210212
case EventPermissionError:
@@ -225,6 +227,8 @@ func (m *Machine) determineNewState(currentState State, event Event) State {
225227
return StateCoreErrorPortConflict
226228
case EventDBLocked:
227229
return StateCoreErrorDBLocked
230+
case EventDockerUnavailable:
231+
return StateCoreErrorDocker
228232
case EventConfigError:
229233
return StateCoreErrorConfig
230234
case EventPermissionError:
@@ -252,6 +256,8 @@ func (m *Machine) determineNewState(currentState State, event Event) State {
252256
return StateCoreErrorPortConflict
253257
case EventDBLocked:
254258
return StateCoreErrorDBLocked
259+
case EventDockerUnavailable:
260+
return StateCoreErrorDocker
255261
case EventConfigError:
256262
return StateCoreErrorConfig
257263
case EventPermissionError:
@@ -287,20 +293,28 @@ func (m *Machine) determineNewState(currentState State, event Event) State {
287293
return StateShuttingDown
288294
}
289295

290-
case StateCoreErrorPortConflict, StateCoreErrorDBLocked, StateCoreErrorGeneral:
296+
case StateCoreErrorPortConflict, StateCoreErrorDBLocked, StateCoreErrorPermission, StateCoreErrorGeneral:
291297
switch event {
292298
case EventShutdown:
293299
return StateShuttingDown
294-
// Error states persist - require user to fix issue manually
295-
// No auto-retry or auto-transition to failed state
300+
// Error states persist - require user to fix issue manually
301+
// No auto-retry or auto-transition to failed state
296302
}
297303

298304
case StateCoreErrorConfig:
299305
switch event {
300306
case EventShutdown:
301307
return StateShuttingDown
302-
// Config errors persist - require user to fix config manually
303-
// Stay in StateCoreErrorConfig for all other events
308+
// Config errors persist - require user to fix config manually
309+
// Stay in StateCoreErrorConfig for all other events
310+
}
311+
312+
case StateCoreErrorDocker:
313+
switch event {
314+
case EventRetry:
315+
return StateLaunchingCore
316+
case EventShutdown:
317+
return StateShuttingDown
304318
}
305319

306320
case StateFailed:

cmd/mcpproxy-tray/internal/state/states.go

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ const (
3434
// StateCoreErrorDBLocked represents core failed due to database lock
3535
StateCoreErrorDBLocked State = "core_error_db_locked"
3636

37+
// StateCoreErrorDocker represents core failed due to Docker being unavailable
38+
StateCoreErrorDocker State = "core_error_docker"
39+
40+
// StateCoreRecoveringDocker represents Docker recovery in progress
41+
StateCoreRecoveringDocker State = "core_recovering_docker"
42+
3743
// StateCoreErrorConfig represents core failed due to configuration error
3844
StateCoreErrorConfig State = "core_error_config"
3945

@@ -84,6 +90,12 @@ const (
8490
// EventPermissionError indicates core failed due to permission error
8591
EventPermissionError Event = "permission_error"
8692

93+
// EventDockerUnavailable indicates Docker engine is unavailable or paused
94+
EventDockerUnavailable Event = "docker_unavailable"
95+
96+
// EventDockerRecovered indicates Docker engine became available again
97+
EventDockerRecovered Event = "docker_recovered"
98+
8799
// EventGeneralError indicates core failed with general error
88100
EventGeneralError Event = "general_error"
89101

@@ -112,7 +124,7 @@ type Info struct {
112124

113125
// GetInfo returns metadata for a given state
114126
func GetInfo(state State) Info {
115-
timeout90s := 90 * time.Second // Must exceed health monitor's readinessTimeout (60s)
127+
timeout90s := 90 * time.Second // Must exceed health monitor's readinessTimeout (60s)
116128
timeout5s := 5 * time.Second
117129
timeout10s := 10 * time.Second
118130

@@ -133,7 +145,7 @@ func GetInfo(state State) Info {
133145
Name: StateWaitingForCore,
134146
Description: "Waiting for core to become ready",
135147
UserMessage: "Core starting up...",
136-
Timeout: &timeout90s, // Increased to 90s to allow Docker isolation startup (health timeout is 60s)
148+
Timeout: &timeout90s, // Increased to 90s to allow Docker isolation startup (health timeout is 60s)
137149
},
138150
StateConnectingAPI: {
139151
Name: StateConnectingAPI,
@@ -176,6 +188,20 @@ func GetInfo(state State) Info {
176188
CanRetry: false,
177189
// No timeout - config errors persist until user fixes the config
178190
},
191+
StateCoreErrorDocker: {
192+
Name: StateCoreErrorDocker,
193+
Description: "Docker engine unavailable or paused",
194+
UserMessage: "Docker engine unavailable - resume Docker Desktop",
195+
IsError: true,
196+
CanRetry: true,
197+
},
198+
StateCoreRecoveringDocker: {
199+
Name: StateCoreRecoveringDocker,
200+
Description: "Docker recovery in progress",
201+
UserMessage: "Docker engine recovered - reconnecting servers...",
202+
CanRetry: false,
203+
Timeout: &timeout10s,
204+
},
179205
StateCoreErrorPermission: {
180206
Name: StateCoreErrorPermission,
181207
Description: "Core failed due to permission error",
@@ -229,6 +255,7 @@ func CanTransition(from, to State) bool {
229255
StateWaitingForCore,
230256
StateCoreErrorPortConflict,
231257
StateCoreErrorDBLocked,
258+
StateCoreErrorDocker,
232259
StateCoreErrorConfig,
233260
StateCoreErrorGeneral,
234261
StateShuttingDown,
@@ -237,7 +264,8 @@ func CanTransition(from, to State) bool {
237264
StateConnectingAPI,
238265
StateCoreErrorPortConflict, // ADD: Handle port conflict
239266
StateCoreErrorDBLocked, // ADD: Handle DB lock
240-
StateCoreErrorConfig, // ADD: Handle config error
267+
StateCoreErrorDocker,
268+
StateCoreErrorConfig, // ADD: Handle config error
241269
StateCoreErrorGeneral,
242270
StateLaunchingCore, // Retry
243271
StateShuttingDown,
@@ -247,7 +275,8 @@ func CanTransition(from, to State) bool {
247275
StateReconnecting,
248276
StateCoreErrorPortConflict, // ADD: Handle port conflict during connection
249277
StateCoreErrorDBLocked, // ADD: Handle DB lock during connection
250-
StateCoreErrorConfig, // ADD: Handle config error during connection
278+
StateCoreErrorDocker,
279+
StateCoreErrorConfig, // ADD: Handle config error during connection
251280
StateCoreErrorGeneral,
252281
StateShuttingDown,
253282
},
@@ -273,6 +302,15 @@ func CanTransition(from, to State) bool {
273302
// Error persists - only shutdown allowed
274303
StateShuttingDown,
275304
},
305+
StateCoreErrorDocker: {
306+
StateCoreRecoveringDocker, // Transition to recovering when Docker comes back
307+
StateShuttingDown,
308+
},
309+
StateCoreRecoveringDocker: {
310+
StateLaunchingCore, // Launch core after Docker recovery
311+
StateCoreErrorDocker, // Back to error if Docker fails again
312+
StateShuttingDown,
313+
},
276314
StateCoreErrorGeneral: {
277315
// Error persists - only shutdown allowed
278316
StateShuttingDown,

0 commit comments

Comments
 (0)