From 173eb9c8677dd7346722dd5474d66f1db2b9ed87 Mon Sep 17 00:00:00 2001 From: Khang Date: Sun, 10 May 2026 21:32:53 -0400 Subject: [PATCH 1/2] Introduce PtySession lifecycle owner --- .../__tests__/pty-manager-extended.test.ts | 62 +++++- src/main/__tests__/pty-manager.test.ts | 52 ++--- src/main/ipc-handlers.ts | 10 +- src/main/pty-manager.ts | 167 +++++---------- src/main/pty-session.ts | 194 ++++++++++++++++++ 5 files changed, 329 insertions(+), 156 deletions(-) create mode 100644 src/main/pty-session.ts diff --git a/src/main/__tests__/pty-manager-extended.test.ts b/src/main/__tests__/pty-manager-extended.test.ts index 21f873f5..5d8d54ce 100644 --- a/src/main/__tests__/pty-manager-extended.test.ts +++ b/src/main/__tests__/pty-manager-extended.test.ts @@ -9,6 +9,8 @@ const mockInstances: Array<{ write: ReturnType; resize: ReturnType; kill: ReturnType; + pause: ReturnType; + resume: ReturnType; }> = []; vi.mock('node-pty', () => ({ @@ -19,7 +21,9 @@ vi.mock('node-pty', () => ({ onExit: vi.fn(), write: vi.fn(), resize: vi.fn(), - kill: vi.fn() + kill: vi.fn(), + pause: vi.fn(), + resume: vi.fn() }; mockInstances.push(instance); return instance; @@ -123,6 +127,62 @@ describe('PtyManager — extended', () => { manager.kill('ghost'); // should not throw }); + it('kill() twice is safe and only kills the process once', () => { + manager.create({ paneId: 'p1', cwd: '/tmp', shell: '/bin/sh' }); + + manager.kill('p1'); + manager.kill('p1'); + + expect(mockInstances[0].kill).toHaveBeenCalledTimes(1); + }); + + it('write() after natural exit is ignored', () => { + manager.create({ paneId: 'p1', cwd: '/tmp', shell: '/bin/sh' }); + const registeredHandler = mockInstances[0].onExit.mock.calls[0][0] as (e: { + exitCode: number; + }) => void; + + registeredHandler({ exitCode: 0 }); + manager.write('p1', 'after exit'); + + expect(mockInstances[0].write).not.toHaveBeenCalled(); + }); + + it('resize() after natural exit is ignored', () => { + manager.create({ paneId: 'p1', cwd: '/tmp', shell: '/bin/sh' }); + const registeredHandler = mockInstances[0].onExit.mock.calls[0][0] as (e: { + exitCode: number; + }) => void; + + registeredHandler({ exitCode: 0 }); + manager.resize('p1', 120, 40); + + expect(mockInstances[0].resize).not.toHaveBeenCalled(); + }); + + it('resize() after kill is ignored', () => { + manager.create({ paneId: 'p1', cwd: '/tmp', shell: '/bin/sh' }); + + manager.kill('p1'); + manager.resize('p1', 120, 40); + + expect(mockInstances[0].resize).not.toHaveBeenCalled(); + }); + + it('gc() does not kill protected PTYs', () => { + manager.create({ paneId: 'p1', cwd: '/tmp', shell: '/bin/sh' }); + manager.create({ paneId: 'p2', cwd: '/tmp', shell: '/bin/sh' }); + manager.protect('p1'); + + const killed = manager.gc(new Set()); + + expect(killed).toEqual(['p2']); + expect(mockInstances[0].kill).not.toHaveBeenCalled(); + expect(mockInstances[1].kill).toHaveBeenCalledTimes(1); + expect(manager.has('p1')).toBe(true); + expect(manager.has('p2')).toBe(false); + }); + it('uses provided cols and rows when creating PTY', async () => { const pty = await import('node-pty'); manager.create({ paneId: 'p1', cwd: '/home', shell: '/bin/bash', cols: 200, rows: 50 }); diff --git a/src/main/__tests__/pty-manager.test.ts b/src/main/__tests__/pty-manager.test.ts index e618408e..76695bf7 100644 --- a/src/main/__tests__/pty-manager.test.ts +++ b/src/main/__tests__/pty-manager.test.ts @@ -155,34 +155,21 @@ describe('PtyManager batching and cleanup', () => { expect(secondCallback).not.toHaveBeenCalled(); }); - it('disposes previous exitDisposable when onExit is called again', () => { + it('registers one internal exit listener even when onExit is re-registered', () => { manager.create({ paneId: 'pane-1', cwd: '/tmp', shell: '/bin/zsh' }); const mockPty = (ptyModule.spawn as ReturnType).mock.results[0].value; - const firstDispose = vi.fn(); - const secondDispose = vi.fn(); - mockPty.onExit - .mockReturnValueOnce({ dispose: firstDispose }) - .mockReturnValueOnce({ dispose: secondDispose }); - manager.onExit('pane-1', vi.fn()); - // Second registration should dispose the first listener manager.onExit('pane-1', vi.fn()); - expect(firstDispose).toHaveBeenCalledTimes(1); - expect(secondDispose).not.toHaveBeenCalled(); + expect(mockPty.onExit).toHaveBeenCalledTimes(1); }); it('does not fire duplicate exit callbacks when onExit is re-registered', () => { manager.create({ paneId: 'pane-1', cwd: '/tmp', shell: '/bin/zsh' }); const mockPty = (ptyModule.spawn as ReturnType).mock.results[0].value; - // Track which callbacks get registered on the mock PTY - const registeredCallbacks: Array<(e: { exitCode: number }) => void> = []; - mockPty.onExit.mockImplementation((cb: (e: { exitCode: number }) => void) => { - registeredCallbacks.push(cb); - return { dispose: vi.fn() }; - }); + const registeredCallback = mockPty.onExit.mock.calls[0][0] as (e: { exitCode: number }) => void; const firstCallback = vi.fn(); const secondCallback = vi.fn(); @@ -190,11 +177,11 @@ describe('PtyManager batching and cleanup', () => { manager.onExit('pane-1', firstCallback); manager.onExit('pane-1', secondCallback); - // Simulate PTY exit on the second (active) listener - registeredCallbacks[1]({ exitCode: 0 }); + // Simulate PTY exit on the single internal listener + registeredCallback({ exitCode: 0 }); expect(secondCallback).toHaveBeenCalledWith(0); - // First callback should NOT have been called (its listener was disposed) + // First callback should NOT have been called (it was replaced) expect(firstCallback).not.toHaveBeenCalled(); }); @@ -203,17 +190,12 @@ describe('PtyManager batching and cleanup', () => { const mockPty = (ptyModule.spawn as ReturnType).mock.results[0].value; const dataDisposable = mockPty.onData.mock.results[0].value; - // Register an exit handler that captures the onExit callback - const registeredCallbacks: Array<(e: { exitCode: number }) => void> = []; - mockPty.onExit.mockImplementation((cb: (e: { exitCode: number }) => void) => { - registeredCallbacks.push(cb); - return { dispose: vi.fn() }; - }); + const registeredCallback = mockPty.onExit.mock.calls[0][0] as (e: { exitCode: number }) => void; manager.onExit('pane-1', vi.fn()); // Simulate natural PTY exit - registeredCallbacks[0]({ exitCode: 0 }); + registeredCallback({ exitCode: 0 }); expect(dataDisposable.dispose).toHaveBeenCalled(); }); @@ -260,15 +242,21 @@ describe('PtyManager batching and cleanup', () => { expect(pausedFlush).toBeDefined(); }); + it('removes pane on natural PTY exit even without an external exit callback', () => { + manager.create({ paneId: 'pane-1', cwd: '/tmp', shell: '/bin/zsh' }); + const mockPty = (ptyModule.spawn as ReturnType).mock.results[0].value; + const registeredCallback = mockPty.onExit.mock.calls[0][0] as (e: { exitCode: number }) => void; + + registeredCallback({ exitCode: 0 }); + + expect(manager.has('pane-1')).toBe(false); + }); + it('clears flush timer on natural PTY exit when it is the last PTY', () => { manager.create({ paneId: 'pane-1', cwd: '/tmp', shell: '/bin/zsh' }); const mockPty = (ptyModule.spawn as ReturnType).mock.results[0].value; - const registeredCallbacks: Array<(e: { exitCode: number }) => void> = []; - mockPty.onExit.mockImplementation((cb: (e: { exitCode: number }) => void) => { - registeredCallbacks.push(cb); - return { dispose: vi.fn() }; - }); + const registeredCallback = mockPty.onExit.mock.calls[0][0] as (e: { exitCode: number }) => void; manager.onData('pane-1', vi.fn()); manager.onExit('pane-1', vi.fn()); @@ -276,7 +264,7 @@ describe('PtyManager batching and cleanup', () => { expect(vi.getTimerCount()).toBeGreaterThan(0); // Simulate natural exit - registeredCallbacks[0]({ exitCode: 0 }); + registeredCallback({ exitCode: 0 }); expect(vi.getTimerCount()).toBe(0); }); diff --git a/src/main/ipc-handlers.ts b/src/main/ipc-handlers.ts index a58c1756..f6f6c7b2 100644 --- a/src/main/ipc-handlers.ts +++ b/src/main/ipc-handlers.ts @@ -170,16 +170,14 @@ export function registerIpcHandlers( // Attach to a pre-created PTY: drain its buffered output so the renderer // can replay what arrived before the terminal component mounted. ipcMain.handle(IPC_CHANNELS.PTY_ATTACH, (_event, { paneId }: { paneId: string }) => { - const entry = ptyManager.get(paneId); - if (!entry) return { data: '' }; - const data = entry.outputBuffer; - entry.outputBuffer = ''; + const drained = ptyManager.drainBuffer(paneId); + if (!drained) return { data: '' }; // Resume the PTY if it was paused due to buffer overflow (e.g. during // a hard refresh when the renderer wasn't consuming data). - if (entry.paused) { + if (drained.wasPaused) { ptyManager.resume(paneId); } - return { data }; + return { data: drained.data }; }); // Garbage-collect orphaned PTYs: renderer sends list of active pane IDs, diff --git a/src/main/pty-manager.ts b/src/main/pty-manager.ts index ecc804b6..c3f1bc29 100644 --- a/src/main/pty-manager.ts +++ b/src/main/pty-manager.ts @@ -1,6 +1,7 @@ import * as pty from 'node-pty'; import { getDefaultShell } from './shell-detection'; import { createLogger } from './logger'; +import { PtySession, type PtyShutdownReason } from './pty-session'; const log = createLogger('pty'); @@ -24,37 +25,24 @@ export type PtyCreateResult = { pid: number; }; -type PtyEntry = { - process: pty.IPty; - paneId: string; - cwd: string; - outputBuffer: string; - paused: boolean; - dataDisposable: pty.IDisposable | null; - exitDisposable: pty.IDisposable | null; -}; - const FLUSH_INTERVAL_MS = 16; -const BUFFER_OVERFLOW_BYTES = 256 * 1024; export class PtyManager { - private ptys = new Map(); + private sessions = new Map(); /** PTYs that must not be killed by the renderer-driven GC. */ private protectedPtys = new Set(); - private dataCallbacks = new Map void>(); private flushTimer: ReturnType | null = null; create(opts: PtyCreateOptions): PtyCreateResult { - if (this.ptys.has(opts.paneId)) { + const existing = this.sessions.get(opts.paneId); + if (existing) { // Idempotent: return existing PTY info (handles HMR reloads in dev where the // renderer-side createdPtys Set is reset but the main process map persists) - const existing = this.ptys.get(opts.paneId); - if (!existing) return { paneId: opts.paneId, pid: 0 }; log.debug('PTY already exists, returning existing pid', { paneId: opts.paneId, - pid: existing.process.pid + pid: existing.pid }); - return { paneId: opts.paneId, pid: existing.process.pid }; + return { paneId: opts.paneId, pid: existing.pid }; } const shell = opts.shell ?? getDefaultShell(); @@ -83,49 +71,23 @@ export class PtyManager { env: { ...(opts.env ?? process.env), FLEET_SESSION: '1' } }); - const entry: PtyEntry = { + const session = new PtySession({ process: proc, paneId: opts.paneId, cwd: opts.cwd, - outputBuffer: '', - paused: false, - dataDisposable: null, - exitDisposable: null - }; - - // Register the internal buffering callback immediately at create time so - // the IDisposable is captured and can be disposed during kill(). - entry.dataDisposable = proc.onData((data: string) => { - entry.outputBuffer += data; - if (entry.outputBuffer.length > BUFFER_OVERFLOW_BYTES) { - log.debug('backpressure pause', { - paneId: opts.paneId, - bufferBytes: entry.outputBuffer.length - }); - entry.paused = true; - this.flushPane(opts.paneId); - proc.pause(); - } + onEnded: (paneId) => this.removeSession(paneId) }); + this.sessions.set(opts.paneId, session); - this.ptys.set(opts.paneId, entry); - - return { paneId: opts.paneId, pid: proc.pid }; + return { paneId: opts.paneId, pid: session.pid }; } write(paneId: string, data: string): void { - const entry = this.ptys.get(paneId); - if (entry) { - entry.process.write(data); - } + this.sessions.get(paneId)?.write(data); } resize(paneId: string, cols: number, rows: number): void { - const entry = this.ptys.get(paneId); - if (entry) { - log.debug('resize', { paneId, cols, rows }); - entry.process.resize(cols, rows); - } + this.sessions.get(paneId)?.resize(cols, rows); } protect(paneId: string): void { @@ -133,22 +95,12 @@ export class PtyManager { } kill(paneId: string): void { - const entry = this.ptys.get(paneId); - if (entry) { - log.debug('kill', { paneId, pid: entry.process.pid }); - entry.dataDisposable?.dispose(); - entry.exitDisposable?.dispose(); - this.dataCallbacks.delete(paneId); - entry.process.kill(); - this.ptys.delete(paneId); - this.protectedPtys.delete(paneId); - this.clearFlushTimerIfEmpty(); - } + this.shutdownSession(paneId, 'user'); } killAll(): void { - for (const [paneId] of this.ptys) { - this.kill(paneId); + for (const paneId of Array.from(this.sessions.keys())) { + this.shutdownSession(paneId, 'app-quit'); } if (this.flushTimer) { clearInterval(this.flushTimer); @@ -157,41 +109,41 @@ export class PtyManager { } has(paneId: string): boolean { - return this.ptys.has(paneId); + return this.sessions.has(paneId); } - get(paneId: string): PtyEntry | undefined { - return this.ptys.get(paneId); + get(paneId: string): PtySession | undefined { + return this.sessions.get(paneId); } paneIds(): string[] { - return Array.from(this.ptys.keys()); + return Array.from(this.sessions.keys()); } getCwd(paneId: string): string | undefined { - return this.ptys.get(paneId)?.cwd; + return this.sessions.get(paneId)?.cwd; } updateCwd(paneId: string, cwd: string): void { - const entry = this.ptys.get(paneId); - if (entry) entry.cwd = cwd; + const session = this.sessions.get(paneId); + if (session) session.cwd = cwd; } getPid(paneId: string): number | undefined { - return this.ptys.get(paneId)?.process.pid; + return this.sessions.get(paneId)?.pid; } /** Returns the current foreground process name for a PTY (e.g. "zsh", "node", "claude"). */ getProcessName(paneId: string): string | undefined { - return this.ptys.get(paneId)?.process.process; + return this.sessions.get(paneId)?.processName; } /** Kill any PTY whose paneId is not in the given set of active IDs (and not protected). */ gc(activePaneIds: Set): string[] { const killed: string[] = []; - for (const paneId of this.ptys.keys()) { + for (const paneId of Array.from(this.sessions.keys())) { if (!activePaneIds.has(paneId) && !this.protectedPtys.has(paneId)) { - this.kill(paneId); + this.shutdownSession(paneId, 'gc'); killed.push(paneId); } } @@ -204,17 +156,10 @@ export class PtyManager { * this method wires up the flush callback and starts the shared flush timer. */ onData(paneId: string, callback: (data: string, paused: boolean) => void): void { - const entry = this.ptys.get(paneId); - if (!entry) return; - - if (this.dataCallbacks.has(paneId)) { - log.warn('onData already registered for pane, skipping to prevent silent overwrite', { - paneId - }); - return; - } + const session = this.sessions.get(paneId); + if (!session) return; - this.dataCallbacks.set(paneId, callback); + session.onData(callback); // Start shared flush timer if not already running this.flushTimer ??= setInterval(() => this.flushAll(), FLUSH_INTERVAL_MS); @@ -222,51 +167,39 @@ export class PtyManager { /** Resume a paused PTY (called by renderer after consuming a batch). */ resume(paneId: string): void { - const entry = this.ptys.get(paneId); - if (entry) { - log.debug('resume', { paneId }); - entry.paused = false; - entry.process.resume(); - } + this.sessions.get(paneId)?.resume(); } onExit(paneId: string, callback: (exitCode: number) => void): void { - const entry = this.ptys.get(paneId); - if (entry) { - // Dispose previous exit listener to prevent stacking (e.g. on HMR re-register) - entry.exitDisposable?.dispose(); - entry.exitDisposable = entry.process.onExit(({ exitCode }) => { - log.debug('exit', { paneId, exitCode }); - entry.dataDisposable?.dispose(); - this.dataCallbacks.delete(paneId); - this.ptys.delete(paneId); - this.protectedPtys.delete(paneId); - this.clearFlushTimerIfEmpty(); - callback(exitCode); - }); - } + this.sessions.get(paneId)?.onExit(callback); + } + + drainBuffer(paneId: string): { data: string; wasPaused: boolean } | undefined { + return this.sessions.get(paneId)?.drainBuffer(); + } + + private shutdownSession(paneId: string, reason: PtyShutdownReason): void { + const session = this.sessions.get(paneId); + if (!session) return; + session.shutdown(reason); + } + + private removeSession(paneId: string): void { + this.sessions.delete(paneId); + this.protectedPtys.delete(paneId); + this.clearFlushTimerIfEmpty(); } private clearFlushTimerIfEmpty(): void { - if (this.ptys.size === 0 && this.flushTimer) { + if (this.sessions.size === 0 && this.flushTimer) { clearInterval(this.flushTimer); this.flushTimer = null; } } - private flushPane(paneId: string): void { - const entry = this.ptys.get(paneId); - if (!entry?.outputBuffer) return; - const callback = this.dataCallbacks.get(paneId); - if (callback) { - callback(entry.outputBuffer, entry.paused); - entry.outputBuffer = ''; - } - } - private flushAll(): void { - for (const paneId of this.ptys.keys()) { - this.flushPane(paneId); + for (const session of this.sessions.values()) { + session.flush(); } } } diff --git a/src/main/pty-session.ts b/src/main/pty-session.ts new file mode 100644 index 00000000..89e2b740 --- /dev/null +++ b/src/main/pty-session.ts @@ -0,0 +1,194 @@ +import type * as pty from 'node-pty'; +import { createLogger } from './logger'; + +const log = createLogger('pty'); + +export type PtySessionState = 'starting' | 'running' | 'exiting' | 'exited' | 'killed'; +export type PtyShutdownReason = 'user' | 'gc' | 'app-quit' | 'process-exit'; + +export type PtySessionOptions = { + paneId: string; + cwd: string; + process: pty.IPty; + onEnded: (paneId: string) => void; +}; + +const BUFFER_OVERFLOW_BYTES = 256 * 1024; + +type DataCallback = (data: string, paused: boolean) => void; +type ExitCallback = (exitCode: number) => void; + +export class PtySession { + readonly paneId: string; + + private readonly process: pty.IPty; + private readonly onEnded: (paneId: string) => void; + private state: PtySessionState = 'starting'; + private dataCallback: DataCallback | null = null; + private exitCallback: ExitCallback | null = null; + private dataDisposable: pty.IDisposable | null = null; + private exitDisposable: pty.IDisposable | null = null; + private outputBuffer = ''; + private paused = false; + private hasShutdown = false; + + constructor(opts: PtySessionOptions) { + this.paneId = opts.paneId; + this.cwd = opts.cwd; + this.process = opts.process; + this.onEnded = opts.onEnded; + + this.dataDisposable = this.process.onData((data: string) => { + this.handleData(data); + }); + + this.exitDisposable = this.process.onExit(({ exitCode }) => { + this.handleExit(exitCode); + }); + + this.state = 'running'; + } + + cwd: string; + + get pid(): number { + return this.process.pid; + } + + get processName(): string | undefined { + return this.process.process; + } + + get lifecycleState(): PtySessionState { + return this.state; + } + + write(data: string): void { + if (!this.isRunning()) { + log.debug('write ignored for inactive PTY', { paneId: this.paneId, state: this.state }); + return; + } + this.process.write(data); + } + + resize(cols: number, rows: number): void { + if (!this.isRunning()) { + log.debug('resize ignored for inactive PTY', { paneId: this.paneId, state: this.state }); + return; + } + log.debug('resize', { paneId: this.paneId, cols, rows }); + this.process.resize(cols, rows); + } + + onData(callback: DataCallback): void { + if (!this.isRunning()) return; + + if (this.dataCallback) { + log.warn('onData already registered for pane, skipping to prevent silent overwrite', { + paneId: this.paneId + }); + return; + } + + this.dataCallback = callback; + } + + onExit(callback: ExitCallback): void { + if (this.state === 'exited' || this.state === 'killed') return; + this.exitCallback = callback; + } + + flush(): void { + if (!this.outputBuffer) return; + if (!this.dataCallback) return; + + this.dataCallback(this.outputBuffer, this.paused); + this.outputBuffer = ''; + } + + drainBuffer(): { data: string; wasPaused: boolean } { + const data = this.outputBuffer; + const wasPaused = this.paused; + this.outputBuffer = ''; + return { data, wasPaused }; + } + + resume(): void { + if (this.state === 'exited' || this.state === 'killed') return; + + log.debug('resume', { paneId: this.paneId }); + this.paused = false; + this.process.resume(); + } + + shutdown(reason: PtyShutdownReason): void { + if (this.hasShutdown || this.state === 'exited' || this.state === 'killed') return; + + this.hasShutdown = true; + this.state = reason === 'process-exit' ? 'exited' : 'exiting'; + log.debug('shutdown', { paneId: this.paneId, pid: this.process.pid, reason }); + + this.disposeListeners(); + this.dataCallback = null; + this.exitCallback = null; + this.outputBuffer = ''; + this.paused = false; + + if (reason !== 'process-exit') { + try { + this.process.kill(); + } catch (err) { + log.warn('failed to kill PTY', { + paneId: this.paneId, + error: err instanceof Error ? err.message : String(err) + }); + } + this.state = 'killed'; + } + + this.onEnded(this.paneId); + } + + private handleData(data: string): void { + if (!this.isRunning()) return; + + this.outputBuffer += data; + if (this.outputBuffer.length > BUFFER_OVERFLOW_BYTES) { + log.debug('backpressure pause', { + paneId: this.paneId, + bufferBytes: this.outputBuffer.length + }); + this.paused = true; + this.flush(); + this.process.pause(); + } + } + + private handleExit(exitCode: number): void { + if (this.hasShutdown || this.state === 'exited' || this.state === 'killed') return; + + const callback = this.exitCallback; + this.hasShutdown = true; + this.state = 'exited'; + log.debug('exit', { paneId: this.paneId, exitCode }); + + this.disposeListeners(); + this.dataCallback = null; + this.exitCallback = null; + this.outputBuffer = ''; + this.paused = false; + this.onEnded(this.paneId); + callback?.(exitCode); + } + + private isRunning(): boolean { + return this.state === 'running'; + } + + private disposeListeners(): void { + this.dataDisposable?.dispose(); + this.dataDisposable = null; + this.exitDisposable?.dispose(); + this.exitDisposable = null; + } +} From 1e4de5490c5847758d02f736f65be419f336e129 Mon Sep 17 00:00:00 2001 From: Khang Date: Sun, 10 May 2026 21:36:52 -0400 Subject: [PATCH 2/2] Update Warp stability plan for PtySession --- docs/warp-architecture-stability-report.md | 375 +++++++++++++++++++++ 1 file changed, 375 insertions(+) create mode 100644 docs/warp-architecture-stability-report.md diff --git a/docs/warp-architecture-stability-report.md b/docs/warp-architecture-stability-report.md new file mode 100644 index 00000000..df1e2216 --- /dev/null +++ b/docs/warp-architecture-stability-report.md @@ -0,0 +1,375 @@ +# Warp Architecture Stability Report + +Date: 2026-05-10 + +## Purpose + +Review `reference/warp` for architectural patterns Fleet can adopt to become more stable and robust. This is intentionally not a feature comparison; the focus is ownership, lifecycle, event flow, diagnostics, and testability. + +## Executive Summary + +Warp's most transferable architectural patterns are: + +- explicit PTY/session ownership and deterministic shutdown +- centralized PTY write arbitration +- typed event dispatch from low-level terminal events into UI/app models +- throttled/coalesced rendering and state updates +- bounded diagnostics/telemetry buffers +- privacy-aware logging/redaction +- scoped feature-flag overrides for tests +- hermetic lifecycle/integration tests + +Fleet already has the beginnings of this architecture with `PtyManager`, typed `EventBus`, `PtyDataRouter`, `SocketSupervisor`, targeted tests, and learnings docs. The highest-leverage improvement is to make each pane's PTY a first-class lifecycle-owned session object. + +## Highest-Leverage Architectural Improvements for Fleet + +### 1. Introduce a per-session lifecycle owner + +Warp has a `TerminalManager` that owns the terminal model, PTY event loop, view, controllers, inactive receivers, and teardown path. Its drop path sends shutdown, joins the event-loop thread, closes receivers, and kills the PTY only in the correct cases. + +Fleet previously had `src/main/pty-manager.ts` as a central registry where each PTY entry directly held: + +- process +- output buffer +- paused flag +- data/exit disposables +- cwd + +Fleet now has the first iteration of this pattern in `src/main/pty-session.ts`, with `PtyManager` acting as the session registry/delegator. Each `PtySession` owns: + +- node-pty process +- data subscription +- exit subscription +- output batching buffer +- paused/resume state +- lifecycle state: `starting | running | exiting | exited | killed` +- idempotent teardown + +Still to add in later phases: + +- write queue/state +- resize state beyond guarded forwarding +- per-session diagnostics + +Why this matters for Fleet: many historical Fleet learnings are lifecycle bugs — duplicate PTYs, blank terminals after pane close, hard refresh paused PTY, and lingering sessions. A session owner gives us one place to enforce ordering and cleanup. + +Recommended shape: + +```ts +class PtySession { + readonly paneId: string; + private state: 'starting' | 'running' | 'exiting' | 'exited' = 'starting'; + + write(data: string): void; + resize(cols: number, rows: number): void; + onData(cb: (data: string, paused: boolean) => void): void; + onExit(cb: (exitCode: number) => void): void; + + shutdown(reason: 'user' | 'gc' | 'app-quit' | 'process-exit'): void; +} +``` + +Implemented in PR #204: `PtyManager` is now a map of session owners rather than the place where all lifecycle logic accumulates. It still owns cross-session policy such as protected PTYs, renderer-driven GC, and the shared flush timer. + +### 2. Add a centralized PTY write arbiter + +Warp routes writes through `PtyController`, not directly from views. It coordinates raw bytes, user commands, agent commands, in-band commands, cancellation, and readiness. + +Fleet currently exposes: + +```ts +PtyManager.write(paneId, data) +PtyManager.resize(paneId, cols, rows) +``` + +That is simple, but different call paths can interleave writes without policy. For agent terminals, this can cause subtle corruption: user keystrokes, automated commands, approval responses, and bridge/control messages can race. + +Fleet should adopt a lightweight write queue per `PtySession`. PR #204 already added lifecycle guards so writes/resizes after exit or kill are ignored; the remaining work is arbitration between valid writes: + +- classify writes: + - `user-input` + - `agent-input` + - `control` + - `bootstrap` +- define priority/cancellation rules: + - user input can cancel stale automated writes + - writes after exit are ignored and logged — implemented as lifecycle guard in PR #204 + - resize after exit is ignored and logged — implemented as lifecycle guard in PR #204 + - pending bootstrap writes can only happen during `starting` or `running` + +This does not need Warp's complexity. A small queue/state machine would make Fleet more predictable. + +### 3. Separate raw PTY data flow from app/model events + +Warp bridges low-level terminal parsing events into a typed `ModelEventDispatcher`. Fleet has `src/main/event-bus.ts` with typed events, which is good, but PTY output, CWD detection, notifications, activity state, and copilot/session state are still relatively independent streams. + +A stronger architecture would be: + +```text +Raw PTY data + -> parsers/detectors + -> typed session events + -> stores/UI/socket notifications +``` + +Example event model: + +```ts +type SessionEvent = + | { type: 'pty:data'; paneId: string; bytes: number; timestamp: number } + | { type: 'pty:exit'; paneId: string; exitCode: number | null } + | { type: 'cwd:changed'; paneId: string; cwd: string; source: 'osc7' | 'poll' } + | { type: 'activity:changed'; paneId: string; state: ActivityState } + | { type: 'agent:phase-changed'; sessionId: string; phase: CopilotSessionPhase }; +``` + +Fleet already has pieces of this. The improvement is to make the dispatcher the only way derived state changes are emitted, with tests for "no duplicate event on no-op update." + +### 4. Add diff-before-emit rules for noisy state + +Warp avoids redundant events for unchanged session/environment state. + +Fleet can apply this in: + +- `CopilotSessionStore.processHookEvent` +- CWD polling +- activity tracker +- notification state +- workspace/layout save events +- socket subscription events + +For example, `CopilotSessionStore.processHookEvent` always calls `onChange` after processing, even if the effective session state did not change. That is simple, but noisy. A snapshot/diff helper would reduce React churn and make behavior more testable. + +Pattern: + +```ts +const before = stableSessionSnapshot(session); +applyEvent(session, event); +if (!deepEqual(before, stableSessionSnapshot(session))) { + this.onChange?.(); +} +``` + +This is not only about performance; it prevents feedback loops and makes race bugs easier to reason about. + +### 5. Add bounded diagnostics ring buffers + +Warp uses bounded telemetry/network diagnostics buffers and bounded queues so diagnostic collection cannot destabilize the app. + +Fleet has logging via `src/main/logger.ts`, but no apparent bounded per-subsystem diagnostics buffer. Fleet should add an in-memory ring buffer for: + +- PTY lifecycle events +- PTY writes/resizes/exits +- socket API requests +- bridge websocket messages +- copilot hook events +- image jobs +- workspace/layout persistence + +Example: + +```ts +type DiagnosticEvent = { + timestamp: number; + subsystem: 'pty' | 'socket' | 'bridge' | 'copilot' | 'layout'; + level: 'debug' | 'info' | 'warn' | 'error'; + message: string; + meta?: Record; +}; +``` + +Keep only the last N events, e.g. 500 or 1000. Expose it through the socket API or a debug UI later. + +This would help debug "Fleet feels unstable" issues without requiring users to reproduce with verbose logs. + +### 6. Add safe/redacted logging helpers + +Warp has safe logging and secret redaction before data leaves the device. Fleet logs paths, commands, prompts, image prompts, socket details, bridge events, and errors. + +Fleet should add a small redaction layer: + +- redact obvious API keys/tokens +- redact bridge tokens +- redact `FLEET_BRIDGE_TOKEN` +- redact env values unless allowlisted +- avoid logging full terminal data by default + +Possible shape: + +```ts +log.info('Bridge connection accepted', redactMeta({ paneId, token })); +``` + +or implement redaction inside `createLogger`. + +This is architectural robustness because once diagnostics are expanded, privacy/safety needs to be built in. + +### 7. Add typed feature flags with runtime/test overrides + +Warp has typed feature flags with scoped test overrides and propagation into spawned threads. + +Fleet currently uses settings, but there is no obvious typed feature flag layer. For robustness work, this would let Fleet ship risky internals behind flags: + +- new PTY session owner +- PTY write queue +- bounded diagnostics +- stricter no-op event suppression +- new copilot JSONL parser behavior + +Recommended shape: + +```ts +export type FeatureFlag = + | 'ptySessionOwner' + | 'ptyWriteQueue' + | 'boundedDiagnostics' + | 'strictSessionEvents'; + +export function isFeatureEnabled(flag: FeatureFlag): boolean; +export function withFeatureOverride(flag: FeatureFlag, enabled: boolean, fn: () => T): T; +``` + +This gives us dogfoodability without scattering `process.env` checks everywhere. + +### 8. Make async lifecycle waits explicit and timeout-bound + +Warp's agent/terminal driver uses explicit conditions and timeouts for lifecycle milestones: + +- wait for terminal bootstrap +- wait for session sharing +- classify timeout errors distinctly + +Fleet has some timeouts already, e.g. annotate service and CLI tests, but terminal/agent lifecycle would benefit from explicit conditions: + +- wait for PTY created +- wait for first output +- wait for bridge connected +- wait for copilot session observed +- wait for process exit + +This prevents promises from waiting forever and makes tests deterministic. + +## Fleet-Specific Priority Order + +### Phase 1 — PTY lifecycle hardening + +Status: mostly implemented in PR #204. + +Completed: + +1. Introduced `PtySession` owner internally behind `PtyManager`. +2. Made teardown idempotent and stateful. +3. Moved PTY attach buffer draining behind `PtyManager.drainBuffer()` so callers no longer mutate output buffer internals directly. +4. Added tests for: + - duplicate create returns existing session + - kill twice is safe + - resize after kill is ignored + - write after natural exit is ignored + - resize after natural exit is ignored + - natural exit cleans buffers/listeners and removes the session + - natural exit removes the session even without an external `onExit` callback + - GC does not kill protected PTYs + +Remaining Phase 1 follow-ups: + +- Consider replacing `PtyManager.get()` with narrower read-only accessors so callers cannot grow new dependencies on `PtySession` internals. +- Add an integration-style lifecycle test with a real PTY once the harness can do this hermetically. + +### Phase 2 — Write queue and event hygiene + +1. Add per-session write queue / arbiter. +2. Classify writes by source. +3. Add no-op suppression to noisy stores. +4. Add tests for write ordering and stale writes. + +### Phase 3 — Diagnostics + +1. Add bounded ring buffer. +2. Instrument PTY lifecycle, socket server, Fleet bridge, and copilot hook events. +3. Add redaction helpers. +4. Add a socket/debug command to dump diagnostics. + +### Phase 4 — Feature flags and hermetic integration tests + +1. Add typed feature flags with scoped test overrides. +2. Add integration-style tests with isolated HOME and temp workspace. +3. Test full path: + - create workspace + - create PTY + - send command + - receive output + - close pane + - assert no live listeners/PTYs + +## Concrete Patterns from Warp to Adapt + +- Use ownership boundaries, not shared ad-hoc lifecycle logic. +- Centralize PTY writes; do not let every caller write directly. +- Throttle UI wakeups separately from PTY data ingestion. +- Use bounded buffers everywhere diagnostics/telemetry can grow. +- Treat no-op state changes as no events. +- Use explicit conditions with timeouts for lifecycle milestones. +- Make cleanup deterministic and idempotent. +- Test lifecycle sequencing more than visual behavior. + +## Evidence from Warp + +Key files inspected: + +- `reference/warp/app/src/terminal/local_tty/terminal_manager.rs` +- `reference/warp/app/src/terminal/local_tty/event_loop.rs` +- `reference/warp/app/src/terminal/writeable_pty/pty_controller.rs` +- `reference/warp/app/src/terminal/model/session.rs` +- `reference/warp/app/src/terminal/model_events.rs` +- `reference/warp/app/src/terminal/event_listener.rs` +- `reference/warp/app/src/terminal/view.rs` +- `reference/warp/app/src/terminal/local_tty/spawner.rs` +- `reference/warp/app/src/terminal/local_tty/server/mod.rs` +- `reference/warp/crates/warp_features/src/lib.rs` +- `reference/warp/app/src/crash_reporting/mod.rs` +- `reference/warp/app/src/server/telemetry/collector.rs` +- `reference/warp/app/src/server/telemetry/secret_redaction.rs` +- `reference/warp/app/src/server/network_logging.rs` +- `reference/warp/app/src/server/network_logging_tests.rs` +- `reference/warp/app/src/terminal/writeable_pty/pty_controller_tests.rs` +- `reference/warp/app/src/terminal/model/session_tests.rs` +- `reference/warp/.agents/skills/warp-integration-test/SKILL.md` + +## Risks and Adaptation Notes + +- Warp is Rust/WarpUI; Fleet is Electron/TypeScript/node-pty, so patterns should be adapted, not ported wholesale. +- Warp still uses panics for some hard invariants; Fleet should prefer recoverable errors at Electron process boundaries. +- Terminal-server subprocess isolation may be overkill for Fleet initially. Lifecycle ownership, bounded queues, and diagnostics are higher-leverage first steps. + +## Recommended First Implementation + +Status: implemented in PR #204. + +Fleet now turns each pane's PTY into a stateful `PtySession` owner and moves writes/resizes/data/exit cleanup through it while keeping the public `PtyManager` API mostly stable. + +Implemented pieces: + +- `src/main/pty-session.ts` owns the PTY process, data/exit listeners, output buffer, paused state, lifecycle state, and idempotent shutdown. +- `src/main/pty-manager.ts` stores `PtySession` instances and delegates per-session lifecycle operations. +- `PTY_ATTACH` uses `PtyManager.drainBuffer()` instead of mutating PTY entry internals directly. +- Natural PTY exits now clean up the session even if no external `onExit` callback was registered. +- Writes/resizes after natural exit or kill are ignored. + +This directly addresses the class of bugs Fleet has historically hit: + +- duplicate PTYs +- renderer reload races +- stale listeners +- writes/resizes after exit +- blank terminal after close +- paused PTYs after refresh +- cleanup ordering bugs + +Validation for PR #204: + +```bash +npm run typecheck:node +npx vitest run src/main/__tests__/pty-manager.test.ts src/main/__tests__/pty-manager-extended.test.ts +``` + +Full lint was also attempted, but the repo currently has unrelated pre-existing lint errors outside this change.