fix(terminal): auto-respawn PTY daemon when it dies mid-session

When the daemon process died (e.g. from a signal, OOM, or cascading
from force-quitting child processes), all terminals froze permanently
with "connect ENOENT daemon-v1.sock" because there was no recovery
path — DaemonSpawner.handle stayed non-null and ensureRunning() never
re-spawned.

Add lazy daemon respawn: DaemonPtyAdapter.withDaemonRetry() catches
daemon-death errors (ENOENT, ECONNREFUSED, "Connection lost"), tears
down stale client state, forks a fresh daemon, and retries the
operation once.
This commit is contained in:
Jinwoo-H 2026-04-20 22:26:59 -04:00
parent b3f99b5ae1
commit f6d5f62e60
4 changed files with 112 additions and 1 deletions

View file

@ -160,7 +160,16 @@ export async function initDaemonPtyProvider(): Promise<void> {
const newAdapter = new DaemonPtyAdapter({
socketPath: info.socketPath,
tokenPath: info.tokenPath,
historyPath: getHistoryDir()
historyPath: getHistoryDir(),
// Why: when the daemon process dies (e.g. killed by a signal, OOM, or
// cascading from a force-quit of child processes), the adapter's
// withDaemonRetry() catches the resulting daemon-death error and calls this
// to fork a replacement daemon before retrying the failed operation once.
respawn: async () => {
console.warn('[daemon] Daemon process died — respawning')
newSpawner.resetHandle()
await newSpawner.ensureRunning()
}
})
spawner = newSpawner

View file

@ -641,4 +641,50 @@ describe('DaemonPtyAdapter (IPtyProvider)', () => {
)
})
})
// Covers the adapter's recovery path when the daemon goes away between two
// operations. `server`, `socketPath` and `tokenPath` come from the enclosing
// suite (presumably set up per test — confirm against the suite's hooks).
describe('respawn on daemon death', () => {
  it('respawns the daemon and retries when the socket disappears', async () => {
    let respawnServer: DaemonServer | undefined
    // Stand-in for forking a new daemon: start a fresh server on the same
    // socket/token paths so the retried operation has something to reach.
    const respawnFn = vi.fn(async () => {
      respawnServer = new DaemonServer({
        socketPath,
        tokenPath,
        spawnSubprocess: () => createMockSubprocess()
      })
      await respawnServer.start()
    })
    const respawnAdapter = new DaemonPtyAdapter({ socketPath, tokenPath, respawn: respawnFn })
    try {
      // First spawn succeeds normally
      const r1 = await respawnAdapter.spawn({ cols: 80, rows: 24 })
      expect(r1.id).toBeDefined()
      // Kill the server to simulate daemon death
      await server.shutdown()
      // Next spawn should detect the dead socket, call respawn, and succeed
      const r2 = await respawnAdapter.spawn({ cols: 80, rows: 24 })
      expect(r2.id).toBeDefined()
      expect(respawnFn).toHaveBeenCalledOnce()
    } finally {
      // Why: a failed assertion must not leave the adapter connected or the
      // replacement server listening on the shared socket path, otherwise
      // the leak can cascade into unrelated test failures.
      respawnAdapter.dispose()
      await respawnServer?.shutdown()
    }
  })
  it('propagates the error when no respawn callback is provided', async () => {
    const noRespawnAdapter = new DaemonPtyAdapter({ socketPath, tokenPath })
    try {
      // First spawn succeeds
      await noRespawnAdapter.spawn({ cols: 80, rows: 24 })
      // Kill the server
      await server.shutdown()
      // Next spawn should fail with the original socket error
      await expect(noRespawnAdapter.spawn({ cols: 80, rows: 24 })).rejects.toThrow()
    } finally {
      // Why: release the adapter even if one of the expectations above fails.
      noRespawnAdapter.dispose()
    }
  })
})
})

View file

@ -17,6 +17,9 @@ export type DaemonPtyAdapterOptions = {
/** Directory for disk-based terminal history. When set, the adapter writes
* raw PTY output to disk for cold restore on daemon crash. */
historyPath?: string
/** Called when the daemon socket is unreachable (process died). Expected to
* fork a fresh daemon so the next connection attempt can succeed. */
respawn?: () => Promise<void>
}
const MAX_TOMBSTONES = 1000
@ -32,6 +35,7 @@ export class DaemonPtyAdapter implements IPtyProvider {
private client: DaemonClient
private historyManager: HistoryManager | null
private historyReader: HistoryReader | null
private respawnFn: (() => Promise<void>) | null
private dataListeners: ((payload: { id: string; data: string }) => void)[] = []
private exitListeners: ((payload: { id: string; code: number }) => void)[] = []
private removeEventListener: (() => void) | null = null
@ -54,6 +58,7 @@ export class DaemonPtyAdapter implements IPtyProvider {
})
this.historyManager = opts.historyPath ? new HistoryManager(opts.historyPath) : null
this.historyReader = opts.historyPath ? new HistoryReader(opts.historyPath) : null
this.respawnFn = opts.respawn ?? null
}
getHistoryManager(): HistoryManager | null {
@ -61,6 +66,10 @@ export class DaemonPtyAdapter implements IPtyProvider {
}
/**
 * Spawns a PTY session by delegating to doSpawn().
 *
 * The call goes through withDaemonRetry(), so when a respawn callback is
 * configured and the attempt fails with a daemon-death error, the daemon is
 * respawned and doSpawn() is attempted one more time with the same options.
 *
 * @param opts Spawn options, forwarded unchanged to doSpawn().
 * @returns The result produced by doSpawn().
 */
async spawn(opts: PtySpawnOptions): Promise<PtySpawnResult> {
return this.withDaemonRetry(() => this.doSpawn(opts))
}
private async doSpawn(opts: PtySpawnOptions): Promise<PtySpawnResult> {
await this.ensureConnected()
const sessionId =
@ -366,6 +375,28 @@ export class DaemonPtyAdapter implements IPtyProvider {
this.setupEventRouting()
}
// Why: a dead daemon surfaces as ENOENT (socket gone), ECONNREFUSED, or
// "Connection lost" (socket closed mid-request). Without recovery every
// terminal would stay broken until the app restarts, so this wrapper runs
// the operation, and on a daemon-death error discards the stale client
// state, asks respawnFn for a fresh daemon, and runs the operation exactly
// one more time. Errors from respawnFn or from the second attempt propagate
// to the caller unchanged.
private async withDaemonRetry<T>(fn: () => Promise<T>): Promise<T> {
  let failure: unknown
  try {
    return await fn()
  } catch (caught) {
    failure = caught
  }

  // Nothing to recover with, or the failure is unrelated to daemon death:
  // surface the original error untouched.
  if (!this.respawnFn || !isDaemonGoneError(failure)) {
    throw failure
  }

  console.warn('[daemon] Operation failed, respawning:', (failure as Error).message)

  // Drop the event subscription and clear the slot; setupEventRouting()
  // returns early while removeEventListener is still set.
  if (this.removeEventListener) {
    this.removeEventListener()
  }
  this.removeEventListener = null
  this.client.disconnect()

  await this.respawnFn()
  return await fn()
}
private setupEventRouting(): void {
if (this.removeEventListener) {
return
@ -402,3 +433,21 @@ export class DaemonPtyAdapter implements IPtyProvider {
})
}
}
// Why: each of these signals means the daemon is no longer there and a
// respawn is worth attempting:
//   - code ENOENT: the socket file is missing (daemon crashed and cleaned
//     up, or was killed).
//   - code ECONNREFUSED: the socket file exists but nothing is listening
//     (rare race).
//   - message "Connection lost" / "Not connected": the daemon died while we
//     held an active or stale connection and we still tried to use it.
// Anything that is not an Error instance is never treated as daemon death.
function isDaemonGoneError(err: unknown): boolean {
  if (err instanceof Error) {
    switch ((err as NodeJS.ErrnoException).code) {
      case 'ENOENT':
      case 'ECONNREFUSED':
        return true
    }
    // Messages are compared exactly (case-sensitive, no substring match).
    return ['Connection lost', 'Not connected'].includes(err.message)
  }
  return false
}

View file

@ -42,6 +42,13 @@ export class DaemonSpawner {
return { socketPath: this.socketPath, tokenPath: this.tokenPath }
}
// Why: after the daemon process dies unexpectedly, the cached handle is
// stale. Clearing it lets the next ensureRunning() fork a fresh daemon
// instead of returning the dead socket path.
// Note: this only drops the cached reference; nothing in this method stops
// a process or removes any socket/token file.
resetHandle(): void {
this.handle = null
}
async shutdown(): Promise<void> {
if (!this.handle) {
return