diff --git a/src/renderer/src/App.tsx b/src/renderer/src/App.tsx index d1857e9d..fbed311b 100644 --- a/src/renderer/src/App.tsx +++ b/src/renderer/src/App.tsx @@ -295,28 +295,17 @@ function App(): React.JSX.Element { return () => window.removeEventListener('beforeunload', captureAndFlush) }, []) - // Periodically capture terminal scrollback buffers and persist to disk. - // Why: the shutdown path captures buffers in beforeunload, but periodic - // saves provide a safety net so scrollback is available on restart even - // if an unexpected exit (crash, force-kill) bypasses normal shutdown. - useEffect(() => { - const PERIODIC_SAVE_INTERVAL_MS = 3 * 60_000 - const timer = window.setInterval(() => { - if (!useAppStore.getState().workspaceSessionReady || shutdownBufferCaptures.size === 0) { - return - } - for (const capture of shutdownBufferCaptures) { - try { - capture() - } catch { - // Don't let one pane's failure block the rest. - } - } - const state = useAppStore.getState() - void window.api.session.set(buildWorkspaceSessionPayload(state)) - }, PERIODIC_SAVE_INTERVAL_MS) - return () => window.clearInterval(timer) - }, []) + // Why there is no periodic scrollback save: PR #461 added a 3-minute + // setInterval that re-serialized every mounted TerminalPane's scrollback + // so a crash wouldn't lose in-session output. With many panes of + // accumulated output, each tick blocked the renderer main thread for + // several seconds (serialize is synchronous and does a binary search on + // >512KB buffers), causing visible input lag across the whole app. + // The durable replacement is the out-of-process terminal daemon + // (PR #729), which preserves buffers across renderer crashes with no + // main-thread work. Non-daemon users lose in-session scrollback on an + // unexpected exit — an acceptable tradeoff vs. periodic UI stalls, and + // in line with how most terminal apps behave. 
useEffect(() => { if (!persistedUIReady) { diff --git a/tests/e2e/helpers/orca-restart.ts b/tests/e2e/helpers/orca-restart.ts new file mode 100644 index 00000000..6a7dc3ff --- /dev/null +++ b/tests/e2e/helpers/orca-restart.ts @@ -0,0 +1,213 @@ +/** + * Two-launch Electron helper for restart-persistence tests. + * + * Why: the default `orcaPage` fixture creates a fresh `userDataDir` per test + * and deletes it on close, which is incompatible with a test that needs to + * quit the app and relaunch against the *same* on-disk state. This helper + * owns the shared userDataDir and gives each caller an `app`+`page` pair + * wired to it. + */ + +import { + _electron as electron, + expect, + type ElectronApplication, + type Page, + type TestInfo +} from '@stablyai/playwright-test' + import { execSync } from 'child_process' + import { existsSync, mkdtempSync, rmSync } from 'fs' + import os from 'os' + import path from 'path' + +type LaunchedOrca = { + app: ElectronApplication + page: Page +} + +type RestartSession = { + userDataDir: string + launch: () => Promise<LaunchedOrca> + /** Gracefully close a launch, letting beforeunload flush session state. */ + close: (app: ElectronApplication) => Promise<void> + /** Remove the shared userDataDir after the test is done. */ + dispose: () => void +} + +function shouldLaunchHeadful(testInfo: TestInfo): boolean { + return testInfo.project.metadata.orcaHeadful === true +} + +function launchEnv(userDataDir: string, headful: boolean): NodeJS.ProcessEnv { + const { ELECTRON_RUN_AS_NODE: _unused, ...cleanEnv } = process.env + void _unused + return { + ...cleanEnv, + NODE_ENV: 'development', + ORCA_E2E_USER_DATA_DIR: userDataDir, + ...(headful ? { ORCA_E2E_HEADFUL: '1' } : { ORCA_E2E_HEADLESS: '1' }) + } +} + +/** + * Create a restart session tied to a persistent userDataDir. 
+ * + * Why: keep the launch wiring identical to the shared fixture (mainPath, + * env stripping, headful toggle) so behavior differences between fixtures + * don't leak in as false positives for persistence bugs. + */ +export function createRestartSession(testInfo: TestInfo): RestartSession { + const mainPath = path.join(process.cwd(), 'out', 'main', 'index.js') + const userDataDir = mkdtempSync(path.join(os.tmpdir(), 'orca-e2e-restart-')) + const headful = shouldLaunchHeadful(testInfo) + + const launch = async (): Promise<LaunchedOrca> => { + const app = await electron.launch({ + args: [mainPath], + env: launchEnv(userDataDir, headful) + }) + const page = await app.firstWindow({ timeout: 120_000 }) + await page.waitForLoadState('domcontentloaded') + await page.waitForFunction(() => Boolean(window.__store), null, { timeout: 30_000 }) + return { app, page } + } + + const close = async (app: ElectronApplication): Promise<void> => { + // Why: mirror the shared fixture's shutdown race — give Electron 10s to run + // before-quit/will-quit (which drives the beforeunload → session.setSync + // flush this suite relies on) and only then fall back to SIGKILL. + const proc = app.process() + try { + await Promise.race([ + app.close(), + new Promise((_, reject) => { + setTimeout(() => reject(new Error('Timed out closing Electron app')), 10_000) + }) + ]) + } catch { + if (proc) { + try { + proc.kill('SIGKILL') + } catch { + /* already dead */ + } + } + } + } + + const dispose = (): void => { + if (existsSync(userDataDir)) { + rmSync(userDataDir, { recursive: true, force: true }) + } + } + + return { userDataDir, launch, close, dispose } +} + +/** + * Attach a repo to the running renderer and wait until a terminal tab is + * active on its worktree. Matches the shared fixture's setup path so the + * first-launch state lines up with what real users persist. 
+ */ +export async function attachRepoAndOpenTerminal(page: Page, repoPath: string): Promise<string> { + if (!isValidGitRepo(repoPath)) { + throw new Error(`attachRepoAndOpenTerminal: ${repoPath} is not a git repo`) + } + + await page.evaluate(async (repoPath) => { + await window.api.repos.add({ path: repoPath }) + }, repoPath) + + await page.evaluate(async () => { + const store = window.__store + if (!store) { + return + } + await store.getState().fetchRepos() + }) + + await page.evaluate(async () => { + const store = window.__store + if (!store) { + return + } + const repos = store.getState().repos + for (const repo of repos) { + await store.getState().fetchWorktrees(repo.id) + } + }) + + await page.waitForFunction( + () => window.__store?.getState().workspaceSessionReady === true, + null, + { timeout: 30_000 } + ) + + // Why: fetchWorktrees() is async. Awaiting the outer page.evaluate returns + // before the Zustand worktree slice has observed the hydrated state, so a + // single evaluate() that reads worktreesByRepo can see an empty map. Poll + // the store until *any* worktree shows up, then pick the one that matches + // our seeded repo. Matching by basename is sufficient because each test + // run uses a unique tmp-dir suffix, and it avoids symlink-canonicalization + // mismatches between the path we pass in and the one the store records. + await expect + .poll( + async () => + page.evaluate(() => { + const store = window.__store + if (!store) { + return false + } + return Object.values(store.getState().worktreesByRepo).flat().length > 0 + }), + { + timeout: 15_000, + message: 'attachRepoAndOpenTerminal: seeded worktree never surfaced in the store' + } + ) + .toBe(true) + + const repoBasename = repoPath.split('/').filter(Boolean).pop() ?? 
'' + const worktreeId = await page.evaluate((repoBasename: string) => { + const store = window.__store + if (!store) { + return null + } + const state = store.getState() + const allWorktrees = Object.values(state.worktreesByRepo).flat() + // Why: the primary worktree lives at the repo root, so its path ends with + // the repo directory name. The secondary worktree is a sibling directory + // and will not match this suffix. This gives us the primary deterministically + // without depending on boolean fields on the worktree record. + const primary = + allWorktrees.find((worktree) => worktree.path.endsWith(`/${repoBasename}`)) ?? allWorktrees[0] + if (!primary) { + return null + } + state.setActiveWorktree(primary.id) + return primary.id + }, repoBasename) + + if (!worktreeId) { + throw new Error('attachRepoAndOpenTerminal: test repo did not surface in the store') + } + + return worktreeId +} + +function isValidGitRepo(repoPath: string): boolean { + if (!repoPath || !existsSync(repoPath)) { + return false + } + try { + return ( + execSync('git rev-parse --is-inside-work-tree', { + cwd: repoPath, + stdio: 'pipe', + encoding: 'utf8' + }).trim() === 'true' + ) + } catch { + return false + } +} diff --git a/tests/e2e/terminal-restart-persistence.spec.ts b/tests/e2e/terminal-restart-persistence.spec.ts new file mode 100644 index 00000000..87da9ab2 --- /dev/null +++ b/tests/e2e/terminal-restart-persistence.spec.ts @@ -0,0 +1,277 @@ +/** + * E2E tests for terminal scrollback persistence across clean app restarts. + * + * Why this suite exists: + * PR #461 added a 3-minute periodic interval that re-serialized every + * mounted TerminalPane's scrollback so an unclean exit (crash, SIGKILL) + * wouldn't lose in-session output. With many panes of accumulated output, + * each tick blocked the renderer main thread for seconds, causing visible + * input lag across the whole app. The periodic save was removed in favor + * of the out-of-process terminal daemon (PR #729). 
For users who don't + * opt into the daemon, the `beforeunload` save in App.tsx is now the + * *only* thing that preserves scrollback across a restart — this suite + * locks that behavior down so a future regression can't silently return + * us to "quit → empty terminal on relaunch." + * + * What it covers: + * - Scrollback survives clean quit → relaunch (primary regression test). + * - Tab layout (active worktree, terminal tab count) survives restart. + * - Idle session writes stay infrequent (catches a reintroduced frequent + * interval before it ships; weaker than asserting the 3-minute cadence + * is gone, but doesn't require a minutes-long test run). + * + * What it does NOT try to cover: + * - Main-thread input-lag improvement — machine-dependent and flaky. + * - Crash/SIGKILL recovery — non-daemon users now intentionally lose + * in-session scrollback on unclean exit; that's the tradeoff the + * removed periodic save represented. + */ + +import { readFileSync, existsSync } from 'fs' +import type { ElectronApplication, Page } from '@stablyai/playwright-test' +import { test, expect } from './helpers/orca-app' +import { TEST_REPO_PATH_FILE } from './global-setup' +import { + discoverActivePtyId, + execInTerminal, + waitForActiveTerminalManager, + waitForTerminalOutput, + waitForPaneCount, + getTerminalContent +} from './helpers/terminal' +import { + waitForSessionReady, + waitForActiveWorktree, + getActiveWorktreeId, + getWorktreeTabs, + ensureTerminalVisible +} from './helpers/store' +import { attachRepoAndOpenTerminal, createRestartSession } from './helpers/orca-restart' + +// Why: each test in this file does a full quit→relaunch cycle, which spawns +// two Electron instances back-to-back. Running in serial keeps the isolated +// userDataDirs from competing for the same Electron cache lock on cold start +// and keeps the failure mode interpretable when something goes wrong. 
+test.describe.configure({ mode: 'serial' }) + +/** + * Shared bootstrap for a *first* launch: attach the seeded test repo, + * activate its worktree, ensure a terminal is mounted, and return the + * PTY id we can drive with `execInTerminal`. + * + * Why: every test in this file needs the exact same starting state on the + * first launch. Inlining it would obscure the thing each test is actually + * asserting about the *second* launch. + */ +async function bootstrapFirstLaunch( + page: Page, + repoPath: string +): Promise<{ worktreeId: string; ptyId: string }> { + const worktreeId = await attachRepoAndOpenTerminal(page, repoPath) + await waitForSessionReady(page) + await waitForActiveWorktree(page) + await ensureTerminalVisible(page) + + const hasPaneManager = await waitForActiveTerminalManager(page, 30_000) + .then(() => true) + .catch(() => false) + test.skip( + !hasPaneManager, + 'Electron automation in this environment never mounts the TerminalPane manager, so restart-persistence assertions would only fail on harness setup.' + ) + await waitForPaneCount(page, 1, 30_000) + + const ptyId = await discoverActivePtyId(page) + return { worktreeId, ptyId } +} + +/** + * Shared bootstrap for a *second* launch: just wait for the session to + * restore, and confirm the previously-active worktree is the active one + * again so downstream assertions operate against the right worktree. + */ +async function bootstrapRestoredLaunch(page: Page, expectedWorktreeId: string): Promise<void> { + await waitForSessionReady(page) + await expect + .poll(async () => getActiveWorktreeId(page), { timeout: 10_000 }) + .toBe(expectedWorktreeId) + await ensureTerminalVisible(page) + // Why: the PaneManager remounts asynchronously after session hydration. The + // restored terminal surface is what we're about to assert against, so make + // sure it exists before any content/layout assertion races. 
+ await waitForActiveTerminalManager(page, 30_000) + await waitForPaneCount(page, 1, 30_000) +} + +test.describe('Terminal restart persistence', () => { + test('scrollback survives clean quit and relaunch', async (// oxlint-disable-next-line no-empty-pattern -- Playwright's second fixture arg is testInfo; the first must be an object destructure to opt out of the default fixture set. + {}, testInfo) => { + const repoPath = readFileSync(TEST_REPO_PATH_FILE, 'utf-8').trim() + if (!repoPath || !existsSync(repoPath)) { + test.skip(true, 'Global setup did not produce a seeded test repo') + return + } + + const session = createRestartSession(testInfo) + let firstApp: ElectronApplication | null = null + let secondApp: ElectronApplication | null = null + + try { + // ── First launch ──────────────────────────────────────────────── + const firstLaunch = await session.launch() + firstApp = firstLaunch.app + const { worktreeId, ptyId } = await bootstrapFirstLaunch(firstLaunch.page, repoPath) + + // Why: the marker must be distinctive enough that it can't appear in the + // restored prompt banner or a stray OSC sequence. The timestamp suffix + // keeps it unique across retries, and the trailing newline ensures the + // buffer snapshot contains it on a line of its own. + const marker = `SCROLLBACK_PERSIST_${Date.now()}` + await execInTerminal(firstLaunch.page, ptyId, `echo ${marker}`) + await waitForTerminalOutput(firstLaunch.page, marker) + + // Why: closing the app triggers beforeunload → session.setSync, which is + // the one remaining codepath that flushes serialized scrollback to disk + // for non-daemon users. This is the behavior the suite is guarding. 
+ await session.close(firstApp) + firstApp = null + + // ── Second launch ─────────────────────────────────────────────── + const secondLaunch = await session.launch() + secondApp = secondLaunch.app + await bootstrapRestoredLaunch(secondLaunch.page, worktreeId) + + // Why: buffer restore replays the serialized output through xterm.write + // during pane mount. Poll the live terminal content rather than hitting + // the store because the store only sees the raw saved buffer, whereas + // restoreScrollbackBuffers can strip alt-screen sequences before write. + await expect + .poll(async () => (await getTerminalContent(secondLaunch.page)).includes(marker), { + timeout: 15_000, + message: 'Restored terminal did not contain the pre-quit scrollback marker' + }) + .toBe(true) + } finally { + if (secondApp) { + await session.close(secondApp) + } + if (firstApp) { + await session.close(firstApp) + } + session.dispose() + } + }) + + test('active worktree and terminal tab count survive restart', async (// oxlint-disable-next-line no-empty-pattern -- Playwright's second fixture arg is testInfo; the first must be an object destructure to opt out of the default fixture set. + {}, testInfo) => { + const repoPath = readFileSync(TEST_REPO_PATH_FILE, 'utf-8').trim() + if (!repoPath || !existsSync(repoPath)) { + test.skip(true, 'Global setup did not produce a seeded test repo') + return + } + + const session = createRestartSession(testInfo) + let firstApp: ElectronApplication | null = null + let secondApp: ElectronApplication | null = null + + try { + // ── First launch ──────────────────────────────────────────────── + const firstLaunch = await session.launch() + firstApp = firstLaunch.app + const { worktreeId } = await bootstrapFirstLaunch(firstLaunch.page, repoPath) + + // Add a second terminal tab so the restart has layout state to restore + // beyond "one default tab." createTab goes through the same store path + // as the Cmd+T shortcut but doesn't depend on window focus timing. 
+ await firstLaunch.page.evaluate((worktreeId: string) => { + const store = window.__store + if (!store) { + return + } + store.getState().createTab(worktreeId) + }, worktreeId) + + await expect + .poll(async () => (await getWorktreeTabs(firstLaunch.page, worktreeId)).length, { + timeout: 5_000 + }) + .toBeGreaterThanOrEqual(2) + + const tabsBefore = await getWorktreeTabs(firstLaunch.page, worktreeId) + + await session.close(firstApp) + firstApp = null + + // ── Second launch ─────────────────────────────────────────────── + const secondLaunch = await session.launch() + secondApp = secondLaunch.app + await bootstrapRestoredLaunch(secondLaunch.page, worktreeId) + + // Why: checking tab *count* (not ids) is the stable assertion — tab ids + // are regenerated on each launch because the renderer mints them fresh, + // while the persisted layout only carries the tab positions. Count + // survives; id identity does not. + await expect + .poll(async () => (await getWorktreeTabs(secondLaunch.page, worktreeId)).length, { + timeout: 10_000 + }) + .toBe(tabsBefore.length) + } finally { + if (secondApp) { + await session.close(secondApp) + } + if (firstApp) { + await session.close(firstApp) + } + session.dispose() + } + }) + + test('idle session does not spam session.set writes', async (// oxlint-disable-next-line no-empty-pattern -- Playwright's second fixture arg is testInfo; the first must be an object destructure to opt out of the default fixture set. 
+ {}, testInfo) => { + const repoPath = readFileSync(TEST_REPO_PATH_FILE, 'utf-8').trim() + if (!repoPath || !existsSync(repoPath)) { + test.skip(true, 'Global setup did not produce a seeded test repo') + return + } + + const session = createRestartSession(testInfo) + let app: ElectronApplication | null = null + + try { + const { app: launchedApp, page } = await session.launch() + app = launchedApp + await bootstrapFirstLaunch(page, repoPath) + + // Why: the periodic scrollback save that this branch removes was a + // `session.set` call on every tick. Counting `session.set` calls over a + // short idle window is a cheap proxy for "no high-frequency background + // writer was reintroduced." The 10s window intentionally stays below the + // per-test budget; the threshold is deliberately loose so normal user- + // driven store activity (tab auto-create, worktree activation) doesn't + // flake the test, while still catching an interval that fires every + // couple of seconds. + const callCount = await page.evaluate(async () => { + const api = ( + window as unknown as { api: { session: { set: (...args: unknown[]) => unknown } } } + ).api + let count = 0 + const originalSet = api.session.set.bind(api.session) + api.session.set = (...args: unknown[]) => { + count += 1 + return originalSet(...args) + } + await new Promise((resolve) => setTimeout(resolve, 10_000)) + api.session.set = originalSet + return count + }) + + expect(callCount).toBeLessThan(20) + } finally { + if (app) { + await session.close(app) + } + session.dispose() + } + }) +})