feat: computer use via agent-browser CDP bridge (#856)

2026-04-21 06:07:17 +00:00 · 2026-04-20 23:56:14 -04:00 · 2026-04-20 23:56:14 -04:00 · 7a9bc4ef6d
commit 7a9bc4ef6d
parent 22ae50e0ae
35 changed files with 13560 additions and 43 deletions
--- a/config/electron-builder.config.cjs
+++ b/config/electron-builder.config.cjs
@ -1,3 +1,6 @@
+const { chmodSync, existsSync, readdirSync } = require('node:fs')
+const { join } = require('node:path')
+
 const isMacRelease = process.env.ORCA_MAC_RELEASE === '1'

 /** @type {import('electron-builder').Configuration} */
@ -23,12 +26,34 @@ module.exports = {
  // Why: daemon-entry.js is forked as a separate Node.js process and must be
  // accessible on disk (not inside the asar archive) for child_process.fork().
  asarUnpack: ['out/cli/**', 'out/shared/**', 'out/main/daemon-entry.js', 'out/main/chunks/**', 'resources/**'],
+  afterPack: async (context) => { // hook: marks the bundled agent-browser-* resources as executable
+    const resourcesDir =
+      context.electronPlatformName === 'darwin'
+        ? join(context.appOutDir, `${context.packager.appInfo.productFilename}.app`, 'Contents', 'Resources') // macOS: inside the .app bundle
+        : join(context.appOutDir, 'resources') // other platforms: <appOutDir>/resources
+    if (!existsSync(resourcesDir)) { // no resources directory in this output: nothing to fix up
+      return
+    }
+    for (const filename of readdirSync(resourcesDir)) {
+      if (!filename.startsWith('agent-browser-')) { // leave every other resource's mode untouched
+        continue
+      }
+      // Why: the upstream package has inconsistent executable bits across
+      // platform binaries (notably darwin-x64). child_process.execFile needs
+      // the copied binary to be executable in packaged apps.
+      chmodSync(join(resourcesDir, filename), 0o755) // 0o755 = rwxr-xr-x
+    }
+  },
  win: {
    executableName: 'Orca',
    extraResources: [
      {
        from: 'resources/win32/bin/orca.cmd',
        to: 'bin/orca.cmd'
+      },
+      {
+        from: 'node_modules/agent-browser/bin/agent-browser-win32-x64.exe',
+        to: 'agent-browser-win32-x64.exe'
      }
    ]
  },
@ -60,6 +85,10 @@ module.exports = {
      {
        from: 'resources/darwin/bin/orca',
        to: 'bin/orca'
+      },
+      {
+        from: 'node_modules/agent-browser/bin/agent-browser-darwin-${arch}',
+        to: 'agent-browser-darwin-${arch}'
      }
    ],
    target: [
@ -84,6 +113,10 @@ module.exports = {
      {
        from: 'resources/linux/bin/orca',
        to: 'bin/orca'
+      },
+      {
+        from: 'node_modules/agent-browser/bin/agent-browser-linux-${arch}',
+        to: 'agent-browser-linux-${arch}'
      }
    ],
    target: ['AppImage', 'deb'],
--- a/package.json
+++ b/package.json
@ -73,6 +73,7 @@
    "@xterm/addon-webgl": "^0.19.0",
    "@xterm/headless": "^6.0.0",
    "@xterm/xterm": "^6.0.0",
+    "agent-browser": "~0.24.1",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "cmdk": "^1.1.1",
@ -99,6 +100,7 @@
    "ssh2": "^1.17.0",
    "tailwind-merge": "^3.5.0",
    "tw-animate-css": "^1.4.0",
+    "ws": "^8.20.0",
    "zod": "^4.3.6",
    "zustand": "^5.0.12"
  },
@ -111,6 +113,7 @@
    "@types/react": "^19.2.14",
    "@types/react-dom": "^19.2.3",
    "@types/ssh2": "^1.15.5",
+    "@types/ws": "^8.18.1",
    "@typescript/native-preview": "7.0.0-dev.20260406.1",
    "@vitejs/plugin-react": "^5.2.0",
    "electron": "^41.1.0",
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@ -106,6 +106,9 @@ importers:
      '@xterm/xterm':
        specifier: ^6.0.0
        version: 6.0.0
+      agent-browser:
+        specifier: ~0.24.1
+        version: 0.24.1
      class-variance-authority:
        specifier: ^0.7.1
        version: 0.7.1
@ -184,6 +187,9 @@ importers:
      tw-animate-css:
        specifier: ^1.4.0
        version: 1.4.0
+      ws:
+        specifier: ^8.20.0
+        version: 8.20.0
      zod:
        specifier: ^4.3.6
        version: 4.3.6
@ -199,7 +205,7 @@ importers:
        version: 1.59.1
      '@stablyai/playwright-test':
        specifier: ^2.1.13
-        version: 2.1.13(@playwright/test@1.59.1)(zod@4.3.6)
+        version: 2.1.14(@playwright/test@1.59.1)(zod@4.3.6)
      '@tailwindcss/vite':
        specifier: ^4.2.2
        version: 4.2.2(vite@7.3.1(@types/node@25.5.0)(jiti@2.6.1)(lightningcss@1.32.0)(yaml@2.8.3))
@ -215,6 +221,9 @@ importers:
      '@types/ssh2':
        specifier: ^1.15.5
        version: 1.15.5
+      '@types/ws':
+        specifier: ^8.18.1
+        version: 8.18.1
      '@typescript/native-preview':
        specifier: 7.0.0-dev.20260406.1
        version: 7.0.0-dev.20260406.1
@ -2205,8 +2214,8 @@ packages:
    resolution: {integrity: sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ==}
    engines: {node: '>=18'}

-  '@stablyai/playwright-base@2.1.13':
-    resolution: {integrity: sha512-F8lc2qSfNZQ53WeWWDLLZSpu6f2ZCuiVgGP0P0+PGdO9swCKEwV0f+ti7a4MlmgMlHoCsf5tvddXIVpikhPRlQ==}
+  '@stablyai/playwright-base@2.1.14':
+    resolution: {integrity: sha512-/iAgMW5tC0ETDo3mFyTzszRrD7rGFIT4fgDgtZxqa9vPhiTLix/1+GeOOBNY0uS+XRLFY0Uc/irsC3XProL47g==}
    engines: {node: '>=18'}
    peerDependencies:
      '@playwright/test': ^1.52.0
@ -2215,13 +2224,13 @@ packages:
      zod:
        optional: true

-  '@stablyai/playwright-test@2.1.13':
-    resolution: {integrity: sha512-VXy65GukMkIsHtTuYuLhSP3l3YMl21ePTXKI2xLRBCkgzhTLdzat0vHM5TEh7vh58lsxmHlruMFESjcaIeb25g==}
+  '@stablyai/playwright-test@2.1.14':
+    resolution: {integrity: sha512-CAyVVnRdsyJg9pbK3Yq5L9lcvEabilFLb2RWeTQybKv7sDkEEqE2t1boXqBt3X6wQO6lsyhUHB9pc10wSwuc4Q==}
    peerDependencies:
      '@playwright/test': ^1.52.0

-  '@stablyai/playwright@2.1.13':
-    resolution: {integrity: sha512-PGE6hR5WTknfbEBz+KvhG9i2gukSYdie0at6SI0CnJPu13NvGBno1N0Fm/AePhtO5Kjn1mMWW5cRiknVP4bOwA==}
+  '@stablyai/playwright@2.1.14':
+    resolution: {integrity: sha512-+SkphioOf+o2VWiM3KPm/fFTTjwNHUV5b2ZRPrLMTsW6bwmEvjo2FbVOUobNBqbopQBnntNLd8ZCG2gvw7rwtg==}
    peerDependencies:
      '@playwright/test': ^1.52.0

@ -2751,6 +2760,9 @@ packages:
  '@types/verror@1.10.11':
    resolution: {integrity: sha512-RlDm9K7+o5stv0Co8i8ZRGxDbrTxhJtgjqjFyVh/tXQyl/rYtTKlnTvZ88oSTeYREWurwx20Js4kTuKCsFkUtg==}

+  '@types/ws@8.18.1':
+    resolution: {integrity: sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==}
+
  '@types/yauzl@2.10.3':
    resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==}

@ -2880,6 +2892,10 @@ packages:
    resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==}
    engines: {node: '>= 14'}

+  agent-browser@0.24.1:
+    resolution: {integrity: sha512-csWJtYEQow52b+p93zVZfNrcNBwbxGCZDXDMNWl2ij2i0MFKubIzN+icUeX2/NrkZe5iIau8px+HQlxata2oPw==}
+    hasBin: true
+
  ajv-formats@3.0.1:
    resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==}
    peerDependencies:
@ -6047,6 +6063,18 @@ packages:
  wrappy@1.0.2:
    resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==}

+  ws@8.20.0:
+    resolution: {integrity: sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==}
+    engines: {node: '>=10.0.0'}
+    peerDependencies:
+      bufferutil: ^4.0.1
+      utf-8-validate: '>=5.0.2'
+    peerDependenciesMeta:
+      bufferutil:
+        optional: true
+      utf-8-validate:
+        optional: true
+
  wsl-utils@0.3.1:
    resolution: {integrity: sha512-g/eziiSUNBSsdDJtCLB8bdYEUMj4jR7AGeUo96p/3dTafgjHhpF4RiCFPiRILwjQoDXx5MqkBr4fwWtR3Ky4Wg==}
    engines: {node: '>=20'}
@ -7897,7 +7925,7 @@ snapshots:

  '@sindresorhus/merge-streams@4.0.0': {}

-  '@stablyai/playwright-base@2.1.13(@playwright/test@1.59.1)(zod@4.3.6)':
+  '@stablyai/playwright-base@2.1.14(@playwright/test@1.59.1)(zod@4.3.6)':
    dependencies:
      '@playwright/test': 1.59.1
      jpeg-js: 0.4.4
@ -7906,18 +7934,18 @@ snapshots:
    optionalDependencies:
      zod: 4.3.6

-  '@stablyai/playwright-test@2.1.13(@playwright/test@1.59.1)(zod@4.3.6)':
+  '@stablyai/playwright-test@2.1.14(@playwright/test@1.59.1)(zod@4.3.6)':
    dependencies:
      '@playwright/test': 1.59.1
-      '@stablyai/playwright': 2.1.13(@playwright/test@1.59.1)(zod@4.3.6)
-      '@stablyai/playwright-base': 2.1.13(@playwright/test@1.59.1)(zod@4.3.6)
+      '@stablyai/playwright': 2.1.14(@playwright/test@1.59.1)(zod@4.3.6)
+      '@stablyai/playwright-base': 2.1.14(@playwright/test@1.59.1)(zod@4.3.6)
    transitivePeerDependencies:
      - zod

-  '@stablyai/playwright@2.1.13(@playwright/test@1.59.1)(zod@4.3.6)':
+  '@stablyai/playwright@2.1.14(@playwright/test@1.59.1)(zod@4.3.6)':
    dependencies:
      '@playwright/test': 1.59.1
-      '@stablyai/playwright-base': 2.1.13(@playwright/test@1.59.1)(zod@4.3.6)
+      '@stablyai/playwright-base': 2.1.14(@playwright/test@1.59.1)(zod@4.3.6)
    transitivePeerDependencies:
      - zod

@ -8482,6 +8510,10 @@ snapshots:
  '@types/verror@1.10.11':
    optional: true

+  '@types/ws@8.18.1':
+    dependencies:
+      '@types/node': 25.5.0
+
  '@types/yauzl@2.10.3':
    dependencies:
      '@types/node': 25.5.0
@ -8608,6 +8640,8 @@ snapshots:

  agent-base@7.1.4: {}

+  agent-browser@0.24.1: {}
+
  ajv-formats@3.0.1(ajv@8.18.0):
    optionalDependencies:
      ajv: 8.18.0
@ -12374,6 +12408,8 @@ snapshots:

  wrappy@1.0.2: {}

+  ws@8.20.0: {}
+
  wsl-utils@0.3.1:
    dependencies:
      is-wsl: 3.1.1
--- a/skills/orca-cli/SKILL.md
+++ b/skills/orca-cli/SKILL.md
@ -1,6 +1,6 @@
 ---
 name: orca-cli
-description: Use the Orca CLI to orchestrate worktrees and live terminals through a running Orca editor. Use when an agent needs to create, inspect, update, or remove Orca worktrees; inspect repo state known to Orca; or read, send to, wait on, or stop Orca-managed terminals. Coding agents should also keep the current worktree comment updated with the latest meaningful work-in-progress checkpoint whenever useful; this is an expected default behavior, not a special trigger-only action. Triggers include "use orca cli", "manage Orca worktrees", "read Orca terminal", "reply to Claude Code in Orca", "create a worktree in Orca", "update Orca worktree comment", or any task where the agent should operate through Orca instead of talking to git worktrees and terminal processes directly.
+description: Use the Orca CLI to orchestrate worktrees, live terminals, and browser automation through a running Orca editor. Use when an agent needs to create, inspect, update, or remove Orca worktrees; inspect repo state known to Orca; read, send to, wait on, or stop Orca-managed terminals; or automate the built-in browser (navigate, snapshot, click, fill, screenshot). Coding agents should also keep the current worktree comment updated with the latest meaningful work-in-progress checkpoint whenever useful. Triggers include "use orca cli", "manage Orca worktrees", "read Orca terminal", "reply to Claude Code in Orca", "create a worktree in Orca", "update Orca worktree comment", "click on", "fill the form", "take a screenshot", "navigate to", "interact with the page", "snapshot the page", or any task where the agent should operate through Orca.
 ---

 # Orca CLI
@ -167,6 +167,374 @@ Why: terminal handles are runtime-scoped and may go stale after reloads. If Orca
 - If the user asks for CLI UX feedback, test the public `orca` command first. Only inspect `src/cli` or use `node out/cli/index.js` if the public command is missing or the task is explicitly about implementation internals.
 - If a command fails, prefer retrying with the public `orca` command before concluding the CLI is broken, unless the failure already came from `orca` itself.

+## Browser Automation
+
+The `orca` CLI also drives the built-in Orca browser. The core workflow is a **snapshot-interact-re-snapshot** loop:
+
+1. **Snapshot** the page to see interactive elements and their refs.
+2. **Interact** using refs (`@e1`, `@e3`, etc.) to click, fill, or select.
+3. **Re-snapshot** after interactions to see the updated page state.
+
+```bash
+orca goto --url https://example.com --json
+orca snapshot --json
+# Read the refs from the snapshot output
+orca click --element @e3 --json
+orca snapshot --json
+```
+
+### Element Refs
+
+Refs like `@e1`, `@e5` are short identifiers assigned to interactive page elements during a snapshot. They are:
+
+- **Assigned by snapshot**: Run `orca snapshot` to get current refs.
+- **Scoped to one tab**: Refs from one tab are not valid in another.
+- **Invalidated by navigation**: If the page navigates after a snapshot, refs become stale. Re-snapshot to get fresh refs.
+- **Invalidated by tab switch**: Switching tabs with `orca tab switch` invalidates refs. Re-snapshot after switching.
+
+If a ref is stale, the command returns `browser_stale_ref` — re-snapshot and retry.
+
+### Worktree Scoping
+
+Browser commands default to the **current worktree** — only tabs belonging to the agent's worktree are visible and targetable. Tab indices are relative to the filtered tab list.
+
+```bash
+# Default: operates on tabs in the current worktree
+orca snapshot --json
+
+# Explicitly target all worktrees (cross-worktree access)
+orca snapshot --worktree all --json
+
+# Tab indices are relative to the worktree-filtered list
+orca tab list --json         # Shows tabs [0], [1], [2] for this worktree
+orca tab switch --index 1 --json   # Switches to tab [1] within this worktree
+```
+
+If no tabs are open in the current worktree, commands return `browser_no_tab`.
+
+### Stable Page Targeting
+
+For single-agent flows, bare browser commands are fine: Orca will target the active browser tab in the current worktree.
+
+For concurrent or multi-process browser automation, prefer a stable page id instead of ambient active-tab state:
+
+1. Run `orca tab list --json`.
+2. Read `tabs[].browserPageId` from the result.
+3. Pass `--page <browserPageId>` to follow-up commands like `snapshot`, `click`, `goto`, `screenshot`, `tab switch`, or `tab close`.
+
+Why: active-tab state and tab indices can change while another Orca CLI process is working. `browserPageId` pins the command to one concrete tab.
+
+```bash
+orca tab list --json
+orca snapshot --page page-123 --json
+orca click --page page-123 --element @e3 --json
+orca screenshot --page page-123 --json
+orca tab switch --page page-123 --json
+orca tab close --page page-123 --json
+```
+
+If you also pass `--worktree`, Orca treats it as extra scoping/validation for that page id. Without `--page`, commands still fall back to the current worktree's active tab.
+
+### Navigation
+
+```bash
+orca goto --url <url> [--page <browserPageId>] [--json]  # Navigate to URL, waits for page load
+orca back [--json]                       # Go back in browser history
+orca forward [--json]                    # Go forward in browser history
+orca reload [--json]                     # Reload the current page
+```
+
+### Observation
+
+```bash
+orca snapshot [--page <browserPageId>] [--json]                   # Accessibility tree snapshot with element refs
+orca screenshot [--page <browserPageId>] [--format <png|jpeg>] [--json]  # Viewport screenshot (base64)
+orca full-screenshot [--page <browserPageId>] [--format <png|jpeg>] [--json]  # Full-page screenshot (base64)
+orca pdf [--page <browserPageId>] [--json]                        # Export page as PDF (base64)
+```
+
+### Interaction
+
+```bash
+orca click --element <ref> [--page <browserPageId>] [--json]      # Click an element by ref
+orca dblclick --element <ref> [--page <browserPageId>] [--json]   # Double-click an element
+orca fill --element <ref> --value <text> [--page <browserPageId>] [--json]  # Clear and fill an input
+orca type --input <text> [--page <browserPageId>] [--json]        # Type at current focus (no element targeting)
+orca select --element <ref> --value <value> [--page <browserPageId>] [--json]  # Select dropdown option
+orca check --element <ref> [--page <browserPageId>] [--json]      # Check a checkbox
+orca uncheck --element <ref> [--page <browserPageId>] [--json]    # Uncheck a checkbox
+orca scroll --direction <up|down> [--amount <pixels>] [--page <browserPageId>] [--json]  # Scroll viewport
+orca scrollintoview --element <ref> [--page <browserPageId>] [--json]  # Scroll element into view
+orca hover --element <ref> [--page <browserPageId>] [--json]      # Hover over an element
+orca focus --element <ref> [--page <browserPageId>] [--json]      # Focus an element
+orca drag --from <ref> --to <ref> [--page <browserPageId>] [--json]  # Drag from one element to another
+orca clear --element <ref> [--page <browserPageId>] [--json]      # Clear an input field
+orca select-all --element <ref> [--page <browserPageId>] [--json] # Select all text in an element
+orca keypress --key <key> [--page <browserPageId>] [--json]       # Press a key (Enter, Tab, Escape, etc.)
+orca upload --element <ref> --files <paths> [--page <browserPageId>] [--json]  # Upload files to a file input
+```
+
+### Tab Management
+
+```bash
+orca tab list [--json]                   # List open browser tabs
+orca tab switch (--index <n> | --page <browserPageId>) [--json]     # Switch active tab (invalidates refs)
+orca tab create [--url <url>] [--json]   # Open a new browser tab
+orca tab close [--index <n> | --page <browserPageId>] [--json]    # Close a browser tab
+```
+
+### Wait / Synchronization
+
+```bash
+orca wait [--timeout <ms>] [--json]                        # Wait for timeout (default 1000ms)
+orca wait --selector <css> [--state <visible|hidden>] [--timeout <ms>] [--json]  # Wait for element
+orca wait --text <string> [--timeout <ms>] [--json]        # Wait for text to appear on page
+orca wait --url <substring> [--timeout <ms>] [--json]      # Wait for URL to contain substring
+orca wait --load <networkidle|load|domcontentloaded> [--timeout <ms>] [--json]   # Wait for load state
+orca wait --fn <js-expression> [--timeout <ms>] [--json]   # Wait for JS condition to be truthy
+```
+
+After any page-changing action, pick one:
+
+- Wait for specific content: `orca wait --text "Dashboard" --json`
+- Wait for URL change: `orca wait --url "/dashboard" --json`
+- Wait for network idle (catch-all for SPA navigation): `orca wait --load networkidle --json`
+- Wait for an element: `orca wait --selector ".results" --json`
+
+Avoid bare `orca wait --timeout 2000` except when debugging — it makes scripts slow and flaky.
+
+### Data Extraction
+
+```bash
+orca exec --command "get text @e1" [--json]   # Get visible text of an element
+orca exec --command "get html @e1" [--json]   # Get innerHTML
+orca exec --command "get value @e1" [--json]  # Get input value
+orca exec --command "get attr @e1 href" [--json]  # Get element attribute
+orca exec --command "get title" [--json]      # Get page title
+orca exec --command "get url" [--json]        # Get current URL
+orca exec --command "get count .item" [--json]      # Count matching elements
+```
+
+### State Checks
+
+```bash
+orca exec --command "is visible @e1" [--json]  # Check if element is visible
+orca exec --command "is enabled @e1" [--json]  # Check if element is enabled
+orca exec --command "is checked @e1" [--json]  # Check if checkbox is checked
+```
+
+### Page Inspection
+
+```bash
+orca eval --expression <js> [--json]     # Evaluate JS in page context
+```
+
+### Cookie Management
+
+```bash
+orca cookie get [--url <url>] [--json]   # List cookies
+orca cookie set --name <n> --value <v> [--domain <d>] [--json]  # Set a cookie
+orca cookie delete --name <n> [--domain <d>] [--json]  # Delete a cookie
+```
+
+### Emulation
+
+```bash
+orca viewport --width <w> --height <h> [--scale <n>] [--mobile] [--json]
+orca geolocation --latitude <lat> --longitude <lng> [--accuracy <m>] [--json]
+```
+
+### Request Interception
+
+```bash
+orca intercept enable [--patterns <list>] [--json]  # Start intercepting requests
+orca intercept disable [--json]          # Stop intercepting
+orca intercept list [--json]             # List paused requests
+```
+
+> **Note:** Per-request `intercept continue` and `intercept block` are not yet supported.
+> They will be added once agent-browser supports per-request interception decisions.
+
+### Console / Network Capture
+
+```bash
+orca capture start [--json]              # Start capturing console + network
+orca capture stop [--json]               # Stop capturing
+orca console [--limit <n>] [--json]      # Read captured console entries
+orca network [--limit <n>] [--json]      # Read captured network entries
+```
+
+### Mouse Control
+
+```bash
+orca exec --command "mouse move 100 200" [--json]   # Move mouse to coordinates
+orca exec --command "mouse down left" [--json]      # Press mouse button
+orca exec --command "mouse up left" [--json]        # Release mouse button
+orca exec --command "mouse wheel 100" [--json]      # Scroll wheel
+```
+
+### Keyboard
+
+```bash
+orca exec --command "keyboard inserttext \"text\"" [--json]  # Insert text bypassing key events
+orca exec --command "keyboard type \"text\"" [--json]        # Raw keystrokes
+orca exec --command "keydown Shift" [--json]                 # Hold key down
+orca exec --command "keyup Shift" [--json]                   # Release key
+```
+
+### Frames (Iframes)
+
+Iframes are auto-inlined in snapshots — refs inside iframes work transparently. For scoped interaction:
+
+```bash
+orca exec --command "frame @e3" [--json]        # Switch to iframe by ref
+orca exec --command "frame \"#iframe\"" [--json] # Switch to iframe by CSS selector
+orca exec --command "frame main" [--json]       # Return to main frame
+```
+
+### Semantic Locators (alternative to refs)
+
+When refs aren't available or you want to skip a snapshot:
+
+```bash
+orca exec --command "find role button click --name \"Submit\"" [--json]
+orca exec --command "find text \"Sign In\" click" [--json]
+orca exec --command "find label \"Email\" fill \"user@test.com\"" [--json]
+orca exec --command "find placeholder \"Search\" type \"query\"" [--json]
+orca exec --command "find testid \"submit-btn\" click" [--json]
+```
+
+### Dialogs
+
+`alert` and `beforeunload` are auto-accepted. For `confirm` and `prompt`:
+
+```bash
+orca exec --command "dialog status" [--json]        # Check for pending dialog
+orca exec --command "dialog accept" [--json]        # Accept
+orca exec --command "dialog accept \"text\"" [--json]  # Accept with prompt input
+orca exec --command "dialog dismiss" [--json]       # Dismiss/cancel
+```
+
+### Extended Commands (Passthrough)
+
+```bash
+orca exec --command "<agent-browser command>" [--json]
+```
+
+The `exec` command provides access to agent-browser's full command surface. Useful for commands without typed Orca handlers:
+
+```bash
+orca exec --command "set device \"iPhone 14\"" --json   # Emulate device
+orca exec --command "set offline on" --json             # Toggle offline mode
+orca exec --command "set media dark" --json             # Emulate color scheme
+orca exec --command "network requests" --json           # View tracked network requests
+orca exec --command "help" --json                       # See all available commands
+```
+
+**Important:** Do not use `orca exec --command "tab ..."` for tab management. Use `orca tab list/create/close/switch` instead — those operate at the Orca level and keep the UI synchronized.
+
+### `fill` vs `type`
+
+- **`fill`** targets a specific element by ref, clears its value first, then enters text. Use for form fields.
+- **`type`** types at whatever currently has focus. Use for search boxes or after clicking into an input.
+
+If neither works on a custom input component, try:
+
+```bash
+orca focus --element @e1 --json
+orca exec --command "keyboard inserttext \"text\"" --json   # bypasses key events
+```
+
+### Browser Error Codes
+
+| Error Code | Meaning | Recovery |
+|-----------|---------|----------|
+| `browser_no_tab` | No browser tab is open in this worktree | Open a tab, or use `--worktree all` to check other worktrees |
+| `browser_stale_ref` | Ref is invalid (page changed since snapshot) | Run `orca snapshot` to get fresh refs |
+| `browser_tab_not_found` | Tab index does not exist | Run `orca tab list` to see available tabs |
+| `browser_error` | Error from the browser automation engine | Read the message for details; common causes: element not found, navigation timeout, JS error |
+
+### Browser Worked Example
+
+Agent fills a login form and verifies the dashboard loads:
+
+```bash
+# Navigate to the login page
+orca goto --url https://app.example.com/login --json
+
+# See what's on the page
+orca snapshot --json
+# Output includes:
+#   [@e1] text input "Email"
+#   [@e2] text input "Password"
+#   [@e3] button "Sign In"
+
+# Fill the form
+orca fill --element @e1 --value "user@example.com" --json
+orca fill --element @e2 --value "s3cret" --json
+
+# Submit
+orca click --element @e3 --json
+
+# Wait for the post-submit navigation to settle, then verify the dashboard loaded
+orca wait --load networkidle --json
+orca snapshot --json   # Output should show dashboard content, not the login form
+```
+
+### Browser Troubleshooting
+
+**"Ref not found" / `browser_stale_ref`**
+Page changed since the snapshot. Run `orca snapshot --json` again, then use the new refs.
+
+**Element exists but not in snapshot**
+It may be off-screen or not yet rendered. Try:
+
+```bash
+orca scroll --direction down --amount 1000 --json
+orca snapshot --json
+# or wait for it:
+orca wait --text "..." --json
+orca snapshot --json
+```
+
+**Click does nothing / overlay swallows the click**
+Modals or cookie banners may be blocking. Snapshot, find the dismiss button, click it, then re-snapshot.
+
+**Fill/type doesn't work on a custom input**
+Some components intercept key events. Use `keyboard inserttext`:
+
+```bash
+orca focus --element @e1 --json
+orca exec --command "keyboard inserttext \"text\"" --json
+```
+
+**`browser_no_tab` error**
+No browser tab is open in the current worktree. Open one with `orca tab create --url <url> --json`.
+
+### Auto-Switch Worktree
+
+Browser commands automatically activate the target worktree in the Orca UI when needed. If the agent issues a browser command targeting a worktree that isn't currently active, Orca will switch to that worktree before executing the command.
+
+### Tab Create Auto-Activation
+
+When `orca tab create` opens a new tab, it is automatically set as the active tab for the worktree. Subsequent commands (`snapshot`, `click`, etc.) will target the newly created tab without needing an explicit `tab switch`.
+
+### Browser Agent Guidance
+
+- Always snapshot before interacting with elements.
+- After navigation (`goto`, `back`, `forward`, `reload`, clicking a link), re-snapshot to get fresh refs.
+- After switching tabs, re-snapshot.
+- If you get `browser_stale_ref`, re-snapshot and retry with the new refs.
+- Use `orca tab list` before `orca tab switch` to know which tabs exist.
+- For concurrent browser workflows, prefer `orca tab list --json` and reuse `tabs[].browserPageId` with `--page` on later commands.
+- Use `orca wait` to synchronize after actions that trigger async updates (form submits, SPA navigation, modals) instead of arbitrary sleeps.
+- Use `orca eval` as an escape hatch for interactions not covered by other commands.
+- Use `orca exec --command "help"` to discover extended commands.
+- Worktree scoping is automatic — you'll only see tabs from your worktree by default.
+- Bare browser commands without `--page` still target the current worktree's active tab, which is convenient but less robust for multi-process automation.
+- Tab creation auto-activates the new tab — no need for `tab switch` after `tab create`.
+- Browser commands auto-switch the active worktree if needed — no manual worktree activation required.
+
 ## Important Constraints

 - Orca CLI only talks to a running Orca editor.
--- a/src/cli/index.test.ts
+++ b/src/cli/index.test.ts
@ -1,3 +1,5 @@
+/* oxlint-disable max-lines -- Why: CLI parsing behavior is exercised end-to-end
+in one file so command and flag interactions stay visible in a single suite. */
 import path from 'path'
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

@ -35,7 +37,24 @@ vi.mock('./runtime-client', () => {
  }
 })

-import { buildCurrentWorktreeSelector, main, normalizeWorktreeSelector } from './index'
+import {
+  buildCurrentWorktreeSelector,
+  COMMAND_SPECS,
+  main,
+  normalizeWorktreeSelector
+} from './index'
+import { RuntimeClientError } from './runtime-client'
+
+describe('COMMAND_SPECS collision check', () => { // guards against two specs registering the same command path
+  it('has no duplicate command paths', () => {
+    const seen = new Set<string>() // joined paths encountered so far
+    for (const spec of COMMAND_SPECS) {
+      const key = spec.path.join(' ') // space-joined path segments form the comparison key
+      expect(seen.has(key), `Duplicate COMMAND_SPECS path: "${key}"`).toBe(false) // message names the colliding path
+      seen.add(key)
+    }
+  })
+})

 describe('orca cli worktree awareness', () => {
  beforeEach(() => {
@ -303,3 +322,312 @@ describe('orca cli worktree awareness', () => {
    })
  })
 })
+
+describe('orca cli browser page targeting', () => {
+  // Canned runtime replies. Every factory builds a fresh object per call so a
+  // reply queued for one test never shares state with another.
+  const runtimeMeta = () => ({
+    runtimeId: 'runtime-1'
+  })
+
+  const worktreeListReply = () => ({
+    id: 'req_list',
+    ok: true,
+    result: {
+      worktrees: [
+        {
+          id: 'repo::/tmp/repo/feature',
+          repoId: 'repo',
+          path: '/tmp/repo/feature',
+          branch: 'feature/foo',
+          linkedIssue: null,
+          git: {
+            path: '/tmp/repo/feature',
+            head: 'abc',
+            branch: 'feature/foo',
+            isBare: false,
+            isMainWorktree: false
+          },
+          displayName: '',
+          comment: ''
+        }
+      ],
+      totalCount: 1,
+      truncated: false
+    },
+    _meta: runtimeMeta()
+  })
+
+  const snapshotReply = () => ({
+    id: 'req_snapshot',
+    ok: true,
+    result: {
+      browserPageId: 'page-1',
+      snapshot: 'tree',
+      refs: [],
+      url: 'https://example.com',
+      title: 'Example'
+    },
+    _meta: runtimeMeta()
+  })
+
+  const tabSwitchReply = () => ({
+    id: 'req_switch',
+    ok: true,
+    result: {
+      switched: 2,
+      browserPageId: 'page-2'
+    },
+    _meta: runtimeMeta()
+  })
+
+  // Silences console.log for the duration of a test; afterEach restores it.
+  const muteConsoleLog = () => {
+    vi.spyOn(console, 'log').mockImplementation(() => {})
+  }
+
+  beforeEach(() => {
+    callMock.mockReset()
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+  })
+
+  it('passes explicit page ids to snapshot without resolving the current worktree', async () => {
+    callMock.mockResolvedValueOnce(snapshotReply())
+    muteConsoleLog()
+
+    const argv = ['snapshot', '--page', 'page-1', '--json']
+    await main(argv, '/tmp/not-an-orca-worktree')
+
+    // A single RPC means no worktree.list lookup preceded the snapshot call.
+    expect(callMock).toHaveBeenCalledTimes(1)
+    expect(callMock).toHaveBeenCalledWith('browser.snapshot', {
+      page: 'page-1'
+    })
+  })
+
+  it('resolves current worktree only when --page is combined with --worktree current', async () => {
+    callMock.mockResolvedValueOnce(worktreeListReply())
+    callMock.mockResolvedValueOnce(snapshotReply())
+    muteConsoleLog()
+
+    const argv = ['snapshot', '--page', 'page-1', '--worktree', 'current', '--json']
+    await main(argv, '/tmp/repo/feature/src')
+
+    const expectedSelector = `path:${path.resolve('/tmp/repo/feature')}`
+    expect(callMock).toHaveBeenNthCalledWith(1, 'worktree.list', {
+      limit: 10_000
+    })
+    expect(callMock).toHaveBeenNthCalledWith(2, 'browser.snapshot', {
+      page: 'page-1',
+      worktree: expectedSelector
+    })
+  })
+
+  it('passes page-targeted tab switches through without auto-scoping to the current worktree', async () => {
+    callMock.mockResolvedValueOnce(tabSwitchReply())
+    muteConsoleLog()
+
+    const argv = ['tab', 'switch', '--page', 'page-2', '--json']
+    await main(argv, '/tmp/repo/feature/src')
+
+    expect(callMock).toHaveBeenCalledTimes(1)
+    expect(callMock).toHaveBeenCalledWith('browser.tabSwitch', {
+      index: undefined,
+      page: 'page-2'
+    })
+  })
+
+  it('still resolves the current worktree when tab switch --page is combined with --worktree current', async () => {
+    callMock.mockResolvedValueOnce(worktreeListReply())
+    callMock.mockResolvedValueOnce(tabSwitchReply())
+    muteConsoleLog()
+
+    const argv = ['tab', 'switch', '--page', 'page-2', '--worktree', 'current', '--json']
+    await main(argv, '/tmp/repo/feature/src')
+
+    const expectedSelector = `path:${path.resolve('/tmp/repo/feature')}`
+    expect(callMock).toHaveBeenNthCalledWith(1, 'worktree.list', {
+      limit: 10_000
+    })
+    expect(callMock).toHaveBeenNthCalledWith(2, 'browser.tabSwitch', {
+      index: undefined,
+      page: 'page-2',
+      worktree: expectedSelector
+    })
+  })
+})
+
+describe('orca cli browser waits and viewport flags', () => {
+  // Wraps a result payload in the success envelope shared by every reply here.
+  const okReply = (id: string, result: Record<string, unknown>) => ({
+    id,
+    ok: true,
+    result,
+    _meta: {
+      runtimeId: 'runtime-1'
+    }
+  })
+
+  // browser.wait params asserted for `wait --selector '#ready' --worktree all`;
+  // individual tests override only the keys their extra flags change.
+  const selectorWaitParams = () => ({
+    selector: '#ready',
+    timeout: undefined as number | undefined,
+    text: undefined,
+    url: undefined,
+    load: undefined,
+    fn: undefined,
+    state: undefined,
+    worktree: undefined
+  })
+
+  const outsideAnyWorktree = '/tmp/not-an-orca-worktree'
+
+  beforeEach(() => {
+    callMock.mockReset()
+    process.exitCode = undefined
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+  })
+
+  it('gives selector waits an explicit RPC timeout budget', async () => {
+    callMock.mockResolvedValueOnce(okReply('req_wait', { ok: true }))
+    vi.spyOn(console, 'log').mockImplementation(() => {})
+
+    const argv = ['wait', '--selector', '#ready', '--worktree', 'all', '--json']
+    await main(argv, outsideAnyWorktree)
+
+    expect(callMock).toHaveBeenCalledWith('browser.wait', selectorWaitParams(), {
+      timeoutMs: 60_000
+    })
+  })
+
+  it('extends selector wait RPC timeout when the user passes --timeout', async () => {
+    callMock.mockResolvedValueOnce(okReply('req_wait', { ok: true }))
+    vi.spyOn(console, 'log').mockImplementation(() => {})
+
+    const argv = [
+      'wait',
+      '--selector',
+      '#ready',
+      '--timeout',
+      '12000',
+      '--worktree',
+      'all',
+      '--json'
+    ]
+    await main(argv, outsideAnyWorktree)
+
+    const expectedParams = { ...selectorWaitParams(), timeout: 12000 }
+    expect(callMock).toHaveBeenCalledWith('browser.wait', expectedParams, { timeoutMs: 17000 })
+  })
+
+  it('does not tell users Orca is down for a generic runtime timeout', async () => {
+    const timeoutMessage = 'Timed out waiting for the Orca runtime to respond.'
+    callMock.mockRejectedValueOnce(new RuntimeClientError('runtime_timeout', timeoutMessage))
+    const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {})
+
+    await main(['wait', '--selector', '#ready', '--worktree', 'all'], outsideAnyWorktree)
+
+    expect(errorSpy).toHaveBeenCalledWith(timeoutMessage)
+  })
+
+  it('passes the mobile viewport flag through to browser.viewport', async () => {
+    callMock.mockResolvedValueOnce(
+      okReply('req_viewport', {
+        width: 375,
+        height: 812,
+        deviceScaleFactor: 2,
+        mobile: true
+      })
+    )
+    vi.spyOn(console, 'log').mockImplementation(() => {})
+
+    const argv = [
+      'viewport',
+      '--width',
+      '375',
+      '--height',
+      '812',
+      '--scale',
+      '2',
+      '--mobile',
+      '--worktree',
+      'all',
+      '--json'
+    ]
+    await main(argv, outsideAnyWorktree)
+
+    expect(callMock).toHaveBeenCalledWith('browser.viewport', {
+      width: 375,
+      height: 812,
+      deviceScaleFactor: 2,
+      mobile: true,
+      worktree: undefined
+    })
+  })
+})
--- a/src/cli/index.ts
+++ b/src/cli/index.ts
--- a/src/cli/runtime-client.ts
+++ b/src/cli/runtime-client.ts
@ -58,7 +58,10 @@ export class RuntimeClient {
  private readonly userDataPath: string
  private readonly requestTimeoutMs: number

-  constructor(userDataPath = getDefaultUserDataPath(), requestTimeoutMs = 15000) {
+  // Why: browser commands trigger first-time session init (agent-browser connect +
+  // CDP proxy setup) which can take 15-30s. 60s accommodates cold start without
+  // being so large that genuine hangs go unnoticed.
+  constructor(userDataPath = getDefaultUserDataPath(), requestTimeoutMs = 60_000) {
    this.userDataPath = userDataPath
    this.requestTimeoutMs = requestTimeoutMs
  }
@ -383,6 +386,12 @@ export function getDefaultUserDataPath(
  platform: NodeJS.Platform = process.platform,
  homeDir = homedir()
 ): string {
+  // Why: in dev mode, the Electron app writes runtime metadata to `orca-dev`
+  // instead of `orca` to avoid clobbering the production app's metadata. The
+  // CLI needs to find the same metadata file, so respect this env var override.
+  if (process.env.ORCA_USER_DATA_PATH) {
+    return process.env.ORCA_USER_DATA_PATH
+  }
  if (platform === 'darwin') {
    return join(homeDir, 'Library', 'Application Support', 'orca')
  }
--- a/src/main/browser/agent-browser-bridge.test.ts
+++ b/src/main/browser/agent-browser-bridge.test.ts
--- a/src/main/browser/agent-browser-bridge.ts
+++ b/src/main/browser/agent-browser-bridge.ts
--- a/src/main/browser/browser-manager.test.ts
+++ b/src/main/browser/browser-manager.test.ts
@ -3,6 +3,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'

 const {
  shellOpenExternalMock,
+  browserWindowFromWebContentsMock,
  menuBuildFromTemplateMock,
  guestOffMock,
  guestOnMock,
@ -13,6 +14,7 @@ const {
  screenGetCursorScreenPointMock
 } = vi.hoisted(() => ({
  shellOpenExternalMock: vi.fn(),
+  browserWindowFromWebContentsMock: vi.fn(),
  menuBuildFromTemplateMock: vi.fn(),
  guestOffMock: vi.fn(),
  guestOnMock: vi.fn(),
@ -24,6 +26,9 @@ const {
 }))

 vi.mock('electron', () => ({
+  BrowserWindow: {
+    fromWebContents: browserWindowFromWebContentsMock
+  },
  clipboard: { writeText: vi.fn() },
  shell: { openExternal: shellOpenExternalMock },
  Menu: {
@ -44,6 +49,7 @@ describe('browserManager', () => {

  beforeEach(() => {
    shellOpenExternalMock.mockReset()
+    browserWindowFromWebContentsMock.mockReset()
    menuBuildFromTemplateMock.mockReset()
    guestOffMock.mockReset()
    guestOnMock.mockReset()
@ -148,6 +154,295 @@ describe('browserManager', () => {
    expect(shellOpenExternalMock).toHaveBeenCalledWith('https://example.com/login')
  })

+  it('activates the owning browser workspace when ensuring a page-backed guest is visible', async () => {
+    // Renderer replies in call order: the state report handed back to
+    // ensureWebviewVisible, then the result of the restore script.
+    const runInRenderer = vi.fn()
+    runInRenderer.mockResolvedValueOnce({
+      prevTabType: 'terminal',
+      prevActiveWorktreeId: 'wt-1',
+      prevActiveBrowserWorkspaceId: 'workspace-prev',
+      prevActiveBrowserPageId: 'page-prev',
+      prevFocusedGroupTabId: 'tab-prev',
+      targetWorktreeId: 'wt-1',
+      targetBrowserWorkspaceId: 'workspace-1',
+      targetBrowserPageId: 'page-1'
+    })
+    runInRenderer.mockResolvedValueOnce(undefined)
+
+    const pageGuest = {
+      id: 707,
+      isDestroyed: vi.fn(() => false),
+      getType: vi.fn(() => 'webview'),
+      setBackgroundThrottling: guestSetBackgroundThrottlingMock,
+      setWindowOpenHandler: guestSetWindowOpenHandlerMock,
+      on: guestOnMock,
+      off: guestOffMock,
+      openDevTools: guestOpenDevToolsMock
+    }
+    const hostRenderer = {
+      id: rendererWebContentsId,
+      isDestroyed: vi.fn(() => false),
+      executeJavaScript: runInRenderer
+    }
+    browserWindowFromWebContentsMock.mockReturnValue({ isFocused: vi.fn(() => true) })
+    // Only the guest and its hosting renderer resolve; any other id is unknown.
+    webContentsFromIdMock.mockImplementation((id: number) => {
+      switch (id) {
+        case pageGuest.id:
+          return pageGuest
+        case rendererWebContentsId:
+          return hostRenderer
+        default:
+          return null
+      }
+    })
+
+    browserManager.attachGuestPolicies(pageGuest as never)
+    browserManager.registerGuest({
+      browserPageId: 'page-1',
+      workspaceId: 'workspace-1',
+      worktreeId: 'wt-1',
+      webContentsId: pageGuest.id,
+      rendererWebContentsId
+    })
+
+    const restore = await browserManager.ensureWebviewVisible(pageGuest.id)
+
+    // The first script sent to the renderer must target the workspace, page and
+    // worktree that the guest was registered with.
+    const activationScript = runInRenderer.mock.calls[0]?.[0]
+    const expectedFragments = [
+      'var browserWorkspaceId = "workspace-1";',
+      'var browserPageId = "page-1";',
+      'state.setActiveBrowserTab(browserWorkspaceId);',
+      'state.setActiveBrowserPage(browserWorkspaceId, browserPageId);',
+      'var targetWorktreeId = "wt-1";'
+    ]
+    for (const fragment of expectedFragments) {
+      expect(activationScript).toContain(fragment)
+    }
+
+    restore()
+  })
+
+  it('restores the previously focused browser workspace after screenshot prep changes tabs', async () => {
+    // Previous and target differ in both worktree and workspace, so the
+    // restore script is expected to switch both back.
+    const runInRenderer = vi.fn()
+    runInRenderer.mockResolvedValueOnce({
+      prevTabType: 'browser',
+      prevActiveWorktreeId: 'wt-prev',
+      prevActiveBrowserWorkspaceId: 'workspace-prev',
+      prevActiveBrowserPageId: 'page-prev',
+      prevFocusedGroupTabId: 'tab-prev',
+      targetWorktreeId: 'wt-target',
+      targetBrowserWorkspaceId: 'workspace-target',
+      targetBrowserPageId: 'page-target'
+    })
+    runInRenderer.mockResolvedValueOnce(undefined)
+
+    const pageGuest = {
+      id: 708,
+      isDestroyed: vi.fn(() => false),
+      getType: vi.fn(() => 'webview'),
+      setBackgroundThrottling: guestSetBackgroundThrottlingMock,
+      setWindowOpenHandler: guestSetWindowOpenHandlerMock,
+      on: guestOnMock,
+      off: guestOffMock,
+      openDevTools: guestOpenDevToolsMock
+    }
+    const hostRenderer = {
+      id: rendererWebContentsId,
+      isDestroyed: vi.fn(() => false),
+      executeJavaScript: runInRenderer
+    }
+    browserWindowFromWebContentsMock.mockReturnValue({ isFocused: vi.fn(() => true) })
+    webContentsFromIdMock.mockImplementation((id: number) => {
+      switch (id) {
+        case pageGuest.id:
+          return pageGuest
+        case rendererWebContentsId:
+          return hostRenderer
+        default:
+          return null
+      }
+    })
+
+    browserManager.attachGuestPolicies(pageGuest as never)
+    browserManager.registerGuest({
+      browserPageId: 'page-target',
+      workspaceId: 'workspace-target',
+      worktreeId: 'wt-target',
+      webContentsId: pageGuest.id,
+      rendererWebContentsId
+    })
+
+    const restore = await browserManager.ensureWebviewVisible(pageGuest.id)
+    restore()
+
+    // Call index 1 is the script issued by restore(); index 0 was activation.
+    const restoreScript = runInRenderer.mock.calls[1]?.[0]
+    expect(restoreScript).toContain('state.setActiveWorktree("wt-prev");')
+    expect(restoreScript).toContain('state.setActiveBrowserTab("workspace-prev");')
+  })
+
+  it('restores the previously active page when screenshot prep switches pages inside one workspace', async () => {
+    // Same worktree and workspace before and after; only the active page
+    // differs, and no group tab was focused.
+    const runInRenderer = vi.fn()
+    runInRenderer.mockResolvedValueOnce({
+      prevTabType: 'browser',
+      prevActiveWorktreeId: 'wt-target',
+      prevActiveBrowserWorkspaceId: 'workspace-target',
+      prevActiveBrowserPageId: 'page-prev',
+      prevFocusedGroupTabId: null,
+      targetWorktreeId: 'wt-target',
+      targetBrowserWorkspaceId: 'workspace-target',
+      targetBrowserPageId: 'page-target'
+    })
+    runInRenderer.mockResolvedValueOnce(undefined)
+
+    const pageGuest = {
+      id: 709,
+      isDestroyed: vi.fn(() => false),
+      getType: vi.fn(() => 'webview'),
+      setBackgroundThrottling: guestSetBackgroundThrottlingMock,
+      setWindowOpenHandler: guestSetWindowOpenHandlerMock,
+      on: guestOnMock,
+      off: guestOffMock,
+      openDevTools: guestOpenDevToolsMock
+    }
+    const hostRenderer = {
+      id: rendererWebContentsId,
+      isDestroyed: vi.fn(() => false),
+      executeJavaScript: runInRenderer
+    }
+    browserWindowFromWebContentsMock.mockReturnValue({ isFocused: vi.fn(() => true) })
+    webContentsFromIdMock.mockImplementation((id: number) => {
+      switch (id) {
+        case pageGuest.id:
+          return pageGuest
+        case rendererWebContentsId:
+          return hostRenderer
+        default:
+          return null
+      }
+    })
+
+    browserManager.attachGuestPolicies(pageGuest as never)
+    browserManager.registerGuest({
+      browserPageId: 'page-target',
+      workspaceId: 'workspace-target',
+      worktreeId: 'wt-target',
+      webContentsId: pageGuest.id,
+      rendererWebContentsId
+    })
+
+    const restore = await browserManager.ensureWebviewVisible(pageGuest.id)
+    restore()
+
+    // Call index 1 is the script issued by restore(); index 0 was activation.
+    const restoreScript = runInRenderer.mock.calls[1]?.[0]
+    const expectedFragments = ['state.setActiveBrowserPage(', '"workspace-target"', '"page-prev"']
+    for (const fragment of expectedFragments) {
+      expect(restoreScript).toContain(fragment)
+    }
+  })
+
+  it('restores remembered browser workspace/page even when the visible pane was terminal', async () => {
+    // The visible pane was a terminal tab, yet a browser workspace/page was
+    // still recorded as active; the restore script must put both back.
+    const runInRenderer = vi.fn()
+    runInRenderer.mockResolvedValueOnce({
+      prevTabType: 'terminal',
+      prevActiveWorktreeId: 'wt-target',
+      prevActiveBrowserWorkspaceId: 'workspace-prev',
+      prevActiveBrowserPageId: 'page-prev',
+      prevFocusedGroupTabId: 'tab-prev',
+      targetWorktreeId: 'wt-target',
+      targetBrowserWorkspaceId: 'workspace-target',
+      targetBrowserPageId: 'page-target'
+    })
+    runInRenderer.mockResolvedValueOnce(undefined)
+
+    const pageGuest = {
+      id: 7091,
+      isDestroyed: vi.fn(() => false),
+      getType: vi.fn(() => 'webview'),
+      setBackgroundThrottling: guestSetBackgroundThrottlingMock,
+      setWindowOpenHandler: guestSetWindowOpenHandlerMock,
+      on: guestOnMock,
+      off: guestOffMock,
+      openDevTools: guestOpenDevToolsMock
+    }
+    const hostRenderer = {
+      id: rendererWebContentsId,
+      isDestroyed: vi.fn(() => false),
+      executeJavaScript: runInRenderer
+    }
+    browserWindowFromWebContentsMock.mockReturnValue({ isFocused: vi.fn(() => true) })
+    webContentsFromIdMock.mockImplementation((id: number) => {
+      switch (id) {
+        case pageGuest.id:
+          return pageGuest
+        case rendererWebContentsId:
+          return hostRenderer
+        default:
+          return null
+      }
+    })
+
+    browserManager.attachGuestPolicies(pageGuest as never)
+    browserManager.registerGuest({
+      browserPageId: 'page-target',
+      workspaceId: 'workspace-target',
+      worktreeId: 'wt-target',
+      webContentsId: pageGuest.id,
+      rendererWebContentsId
+    })
+
+    const restore = await browserManager.ensureWebviewVisible(pageGuest.id)
+    restore()
+
+    // Call index 1 is the script issued by restore(); index 0 was activation.
+    const restoreScript = runInRenderer.mock.calls[1]?.[0]
+    const expectedFragments = [
+      'state.setActiveBrowserTab("workspace-prev");',
+      'state.setActiveBrowserPage(',
+      '"workspace-prev"',
+      '"page-prev"',
+      'state.activateTab("tab-prev");',
+      'state.setActiveTabType("terminal");'
+    ]
+    for (const fragment of expectedFragments) {
+      expect(restoreScript).toContain(fragment)
+    }
+  })
+
+  it('does not focus the Orca window while preparing a screenshot', async () => {
+    // Only the activation reply is queued: this test never invokes the
+    // returned restore callback.
+    const runInRenderer = vi.fn()
+    runInRenderer.mockResolvedValueOnce({
+      prevTabType: 'terminal',
+      prevActiveWorktreeId: 'wt-1',
+      prevActiveBrowserWorkspaceId: 'workspace-prev',
+      prevActiveBrowserPageId: 'page-prev',
+      prevFocusedGroupTabId: 'tab-prev',
+      targetWorktreeId: 'wt-1',
+      targetBrowserWorkspaceId: 'workspace-1',
+      targetBrowserPageId: 'page-1'
+    })
+
+    const pageGuest = {
+      id: 710,
+      isDestroyed: vi.fn(() => false),
+      getType: vi.fn(() => 'webview'),
+      setBackgroundThrottling: guestSetBackgroundThrottlingMock,
+      setWindowOpenHandler: guestSetWindowOpenHandlerMock,
+      on: guestOnMock,
+      off: guestOffMock,
+      openDevTools: guestOpenDevToolsMock
+    }
+    const hostRenderer = {
+      id: rendererWebContentsId,
+      isDestroyed: vi.fn(() => false),
+      executeJavaScript: runInRenderer
+    }
+    webContentsFromIdMock.mockImplementation((id: number) => {
+      switch (id) {
+        case pageGuest.id:
+          return pageGuest
+        case rendererWebContentsId:
+          return hostRenderer
+        default:
+          return null
+      }
+    })
+
+    browserManager.attachGuestPolicies(pageGuest as never)
+    browserManager.registerGuest({
+      browserPageId: 'page-1',
+      workspaceId: 'workspace-1',
+      worktreeId: 'wt-1',
+      webContentsId: pageGuest.id,
+      rendererWebContentsId
+    })
+
+    await browserManager.ensureWebviewVisible(pageGuest.id)
+
+    // Visibility prep must never look up the BrowserWindow for the renderer.
+    expect(browserWindowFromWebContentsMock).not.toHaveBeenCalled()
+  })
+
  it('offers opening a link in another Orca browser tab from the guest context menu', () => {
    const rendererSendMock = vi.fn()
    const guest = {
@ -455,6 +750,101 @@ describe('browserManager', () => {
    )
  })

+  it('retires stale guest mappings when a page re-registers after a process swap', () => {
+    type DidFailLoadHandler = (
+      event: unknown,
+      errorCode: number,
+      errorDescription: string,
+      validatedUrl: string,
+      isMainFrame: boolean
+    ) => void
+
+    // Why: fetching a listener with `?.[1]` and invoking it with `?.()` lets the
+    // "old guest stays silent" assertion pass vacuously when the listener was
+    // never registered, so fail loudly if the lookup comes back empty.
+    const requireDidFailLoadHandler = (
+      onMock: ReturnType<typeof vi.fn>,
+      label: string
+    ): DidFailLoadHandler => {
+      const registration = onMock.mock.calls.find(([event]) => event === 'did-fail-load')
+      expect(registration, `${label} guest never registered a did-fail-load listener`).toBeDefined()
+      return registration?.[1] as DidFailLoadHandler
+    }
+
+    const rendererSendMock = vi.fn()
+    const oldGuestOnMock = vi.fn()
+    const oldGuestOffMock = vi.fn()
+    const newGuestOnMock = vi.fn()
+    const newGuestOffMock = vi.fn()
+    const oldGuest = {
+      id: 501,
+      isDestroyed: vi.fn(() => false),
+      getType: vi.fn(() => 'webview'),
+      setBackgroundThrottling: guestSetBackgroundThrottlingMock,
+      setWindowOpenHandler: guestSetWindowOpenHandlerMock,
+      on: oldGuestOnMock,
+      off: oldGuestOffMock,
+      openDevTools: guestOpenDevToolsMock,
+      getURL: vi.fn(() => 'https://old.example')
+    }
+    const newGuest = {
+      id: 502,
+      isDestroyed: vi.fn(() => false),
+      getType: vi.fn(() => 'webview'),
+      setBackgroundThrottling: guestSetBackgroundThrottlingMock,
+      setWindowOpenHandler: guestSetWindowOpenHandlerMock,
+      on: newGuestOnMock,
+      off: newGuestOffMock,
+      openDevTools: guestOpenDevToolsMock,
+      getURL: vi.fn(() => 'https://new.example')
+    }
+
+    webContentsFromIdMock.mockImplementation((id: number) => {
+      if (id === oldGuest.id) {
+        return oldGuest
+      }
+      if (id === newGuest.id) {
+        return newGuest
+      }
+      if (id === rendererWebContentsId) {
+        return { isDestroyed: vi.fn(() => false), send: rendererSendMock }
+      }
+      return null
+    })
+
+    // Register the same browser page twice, first backed by the old guest and
+    // then by the new one, as happens when the page's guest is replaced.
+    browserManager.attachGuestPolicies(oldGuest as never)
+    browserManager.registerGuest({
+      browserPageId: 'browser-1',
+      webContentsId: oldGuest.id,
+      rendererWebContentsId
+    })
+
+    browserManager.attachGuestPolicies(newGuest as never)
+    browserManager.registerGuest({
+      browserPageId: 'browser-1',
+      webContentsId: newGuest.id,
+      rendererWebContentsId
+    })
+
+    const oldDidFailLoadHandler = requireDidFailLoadHandler(oldGuestOnMock, 'old')
+    const newDidFailLoadHandler = requireDidFailLoadHandler(newGuestOnMock, 'new')
+
+    // A failure reported by the retired guest must not reach the renderer.
+    oldDidFailLoadHandler(null, -105, 'Old guest failed', 'https://old.example', true)
+    expect(rendererSendMock).not.toHaveBeenCalled()
+
+    // The replacement guest's failure is forwarded under the shared page id.
+    newDidFailLoadHandler(null, -106, 'New guest failed', 'https://new.example', true)
+    expect(rendererSendMock).toHaveBeenCalledWith('browser:guest-load-failed', {
+      browserPageId: 'browser-1',
+      loadError: {
+        code: -106,
+        description: 'New guest failed',
+        validatedUrl: 'https://new.example'
+      }
+    })
+    expect(oldGuestOffMock).toHaveBeenCalled()
+    expect(browserManager.getGuestWebContentsId('browser-1')).toBe(newGuest.id)
+  })
+
  it('does not forward ctrl/cmd+r or readline chords from browser guests', () => {
    const rendererSendMock = vi.fn()
    const guest = {
--- a/src/main/browser/browser-manager.ts
+++ b/src/main/browser/browser-manager.ts
@ -38,6 +38,7 @@ export type BrowserGuestRegistration = {
  browserPageId?: string
  browserTabId?: string
  workspaceId?: string
+  worktreeId?: string
  webContentsId: number
  rendererWebContentsId: number
 }
@ -71,15 +72,20 @@ function safeOrigin(rawUrl: string): string {
  }
 }

-class BrowserManager {
+export class BrowserManager {
  private readonly webContentsIdByTabId = new Map<string, number>()
  // Why: reverse map enables O(1) guest→tab lookups instead of O(N) linear
  // scans on every mouse event, load failure, permission, and popup event.
  private readonly tabIdByWebContentsId = new Map<number, string>()
+  // Why: guest registration is keyed by browser page id, but renderer
+  // visibility/focus state is keyed by browser workspace id. Screenshot prep
+  // has to bridge that mismatch to activate the right tab before capture.
+  private readonly workspaceIdByPageId = new Map<string, string>()
  private readonly rendererWebContentsIdByTabId = new Map<string, number>()
  private readonly contextMenuCleanupByTabId = new Map<string, () => void>()
  private readonly grabShortcutCleanupByTabId = new Map<string, () => void>()
  private readonly shortcutForwardingCleanupByTabId = new Map<string, () => void>()
+  private readonly worktreeIdByTabId = new Map<string, string>()
  private readonly policyAttachedGuestIds = new Set<number>()
  private readonly policyCleanupByGuestId = new Map<number, () => void>()
  private readonly pendingLoadFailuresByGuestId = new Map<
@ -108,12 +114,230 @@ class BrowserManager {
    return renderer
  }

+  // Why: screenshot sessions target guest page ids, but Orca's visible browser
+  // chrome is keyed by workspace ids. If we activate the page id directly, the
+  // webview stays hidden under the terminal pane and Page.captureScreenshot
+  // times out even though the guest still exists.
+  async ensureWebviewVisible(guestWebContentsId: number): Promise<() => void> {
+    const browserPageId = this.resolveBrowserTabIdForGuestWebContentsId(guestWebContentsId)
+    if (!browserPageId) {
+      return () => {}
+    }
+    const browserWorkspaceId = this.workspaceIdByPageId.get(browserPageId) ?? browserPageId
+    const worktreeId = this.worktreeIdByTabId.get(browserPageId) ?? null
+    const renderer = this.resolveRendererForBrowserTab(browserPageId)
+    if (!renderer || renderer.isDestroyed()) {
+      return () => {}
+    }
+
+    const prev = await renderer
+      .executeJavaScript(
+        `(function() {
+          var store = window.__store;
+          if (!store) return null;
+          var state = store.getState();
+          var prevTabType = state.activeTabType;
+          var prevActiveWorktreeId = state.activeWorktreeId || null;
+          var prevActiveBrowserWorkspaceId = state.activeBrowserTabId || null;
+          var prevActiveBrowserPageId = null;
+          var prevFocusedGroupTabId = null;
+          var targetWorktreeId = ${JSON.stringify(worktreeId)};
+          var browserWorkspaceId = ${JSON.stringify(browserWorkspaceId)};
+          var browserPageId = ${JSON.stringify(browserPageId)};
+          var browserTabsByWorktree = state.browserTabsByWorktree || {};
+
+          if (prevActiveWorktreeId) {
+            var prevFocusedGroupId = (state.activeGroupIdByWorktree || {})[prevActiveWorktreeId];
+            var prevGroups = (state.groupsByWorktree || {})[prevActiveWorktreeId] || [];
+            for (var pg = 0; pg < prevGroups.length; pg++) {
+              if (prevGroups[pg].id === prevFocusedGroupId) {
+                prevFocusedGroupTabId = prevGroups[pg].activeTabId;
+                break;
+              }
+            }
+          }
+
+          if (prevActiveBrowserWorkspaceId) {
+            for (var prevWtId in browserTabsByWorktree) {
+              var prevBrowserTabs = browserTabsByWorktree[prevWtId] || [];
+              for (var pbt = 0; pbt < prevBrowserTabs.length; pbt++) {
+                if (prevBrowserTabs[pbt].id === prevActiveBrowserWorkspaceId) {
+                  prevActiveBrowserPageId = prevBrowserTabs[pbt].activePageId || null;
+                  break;
+                }
+              }
+              if (prevActiveBrowserPageId) break;
+            }
+          }
+
+          if (
+            targetWorktreeId &&
+            prevActiveWorktreeId !== targetWorktreeId &&
+            typeof state.setActiveWorktree === 'function'
+          ) {
+            state.setActiveWorktree(targetWorktreeId);
+            state = store.getState();
+          }
+
+          var foundWorkspace = null;
+          for (var wtId in browserTabsByWorktree) {
+            var tabs = browserTabsByWorktree[wtId] || [];
+            for (var i = 0; i < tabs.length; i++) {
+              if (tabs[i].id === browserWorkspaceId) {
+                foundWorkspace = tabs[i];
+                if (!targetWorktreeId) {
+                  targetWorktreeId = wtId;
+                }
+                break;
+              }
+            }
+            if (foundWorkspace) break;
+          }
+
+          var hasTargetPage = false;
+          var targetPages = (state.browserPagesByWorkspace || {})[browserWorkspaceId] || [];
+          for (var pageIndex = 0; pageIndex < targetPages.length; pageIndex++) {
+            if (targetPages[pageIndex].id === browserPageId) {
+              hasTargetPage = true;
+              break;
+            }
+          }
+
+          if (foundWorkspace) {
+            if (typeof state.setActiveBrowserTab === 'function') {
+              state.setActiveBrowserTab(browserWorkspaceId);
+              state = store.getState();
+            } else {
+              var allTabs = state.unifiedTabsByWorktree || {};
+              var found = null;
+              for (var unifiedWtId in allTabs) {
+                var unifiedTabs = allTabs[unifiedWtId] || [];
+                for (var unifiedIndex = 0; unifiedIndex < unifiedTabs.length; unifiedIndex++) {
+                  if (
+                    unifiedTabs[unifiedIndex].contentType === 'browser' &&
+                    unifiedTabs[unifiedIndex].entityId === browserWorkspaceId
+                  ) {
+                    found = unifiedTabs[unifiedIndex];
+                    break;
+                  }
+                }
+                if (found) break;
+              }
+              if (found) {
+                state.activateTab(found.id);
+              }
+              state.setActiveTabType('browser');
+              state = store.getState();
+            }
+            // Why: activating the workspace alone is not enough for screenshot
+            // capture when a browser workspace contains multiple pages. The
+            // compositor only paints the currently mounted page guest.
+            if (
+              hasTargetPage &&
+              foundWorkspace.activePageId !== browserPageId &&
+              typeof state.setActiveBrowserPage === 'function'
+            ) {
+              state.setActiveBrowserPage(browserWorkspaceId, browserPageId);
+              state = store.getState();
+            }
+          }
+
+          return {
+            prevTabType: prevTabType,
+            prevActiveWorktreeId: prevActiveWorktreeId,
+            prevActiveBrowserWorkspaceId: prevActiveBrowserWorkspaceId,
+            prevActiveBrowserPageId: prevActiveBrowserPageId,
+            prevFocusedGroupTabId: prevFocusedGroupTabId,
+            targetWorktreeId: targetWorktreeId,
+            targetBrowserWorkspaceId: foundWorkspace ? browserWorkspaceId : null,
+            targetBrowserPageId: foundWorkspace && hasTargetPage ? browserPageId : null
+          };
+        })()`
+      )
+      .catch(() => null)
+
+    const needsRestore =
+      prev &&
+      (prev.prevTabType !== 'browser' ||
+        prev.prevActiveWorktreeId !== prev.targetWorktreeId ||
+        prev.prevFocusedGroupTabId !== null ||
+        prev.prevActiveBrowserWorkspaceId !== prev.targetBrowserWorkspaceId ||
+        prev.prevActiveBrowserPageId !== prev.targetBrowserPageId)
+
+    if (!needsRestore) {
+      return () => {}
+    }
+
+    return () => {
+      if (!prev || !renderer || renderer.isDestroyed()) {
+        return
+      }
+      renderer
+        .executeJavaScript(
+          `(function() {
+            var store = window.__store;
+            if (!store) return;
+            var state = store.getState();
+            if (
+              ${JSON.stringify(prev?.prevActiveWorktreeId)} &&
+              ${JSON.stringify(prev?.prevActiveWorktreeId)} !==
+                ${JSON.stringify(prev?.targetWorktreeId)} &&
+              typeof state.setActiveWorktree === 'function'
+            ) {
+              state.setActiveWorktree(${JSON.stringify(prev?.prevActiveWorktreeId)});
+              state = store.getState();
+            }
+            if (
+              ${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)} &&
+              ${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)} !==
+                ${JSON.stringify(prev?.targetBrowserWorkspaceId)} &&
+              typeof state.setActiveBrowserTab === 'function'
+            ) {
+              state.setActiveBrowserTab(${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)});
+              state = store.getState();
+            }
+            if (
+              ${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)} &&
+              ${JSON.stringify(prev?.prevActiveBrowserPageId)} &&
+              ${JSON.stringify(prev?.prevActiveBrowserPageId)} !==
+                ${JSON.stringify(prev?.targetBrowserPageId)} &&
+              typeof state.setActiveBrowserPage === 'function'
+            ) {
+              // Why: Orca remembers the last browser workspace/page even when
+              // the user is currently in terminal/editor view. Screenshot prep
+              // temporarily switches that hidden browser selection state, so
+              // restore it independently of the visible tab type.
+              state.setActiveBrowserPage(
+                ${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)},
+                ${JSON.stringify(prev?.prevActiveBrowserPageId)}
+              );
+              state = store.getState();
+            }
+            if (
+              ${JSON.stringify(prev?.prevTabType)} !== 'browser' &&
+              ${JSON.stringify(prev?.prevFocusedGroupTabId)}
+            ) {
+              state.activateTab(${JSON.stringify(prev?.prevFocusedGroupTabId)});
+            }
+            if (${JSON.stringify(prev?.prevTabType)} !== 'browser') {
+              state.setActiveTabType(${JSON.stringify(prev?.prevTabType)});
+            }
+          })()`
+        )
+        .catch(() => {})
+    }
+  }
+
  attachGuestPolicies(guest: Electron.WebContents): void {
    if (this.policyAttachedGuestIds.has(guest.id)) {
      return
    }
    this.policyAttachedGuestIds.add(guest.id)
-    guest.setBackgroundThrottling(true)
+    // Why: background throttling must be disabled so agent-driven screenshots
+    // (Page.captureScreenshot via CDP proxy) can capture frames even when the
+    // Orca window is not the focused foreground app. With throttling enabled,
+    // the compositor stops producing frames and capturePage() returns empty.
+    guest.setBackgroundThrottling(false)
    guest.setWindowOpenHandler(({ url }) => {
      const browserTabId = this.resolveBrowserTabIdForGuestWebContentsId(guest.id)
      const browserUrl = normalizeBrowserNavigationUrl(url)
@ -189,9 +413,30 @@ class BrowserManager {
    })
  }

+  private retireStaleGuestWebContents(previousWebContentsId: number): void {
+    // Why: Chromium can swap renderer processes, after which the same browser
+    // page re-registers under a new guest id. Anything still keyed by the dead
+    // guest must be dropped so late download/popup/permission callbacks from it
+    // no longer resolve to the live page's session.
+    const staleGuestId = previousWebContentsId
+    this.tabIdByWebContentsId.delete(staleGuestId)
+
+    // Run the guest's policy teardown (if one was recorded) before forgetting it.
+    const detachPolicies = this.policyCleanupByGuestId.get(staleGuestId)
+    if (detachPolicies) {
+      detachPolicies()
+      this.policyCleanupByGuestId.delete(staleGuestId)
+    }
+
+    // Remaining per-guest bookkeeping, cleared in the same order as before.
+    const perGuestCollections = [
+      this.policyAttachedGuestIds,
+      this.pendingLoadFailuresByGuestId,
+      this.pendingPermissionEventsByGuestId,
+      this.pendingPopupEventsByGuestId,
+      this.pendingDownloadIdsByGuestId
+    ]
+    for (const collection of perGuestCollections) {
+      collection.delete(staleGuestId)
+    }
+  }
+
  registerGuest({
    browserPageId,
    browserTabId: legacyBrowserTabId,
+    workspaceId,
+    worktreeId,
    webContentsId,
    rendererWebContentsId
  }: BrowserGuestRegistration): void {
@ -231,9 +476,20 @@ class BrowserManager {
      return
    }

+    const previousWebContentsId = this.webContentsIdByTabId.get(browserTabId)
+    if (previousWebContentsId !== undefined && previousWebContentsId !== webContentsId) {
+      this.retireStaleGuestWebContents(previousWebContentsId)
+    }
+
    this.webContentsIdByTabId.set(browserTabId, webContentsId)
    this.tabIdByWebContentsId.set(webContentsId, browserTabId)
+    if (workspaceId) {
+      this.workspaceIdByPageId.set(browserTabId, workspaceId)
+    }
    this.rendererWebContentsIdByTabId.set(browserTabId, rendererWebContentsId)
+    if (worktreeId) {
+      this.worktreeIdByTabId.set(browserTabId, worktreeId)
+    }

    this.setupContextMenu(browserTabId, guest)
    this.setupGrabShortcut(browserTabId, guest)
@ -292,6 +548,8 @@ class BrowserManager {
    }
    this.webContentsIdByTabId.delete(browserTabId)
    this.rendererWebContentsIdByTabId.delete(browserTabId)
+    this.workspaceIdByPageId.delete(browserTabId)
+    this.worktreeIdByTabId.delete(browserTabId)
  }

  unregisterAll(): void {
@ -313,6 +571,7 @@ class BrowserManager {
    }
    this.policyCleanupByGuestId.clear()
    this.tabIdByWebContentsId.clear()
+    this.worktreeIdByTabId.clear()
    this.pendingLoadFailuresByGuestId.clear()
    this.pendingPermissionEventsByGuestId.clear()
    this.pendingPopupEventsByGuestId.clear()
@ -323,6 +582,14 @@ class BrowserManager {
    return this.webContentsIdByTabId.get(browserTabId) ?? null
  }

+  /**
+   * Returns the map from browser tab id to the webContents id registered for it.
+   *
+   * Note: this hands out the manager's own `webContentsIdByTabId` instance, not
+   * a copy, so any mutation by the caller changes the manager's state.
+   */
+  getWebContentsIdByTabId(): Map<string, number> {
+    return this.webContentsIdByTabId
+  }
+
+  /**
+   * Looks up the worktree id stored for a browser tab.
+   *
+   * @param browserTabId key used in `worktreeIdByTabId`
+   * @returns the stored worktree id, or `undefined` when the map has no entry
+   *   for this tab
+   */
+  getWorktreeIdForTab(browserTabId: string): string | undefined {
+    return this.worktreeIdByTabId.get(browserTabId)
+  }
+
  notifyPermissionDenied(args: {
    guestWebContentsId: number
    permission: string
--- a/src/main/browser/browser-session-registry.ts
+++ b/src/main/browser/browser-session-registry.ts
@ -373,8 +373,12 @@ class BrowserSessionRegistry {
    this.configuredPartitions.add(partition)

    const sess = session.fromPartition(partition)
+    // Why: clipboard-read and clipboard-sanitized-write are required for agent-browser's
+    // clipboard commands to work. Without these, navigator.clipboard.writeText/readText
+    // throws NotAllowedError even when invoked via CDP with userGesture:true.
+    const autoGranted = new Set(['fullscreen', 'clipboard-read', 'clipboard-sanitized-write'])
    sess.setPermissionRequestHandler((webContents, permission, callback) => {
-      const allowed = permission === 'fullscreen'
+      const allowed = autoGranted.has(permission)
      if (!allowed) {
        browserManager.notifyPermissionDenied({
          guestWebContentsId: webContents.id,
@ -385,7 +389,7 @@ class BrowserSessionRegistry {
      callback(allowed)
    })
    sess.setPermissionCheckHandler((_webContents, permission) => {
-      return permission === 'fullscreen'
+      return autoGranted.has(permission)
    })
    sess.setDisplayMediaRequestHandler((_request, callback) => {
      callback({ video: undefined, audio: undefined })
--- a/src/main/browser/cdp-bridge-integration.test.ts
+++ b/src/main/browser/cdp-bridge-integration.test.ts
@ -0,0 +1,533 @@
+/* eslint-disable max-lines -- Why: integration test covering the full browser automation pipeline end-to-end. */
+import { mkdtempSync } from 'fs'
+import { tmpdir } from 'os'
+import { join } from 'path'
+import { createConnection } from 'net'
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
+
+// ── Electron mocks ──
+
+const { webContentsFromIdMock } = vi.hoisted(() => ({
+  webContentsFromIdMock: vi.fn()
+}))
+
+vi.mock('electron', () => ({
+  webContents: { fromId: webContentsFromIdMock },
+  shell: { openExternal: vi.fn() },
+  ipcMain: { handle: vi.fn(), removeHandler: vi.fn(), on: vi.fn() },
+  app: { getPath: vi.fn(() => '/tmp'), isPackaged: false }
+}))
+
+vi.mock('../git/worktree', () => ({
+  listWorktrees: vi.fn().mockResolvedValue([])
+}))
+
+import { BrowserManager } from './browser-manager'
+import { CdpBridge } from './cdp-bridge'
+import { OrcaRuntimeService } from '../runtime/orca-runtime'
+import { OrcaRuntimeRpcServer } from '../runtime/runtime-rpc'
+import { readRuntimeMetadata } from '../runtime/runtime-metadata'
+
+// ── CDP response builders ──
+
+type AXNode = {
+  nodeId: string
+  backendDOMNodeId?: number
+  role?: { type: string; value: string }
+  name?: { type: string; value: string }
+  properties?: { name: string; value: { type: string; value: unknown } }[]
+  childIds?: string[]
+  ignored?: boolean
+}
+
+/**
+ * Builds one accessibility-tree fixture node.
+ *
+ * Unless `opts.backendDOMNodeId` is given, the backend DOM node id is derived
+ * from the numeric node id multiplied by 100.
+ */
+function axNode(
+  id: string,
+  role: string,
+  name: string,
+  opts?: { childIds?: string[]; backendDOMNodeId?: number }
+): AXNode {
+  const derivedBackendId = parseInt(id, 10) * 100
+  const node: AXNode = {
+    nodeId: id,
+    backendDOMNodeId: opts?.backendDOMNodeId ?? derivedBackendId,
+    role: { type: 'role', value: role },
+    name: { type: 'computedString', value: name },
+    childIds: opts?.childIds
+  }
+  return node
+}
+
+// Accessibility tree served for every URL except the search page: a root
+// WebArea with a heading, a static text node and a single link.
+const EXAMPLE_COM_TREE: AXNode[] = [
+  axNode('1', 'WebArea', 'Example Domain', { childIds: ['2', '3', '4'] }),
+  axNode('2', 'heading', 'Example Domain'),
+  axNode('3', 'staticText', 'This domain is for use in illustrative examples.'),
+  axNode('4', 'link', 'More information...', { backendDOMNodeId: 400 })
+]
+
+// Accessibility tree served once the mock navigates to search.example.com.
+// NOTE(review): node '3' (the "Home" link) is listed in the childIds of both
+// the root ('1') and the navigation node ('2'). Confirm whether the double
+// parenting is intentional; a real AX tree would presumably list it only
+// under the navigation node.
+const SEARCH_PAGE_TREE: AXNode[] = [
+  axNode('1', 'WebArea', 'Search', { childIds: ['2', '3', '4', '5'] }),
+  axNode('2', 'navigation', 'Main Nav', { childIds: ['3'] }),
+  axNode('3', 'link', 'Home', { backendDOMNodeId: 300 }),
+  axNode('4', 'textbox', 'Search query', { backendDOMNodeId: 400 }),
+  axNode('5', 'button', 'Search', { backendDOMNodeId: 500 })
+]
+
+// ── Mock WebContents factory ──
+
+/**
+ * Creates a fake guest WebContents whose `debugger.sendCommand` answers the CDP
+ * methods listed in the switch below and throws on any other method.
+ *
+ * Returns the guest together with the `sendCommand` mock so tests can inspect
+ * the calls that were made.
+ */
+function createMockGuest(id: number, url: string, title: string) {
+  // Page state that Page.navigate rewrites and the getters below read back.
+  let currentUrl = url
+  let currentTitle = title
+  let currentTree = EXAMPLE_COM_TREE
+  let navHistoryId = 1
+
+  const sendCommandMock = vi.fn(async (method: string, params?: Record<string, unknown>) => {
+    switch (method) {
+      // Commands the mock simply acknowledges with an empty result.
+      case 'Page.enable':
+      case 'DOM.enable':
+      case 'Accessibility.enable':
+      case 'Network.enable':
+      case 'Runtime.enable':
+      case 'Target.setAutoAttach':
+      case 'Page.reload':
+      case 'DOM.scrollIntoViewIfNeeded':
+      case 'DOM.focus':
+      case 'DOM.setFileInputFiles':
+      case 'Input.dispatchMouseEvent':
+      case 'Input.insertText':
+      case 'Input.dispatchKeyEvent':
+        return {}
+
+      case 'Accessibility.getFullAXTree':
+        return { nodes: currentTree }
+
+      case 'Page.getNavigationHistory':
+        return {
+          entries: [{ id: navHistoryId, url: currentUrl }],
+          currentIndex: 0
+        }
+
+      case 'Page.navigate': {
+        const { url: targetUrl } = params as { url: string }
+        if (targetUrl.includes('nonexistent.invalid')) {
+          return { errorText: 'net::ERR_NAME_NOT_RESOLVED' }
+        }
+        navHistoryId += 1
+        currentUrl = targetUrl
+        const isSearchPage = targetUrl.includes('search.example.com')
+        currentTitle = isSearchPage ? 'Search' : 'Example Domain'
+        currentTree = isSearchPage ? SEARCH_PAGE_TREE : EXAMPLE_COM_TREE
+        return {}
+      }
+
+      case 'Runtime.evaluate': {
+        // Why: the fallback below is a direct eval, which can see this scope,
+        // so the local keeps its name `expr`.
+        const expr = (params as { expression: string }).expression
+        if (expr === 'document.readyState') {
+          return { result: { value: 'complete' } }
+        }
+        if (expr === 'location.origin') {
+          return { result: { value: new URL(currentUrl).origin } }
+        }
+        if (expr.includes('innerWidth')) {
+          return { result: { value: JSON.stringify({ w: 1280, h: 720 }) } }
+        }
+        if (expr.includes('scrollBy') || expr.includes('dispatchEvent')) {
+          return { result: { value: undefined } }
+        }
+        // eslint-disable-next-line no-eval
+        return { result: { value: String(eval(expr)), type: 'string' } }
+      }
+
+      case 'DOM.getBoxModel':
+        return { model: { content: [100, 200, 300, 200, 300, 250, 100, 250] } }
+      case 'DOM.describeNode':
+        return { node: { nodeId: 1 } }
+      case 'DOM.requestNode':
+        return { nodeId: 1 }
+      case 'DOM.resolveNode':
+        return { object: { objectId: 'obj-1' } }
+      case 'Runtime.callFunctionOn':
+        return { result: { value: undefined } }
+      case 'Page.captureScreenshot':
+        return {
+          data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='
+        }
+
+      default:
+        throw new Error(`Unexpected CDP method: ${method}`)
+    }
+  })
+
+  // Handlers registered through debugger.on, grouped by event name.
+  const debuggerListeners = new Map<string, ((...args: unknown[]) => void)[]>()
+
+  const mockDebugger = {
+    attach: vi.fn(),
+    detach: vi.fn(),
+    sendCommand: sendCommandMock,
+    on: vi.fn((event: string, handler: (...args: unknown[]) => void) => {
+      const registered = debuggerListeners.get(event)
+      if (registered) {
+        registered.push(handler)
+      } else {
+        debuggerListeners.set(event, [handler])
+      }
+    }),
+    removeListener: vi.fn((event: string, handler: (...args: unknown[]) => void) => {
+      const registered = debuggerListeners.get(event)
+      if (!registered) {
+        return
+      }
+      const position = registered.indexOf(handler)
+      if (position !== -1) {
+        registered.splice(position, 1)
+      }
+    }),
+    removeAllListeners: vi.fn((event: string) => {
+      debuggerListeners.set(event, [])
+    }),
+    off: vi.fn()
+  }
+
+  const guest = {
+    id,
+    isDestroyed: vi.fn(() => false),
+    getType: vi.fn(() => 'webview'),
+    getURL: vi.fn(() => currentUrl),
+    getTitle: vi.fn(() => currentTitle),
+    setBackgroundThrottling: vi.fn(),
+    setWindowOpenHandler: vi.fn(),
+    on: vi.fn(),
+    off: vi.fn(),
+    debugger: mockDebugger
+  }
+
+  return { guest, sendCommandMock }
+}
+
+// ── RPC helper ──
+
+/**
+ * Sends one newline-delimited JSON request to the runtime RPC endpoint and
+ * resolves with the first newline-terminated JSON reply.
+ *
+ * Rejects when the socket errors, when the reply is not valid JSON, or when the
+ * connection closes before a complete reply line has arrived.
+ */
+async function sendRequest(
+  endpoint: string,
+  request: Record<string, unknown>
+): Promise<Record<string, unknown>> {
+  return await new Promise((resolve, reject) => {
+    const socket = createConnection(endpoint)
+    let buffer = ''
+    let settled = false
+    socket.setEncoding('utf8')
+    socket.once('error', reject)
+    // Why: without this, a server that hangs up before answering leaves the
+    // promise pending until the test runner's own timeout fires.
+    socket.once('close', () => {
+      if (!settled) {
+        reject(new Error('RPC socket closed before a complete response was received'))
+      }
+    })
+    socket.on('data', (chunk) => {
+      if (settled) {
+        return
+      }
+      buffer += chunk
+      const newlineIndex = buffer.indexOf('\n')
+      if (newlineIndex === -1) {
+        return
+      }
+      settled = true
+      const message = buffer.slice(0, newlineIndex)
+      socket.end()
+      // Why: a throw inside an event listener becomes an uncaught exception
+      // instead of a rejection, so malformed replies are routed to reject().
+      try {
+        resolve(JSON.parse(message) as Record<string, unknown>)
+      } catch (error) {
+        reject(error)
+      }
+    })
+    socket.on('connect', () => {
+      socket.write(`${JSON.stringify(request)}\n`)
+    })
+  })
+}
+
+// ── Tests ──
+
+describe('Browser automation pipeline (integration)', () => {
+  let server: OrcaRuntimeRpcServer
+  let endpoint: string
+  let authToken: string
+
+  const GUEST_WC_ID = 5001
+  const RENDERER_WC_ID = 1
+
+  beforeEach(async () => {
+    const { guest } = createMockGuest(GUEST_WC_ID, 'https://example.com', 'Example Domain')
+    webContentsFromIdMock.mockImplementation((id: number) => {
+      if (id === GUEST_WC_ID) {
+        return guest
+      }
+      return null
+    })
+
+    const browserManager = new BrowserManager()
+    // Simulate the attach-time policy (normally done in will-attach-webview)
+    browserManager.attachGuestPolicies(guest as never)
+    browserManager.registerGuest({
+      browserPageId: 'page-1',
+      webContentsId: GUEST_WC_ID,
+      rendererWebContentsId: RENDERER_WC_ID
+    })
+
+    const cdpBridge = new CdpBridge(browserManager)
+    cdpBridge.setActiveTab(GUEST_WC_ID)
+
+    const userDataPath = mkdtempSync(join(tmpdir(), 'browser-e2e-'))
+    const runtime = new OrcaRuntimeService()
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    runtime.setAgentBrowserBridge(cdpBridge as any)
+
+    server = new OrcaRuntimeRpcServer({ runtime, userDataPath })
+    await server.start()
+
+    const metadata = readRuntimeMetadata(userDataPath)!
+    endpoint = metadata.transport!.endpoint
+    authToken = metadata.authToken!
+  })
+
+  afterEach(async () => {
+    await server.stop()
+  })
+
+  // Issues an authenticated RPC call against the server started in beforeEach.
+  // `params` is only attached to the request when the caller supplies it.
+  async function rpc(method: string, params?: Record<string, unknown>) {
+    const request: Record<string, unknown> = {
+      id: `req_${method}`,
+      authToken,
+      method
+    }
+    if (params) {
+      request.params = params
+    }
+    return await sendRequest(endpoint, request)
+  }
+
+  // ── Snapshot ──
+
+  it('takes a snapshot and returns refs for interactive elements', async () => {
+    const res = await rpc('browser.snapshot')
+    expect(res.ok).toBe(true)
+
+    const result = res.result as {
+      snapshot: string
+      refs: { ref: string; role: string; name: string }[]
+      url: string
+      title: string
+    }
+    expect(result.url).toBe('https://example.com')
+    expect(result.title).toBe('Example Domain')
+    expect(result.snapshot).toContain('heading "Example Domain"')
+    expect(result.snapshot).toContain('link "More information..."')
+    expect(result.refs).toHaveLength(1)
+    expect(result.refs[0]).toMatchObject({
+      ref: '@e1',
+      role: 'link',
+      name: 'More information...'
+    })
+  })
+
+  // ── Click ──
+
+  it('clicks an element by ref after snapshot', async () => {
+    await rpc('browser.snapshot')
+
+    const res = await rpc('browser.click', { element: '@e1' })
+    expect(res.ok).toBe(true)
+    expect((res.result as { clicked: string }).clicked).toBe('@e1')
+  })
+
+  it('returns error when clicking without a prior snapshot', async () => {
+    const res = await rpc('browser.click', { element: '@e1' })
+    expect(res.ok).toBe(false)
+    expect((res.error as { code: string }).code).toBe('browser_stale_ref')
+  })
+
+  it('returns error for non-existent ref', async () => {
+    await rpc('browser.snapshot')
+
+    const res = await rpc('browser.click', { element: '@e999' })
+    expect(res.ok).toBe(false)
+    expect((res.error as { code: string }).code).toBe('browser_ref_not_found')
+  })
+
+  // ── Navigation ──
+
+  it('navigates to a URL and invalidates refs', async () => {
+    await rpc('browser.snapshot')
+
+    const gotoRes = await rpc('browser.goto', { url: 'https://search.example.com' })
+    expect(gotoRes.ok).toBe(true)
+    const gotoResult = gotoRes.result as { url: string; title: string }
+    expect(gotoResult.url).toBe('https://search.example.com')
+    expect(gotoResult.title).toBe('Search')
+
+    // Old refs should be stale after navigation
+    const clickRes = await rpc('browser.click', { element: '@e1' })
+    expect(clickRes.ok).toBe(false)
+    expect((clickRes.error as { code: string }).code).toBe('browser_stale_ref')
+
+    // Re-snapshot should work and show new page
+    const snapRes = await rpc('browser.snapshot')
+    expect(snapRes.ok).toBe(true)
+    const snapResult = snapRes.result as { snapshot: string; refs: { name: string }[] }
+    expect(snapResult.snapshot).toContain('Search')
+    expect(snapResult.refs.map((r) => r.name)).toContain('Search')
+    expect(snapResult.refs.map((r) => r.name)).toContain('Home')
+  })
+
+  it('returns error for failed navigation', async () => {
+    const res = await rpc('browser.goto', { url: 'https://nonexistent.invalid' })
+    expect(res.ok).toBe(false)
+    expect((res.error as { code: string }).code).toBe('browser_navigation_failed')
+  })
+
+  // ── Fill ──
+
+  it('fills an input by ref', async () => {
+    await rpc('browser.goto', { url: 'https://search.example.com' })
+    await rpc('browser.snapshot')
+
+    // @e2 should be the textbox "Search query" on the search page
+    const res = await rpc('browser.fill', { element: '@e2', value: 'hello world' })
+    expect(res.ok).toBe(true)
+    expect((res.result as { filled: string }).filled).toBe('@e2')
+  })
+
+  // ── Type ──
+
+  it('types text at current focus', async () => {
+    const res = await rpc('browser.type', { input: 'some text' })
+    expect(res.ok).toBe(true)
+    expect((res.result as { typed: boolean }).typed).toBe(true)
+  })
+
+  // ── Select ──
+
+  it('selects a dropdown option by ref', async () => {
+    await rpc('browser.goto', { url: 'https://search.example.com' })
+    await rpc('browser.snapshot')
+
+    const res = await rpc('browser.select', { element: '@e2', value: 'option-1' })
+    expect(res.ok).toBe(true)
+    expect((res.result as { selected: string }).selected).toBe('@e2')
+  })
+
+  // ── Scroll ──
+
+  it('scrolls the viewport', async () => {
+    const res = await rpc('browser.scroll', { direction: 'down' })
+    expect(res.ok).toBe(true)
+    expect((res.result as { scrolled: string }).scrolled).toBe('down')
+
+    const res2 = await rpc('browser.scroll', { direction: 'up', amount: 200 })
+    expect(res2.ok).toBe(true)
+    expect((res2.result as { scrolled: string }).scrolled).toBe('up')
+  })
+
+  // ── Reload ──
+
+  it('reloads the page', async () => {
+    const res = await rpc('browser.reload')
+    expect(res.ok).toBe(true)
+    expect((res.result as { url: string }).url).toBe('https://example.com')
+  })
+
+  // ── Screenshot ──
+
+  it('captures a screenshot', async () => {
+    const res = await rpc('browser.screenshot', { format: 'png' })
+    expect(res.ok).toBe(true)
+    const result = res.result as { data: string; format: string }
+    expect(result.format).toBe('png')
+    expect(result.data.length).toBeGreaterThan(0)
+  })
+
+  // ── Eval ──
+
+  it('evaluates JavaScript in the page context', async () => {
+    const res = await rpc('browser.eval', { expression: '2 + 2' })
+    expect(res.ok).toBe(true)
+    expect((res.result as { result: string }).result).toBe('4')
+  })
+
+  // ── Tab management ──
+
+  it('lists open tabs', async () => {
+    const res = await rpc('browser.tabList')
+    expect(res.ok).toBe(true)
+    const result = res.result as { tabs: { index: number; url: string; active: boolean }[] }
+    expect(result.tabs).toHaveLength(1)
+    expect(result.tabs[0]).toMatchObject({
+      index: 0,
+      url: 'https://example.com',
+      active: true
+    })
+  })
+
+  it('returns error for out-of-range tab switch', async () => {
+    const res = await rpc('browser.tabSwitch', { index: 5 })
+    expect(res.ok).toBe(false)
+    expect((res.error as { code: string }).code).toBe('browser_tab_not_found')
+  })
+
+  // ── Full agent workflow simulation ──
+
+  it('simulates a complete agent workflow: navigate → snapshot → interact → re-snapshot', async () => {
+    // 1. Navigate to search page
+    const gotoRes = await rpc('browser.goto', { url: 'https://search.example.com' })
+    expect(gotoRes.ok).toBe(true)
+
+    // 2. Snapshot the page
+    const snap1 = await rpc('browser.snapshot')
+    expect(snap1.ok).toBe(true)
+    const snap1Result = snap1.result as {
+      snapshot: string
+      refs: { ref: string; role: string; name: string }[]
+    }
+
+    // Verify we see the search page structure
+    expect(snap1Result.snapshot).toContain('[Main Nav]')
+    expect(snap1Result.snapshot).toContain('text input "Search query"')
+    expect(snap1Result.snapshot).toContain('button "Search"')
+
+    // 3. Fill the search input
+    const searchInput = snap1Result.refs.find((r) => r.name === 'Search query')
+    expect(searchInput).toBeDefined()
+    const fillRes = await rpc('browser.fill', {
+      element: searchInput!.ref,
+      value: 'integration testing'
+    })
+    expect(fillRes.ok).toBe(true)
+
+    // 4. Click the search button
+    const searchBtn = snap1Result.refs.find((r) => r.name === 'Search')
+    expect(searchBtn).toBeDefined()
+    const clickRes = await rpc('browser.click', { element: searchBtn!.ref })
+    expect(clickRes.ok).toBe(true)
+
+    // 5. Take a screenshot
+    const ssRes = await rpc('browser.screenshot')
+    expect(ssRes.ok).toBe(true)
+
+    // 6. Check tab list
+    const tabRes = await rpc('browser.tabList')
+    expect(tabRes.ok).toBe(true)
+    const tabs = (tabRes.result as { tabs: { url: string }[] }).tabs
+    expect(tabs[0].url).toBe('https://search.example.com')
+  })
+
+  // ── No tab errors ──
+
+  it('returns browser_no_tab when no tabs are registered', async () => {
+    // Create a fresh setup with no registered tabs
+    const emptyManager = new BrowserManager()
+    const emptyBridge = new CdpBridge(emptyManager)
+
+    const userDataPath2 = mkdtempSync(join(tmpdir(), 'browser-e2e-empty-'))
+    const runtime2 = new OrcaRuntimeService()
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    runtime2.setAgentBrowserBridge(emptyBridge as any)
+
+    const server2 = new OrcaRuntimeRpcServer({ runtime: runtime2, userDataPath: userDataPath2 })
+    await server2.start()
+
+    // Why: if the request or an assertion throws, stop() must still run;
+    // otherwise this extra server is never shut down (afterEach only stops
+    // the shared `server`).
+    try {
+      const metadata2 = readRuntimeMetadata(userDataPath2)!
+      const res = await sendRequest(metadata2.transport!.endpoint, {
+        id: 'req_no_tab',
+        authToken: metadata2.authToken,
+        method: 'browser.snapshot'
+      })
+
+      expect(res.ok).toBe(false)
+      expect((res.error as { code: string }).code).toBe('browser_no_tab')
+    } finally {
+      await server2.stop()
+    }
+  })
+})
--- a/src/main/browser/cdp-bridge.ts
+++ b/src/main/browser/cdp-bridge.ts
--- a/src/main/browser/cdp-screenshot.test.ts
+++ b/src/main/browser/cdp-screenshot.test.ts
@ -0,0 +1,246 @@
+import { afterEach, describe, expect, it, vi } from 'vitest'
+
+import { captureFullPageScreenshot, captureScreenshot } from './cdp-screenshot'
+
+/**
+ * Builds a minimal WebContents stand-in for the screenshot tests.
+ *
+ * `isDestroyed` reports false and `debugger.isAttached` reports true; every
+ * other member is a bare `vi.fn()` that each test configures as needed.
+ */
+function createMockWebContents() {
+  return {
+    isDestroyed: vi.fn(() => false),
+    invalidate: vi.fn(),
+    capturePage: vi.fn(),
+    debugger: {
+      isAttached: vi.fn(() => true),
+      sendCommand: vi.fn()
+    }
+  }
+}
+
+describe('captureScreenshot', () => {
+  afterEach(() => {
+    vi.useRealTimers()
+  })
+
+  it('invalidates the guest before forwarding Page.captureScreenshot', async () => {
+    const webContents = createMockWebContents()
+    webContents.debugger.sendCommand.mockResolvedValueOnce({ data: 'png-data' })
+    const onResult = vi.fn()
+    const onError = vi.fn()
+
+    captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
+    await Promise.resolve()
+
+    expect(webContents.invalidate).toHaveBeenCalledTimes(1)
+    expect(webContents.debugger.sendCommand).toHaveBeenCalledWith('Page.captureScreenshot', {
+      format: 'png'
+    })
+    expect(onResult).toHaveBeenCalledWith({ data: 'png-data' })
+    expect(onError).not.toHaveBeenCalled()
+  })
+
+  it('falls back to capturePage when Page.captureScreenshot stalls', async () => {
+    vi.useFakeTimers()
+
+    const webContents = createMockWebContents()
+    webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
+    webContents.capturePage.mockResolvedValueOnce({
+      isEmpty: () => false,
+      toPNG: () => Buffer.from('fallback-png')
+    })
+    const onResult = vi.fn()
+    const onError = vi.fn()
+
+    captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
+    await vi.advanceTimersByTimeAsync(8000)
+
+    expect(webContents.capturePage).toHaveBeenCalledTimes(1)
+    expect(onResult).toHaveBeenCalledWith({
+      data: Buffer.from('fallback-png').toString('base64')
+    })
+    expect(onError).not.toHaveBeenCalled()
+  })
+
+  it('crops the fallback image when the request includes a visible clip rect', async () => {
+    vi.useFakeTimers()
+
+    const croppedImage = {
+      isEmpty: () => false,
+      toPNG: () => Buffer.from('cropped-png')
+    }
+    const webContents = createMockWebContents()
+    webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
+    webContents.capturePage.mockResolvedValueOnce({
+      isEmpty: () => false,
+      getSize: () => ({ width: 400, height: 300 }),
+      crop: vi.fn(() => croppedImage),
+      toPNG: () => Buffer.from('full-png')
+    })
+    const onResult = vi.fn()
+    const onError = vi.fn()
+
+    captureScreenshot(
+      webContents as never,
+      {
+        format: 'png',
+        clip: { x: 10, y: 20, width: 30, height: 40, scale: 2 }
+      },
+      onResult,
+      onError
+    )
+    await vi.advanceTimersByTimeAsync(8000)
+
+    const fallbackImage = await webContents.capturePage.mock.results[0]?.value
+    expect(fallbackImage.crop).toHaveBeenCalledWith({ x: 20, y: 40, width: 60, height: 80 })
+    expect(onResult).toHaveBeenCalledWith({
+      data: Buffer.from('cropped-png').toString('base64')
+    })
+    expect(onError).not.toHaveBeenCalled()
+  })
+
+  it('keeps the timeout error when the request needs beyond-viewport pixels', async () => {
+    vi.useFakeTimers()
+
+    const webContents = createMockWebContents()
+    webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
+    webContents.capturePage.mockResolvedValueOnce({
+      isEmpty: () => false,
+      getSize: () => ({ width: 400, height: 300 }),
+      crop: vi.fn(),
+      toPNG: () => Buffer.from('full-png')
+    })
+    const onResult = vi.fn()
+    const onError = vi.fn()
+
+    captureScreenshot(
+      webContents as never,
+      {
+        format: 'png',
+        captureBeyondViewport: true,
+        clip: { x: 0, y: 0, width: 800, height: 1200, scale: 1 }
+      },
+      onResult,
+      onError
+    )
+    await vi.advanceTimersByTimeAsync(8000)
+
+    expect(onResult).not.toHaveBeenCalled()
+    expect(onError).toHaveBeenCalledWith(
+      'Screenshot timed out — the browser tab may not be visible or the window may not have focus.'
+    )
+  })
+
+  it('ignores the fallback result when CDP settles first after the timeout fires', async () => {
+    vi.useFakeTimers()
+
+    let resolveCapturePage: ((value: unknown) => void) | null = null
+    let resolveSendCommand: ((value: unknown) => void) | null = null
+    const webContents = createMockWebContents()
+    webContents.debugger.sendCommand.mockImplementation(
+      () =>
+        new Promise((resolve) => {
+          resolveSendCommand = resolve
+        })
+    )
+    webContents.capturePage.mockImplementation(
+      () =>
+        new Promise((resolve) => {
+          resolveCapturePage = resolve
+        })
+    )
+    const onResult = vi.fn()
+    const onError = vi.fn()
+
+    captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
+    await vi.advanceTimersByTimeAsync(8000)
+
+    expect(resolveSendCommand).toBeTypeOf('function')
+    resolveSendCommand!({ data: 'cdp-png' })
+    await Promise.resolve()
+
+    expect(resolveCapturePage).toBeTypeOf('function')
+    resolveCapturePage!({
+      isEmpty: () => false,
+      getSize: () => ({ width: 100, height: 100 }),
+      crop: vi.fn(),
+      toPNG: () => Buffer.from('fallback-png')
+    })
+    await Promise.resolve()
+
+    expect(onResult).toHaveBeenCalledTimes(1)
+    expect(onResult).toHaveBeenCalledWith({ data: 'cdp-png' })
+    expect(onError).not.toHaveBeenCalled()
+  })
+
+  it('reports the original timeout when the fallback capture is unavailable', async () => {
+    vi.useFakeTimers()
+
+    const webContents = createMockWebContents()
+    webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
+    webContents.capturePage.mockResolvedValueOnce({
+      isEmpty: () => true,
+      toPNG: () => Buffer.from('unused')
+    })
+    const onResult = vi.fn()
+    const onError = vi.fn()
+
+    captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
+    await vi.advanceTimersByTimeAsync(8000)
+
+    expect(onResult).not.toHaveBeenCalled()
+    expect(onError).toHaveBeenCalledWith(
+      'Screenshot timed out — the browser tab may not be visible or the window may not have focus.'
+    )
+  })
+})
+
+describe('captureFullPageScreenshot', () => {
+  it('uses cssContentSize so HiDPI pages are captured at the real page size', async () => {
+    const webContents = createMockWebContents()
+    webContents.debugger.sendCommand.mockImplementation((method: string) => {
+      if (method === 'Page.getLayoutMetrics') {
+        return Promise.resolve({
+          cssContentSize: { width: 640.25, height: 1280.75 },
+          contentSize: { width: 1280.5, height: 2561.5 }
+        })
+      }
+      if (method === 'Page.captureScreenshot') {
+        return Promise.resolve({ data: 'full-page-data' })
+      }
+      return Promise.resolve({})
+    })
+
+    await expect(captureFullPageScreenshot(webContents as never, 'png')).resolves.toEqual({
+      data: 'full-page-data',
+      format: 'png'
+    })
+    expect(webContents.debugger.sendCommand).toHaveBeenNthCalledWith(1, 'Page.getLayoutMetrics', {})
+    expect(webContents.debugger.sendCommand).toHaveBeenNthCalledWith(2, 'Page.captureScreenshot', {
+      format: 'png',
+      captureBeyondViewport: true,
+      clip: { x: 0, y: 0, width: 641, height: 1281, scale: 1 }
+    })
+  })
+
+  it('falls back to legacy contentSize when cssContentSize is unavailable', async () => {
+    const webContents = createMockWebContents()
+    webContents.debugger.sendCommand.mockImplementation((method: string) => {
+      if (method === 'Page.getLayoutMetrics') {
+        return Promise.resolve({
+          contentSize: { width: 800, height: 1600 }
+        })
+      }
+      if (method === 'Page.captureScreenshot') {
+        return Promise.resolve({ data: 'legacy-full-page-data' })
+      }
+      return Promise.resolve({})
+    })
+
+    await expect(captureFullPageScreenshot(webContents as never, 'jpeg')).resolves.toEqual({
+      data: 'legacy-full-page-data',
+      format: 'jpeg'
+    })
+    expect(webContents.debugger.sendCommand).toHaveBeenNthCalledWith(2, 'Page.captureScreenshot', {
+      format: 'jpeg',
+      captureBeyondViewport: true,
+      clip: { x: 0, y: 0, width: 800, height: 1600, scale: 1 }
+    })
+  })
+})
--- a/src/main/browser/cdp-screenshot.ts
+++ b/src/main/browser/cdp-screenshot.ts
@ -0,0 +1,264 @@
+import type { WebContents } from 'electron'
+
+// Upper bound, in milliseconds, on how long a CDP screenshot-related command
+// may stay pending before this module stops waiting for it.
+const SCREENSHOT_TIMEOUT_MS = 8000
+// Error text surfaced to callers when that bound is exceeded.
+const SCREENSHOT_TIMEOUT_MESSAGE =
+  'Screenshot timed out — the browser tab may not be visible or the window may not have focus.'
+
+/**
+ * Applies the `clip` from the caller's `Page.captureScreenshot` params to an
+ * image obtained through the `capturePage()` fallback.
+ *
+ * @param image - Image to crop.
+ * @param params - The caller's screenshot params (may be undefined).
+ * @returns `image` itself when no object-valued clip was requested, the
+ *   cropped image when the clip is valid and lies inside the image, or `null`
+ *   when the request cannot be honoured.
+ */
+function applyFallbackClip(
+  image: Electron.NativeImage,
+  params: Record<string, unknown> | undefined
+): Electron.NativeImage | null {
+  // Why: capturePage() can only see the currently painted viewport. If the
+  // caller asked for beyond-viewport pixels, returning a viewport-sized image
+  // would silently lie about what was captured.
+  if (params?.captureBeyondViewport) {
+    return null
+  }
+
+  const requestedClip = params?.clip
+  if (!requestedClip || typeof requestedClip !== 'object') {
+    return image
+  }
+
+  const fields = requestedClip as Record<string, unknown>
+  // Anything that is not a number becomes NaN so one finiteness check covers it.
+  const numberOrNaN = (value: unknown): number => (typeof value === 'number' ? value : NaN)
+  const left = numberOrNaN(fields.x)
+  const top = numberOrNaN(fields.y)
+  const clipWidth = numberOrNaN(fields.width)
+  const clipHeight = numberOrNaN(fields.height)
+
+  // A missing, non-finite, or non-positive scale is treated as 1.
+  const rawScale = fields.scale
+  const scale =
+    typeof rawScale === 'number' && Number.isFinite(rawScale) && rawScale > 0 ? rawScale : 1
+
+  const allFinite =
+    Number.isFinite(left) &&
+    Number.isFinite(top) &&
+    Number.isFinite(clipWidth) &&
+    Number.isFinite(clipHeight)
+  if (!allFinite || clipWidth <= 0 || clipHeight <= 0) {
+    return null
+  }
+
+  const scaled = (value: number): number => Math.round(value * scale)
+  const cropRect = {
+    x: scaled(left),
+    y: scaled(top),
+    width: scaled(clipWidth),
+    height: scaled(clipHeight)
+  }
+
+  const { width: imageWidth, height: imageHeight } = image.getSize()
+  const outsideImage =
+    cropRect.x < 0 ||
+    cropRect.y < 0 ||
+    cropRect.width <= 0 ||
+    cropRect.height <= 0 ||
+    cropRect.x + cropRect.width > imageWidth ||
+    cropRect.y + cropRect.height > imageHeight
+
+  return outsideImage ? null : image.crop(cropRect)
+}
+
+/**
+ * Turns a `capturePage()` image into the `{ data }` payload shape used for
+ * `Page.captureScreenshot` results.
+ *
+ * @param image - Captured image.
+ * @param params - The caller's screenshot params; `clip`, `format` and
+ *   `quality` are honoured here.
+ * @returns Base64-encoded image data, or `null` when the image is empty or the
+ *   requested clip cannot be applied.
+ */
+function encodeNativeImageScreenshot(
+  image: Electron.NativeImage,
+  params: Record<string, unknown> | undefined
+): { data: string } | null {
+  if (image.isEmpty()) {
+    return null
+  }
+
+  const clipped = applyFallbackClip(image, params)
+  if (!clipped || clipped.isEmpty()) {
+    return null
+  }
+
+  // Every format other than 'jpeg' is encoded as PNG.
+  if (params?.format !== 'jpeg') {
+    return { data: clipped.toPNG().toString('base64') }
+  }
+
+  // JPEG quality is rounded and clamped to 0..100; 90 is used when absent.
+  const requestedQuality = params?.quality
+  const jpegQuality =
+    typeof requestedQuality === 'number' && Number.isFinite(requestedQuality)
+      ? Math.min(100, Math.max(0, Math.round(requestedQuality)))
+      : 90
+  return { data: clipped.toJPEG(jpegQuality).toString('base64') }
+}
+
+/**
+ * Derives a full-page screenshot clip from a `Page.getLayoutMetrics` result.
+ *
+ * @param metrics - Layout metrics; only the two content-size fields are read.
+ * @returns A clip anchored at the origin with the dimensions rounded up and a
+ *   scale of 1, or `null` when no positive, finite size is available.
+ */
+function getLayoutClip(metrics: {
+  cssContentSize?: { width?: number; height?: number }
+  contentSize?: { width?: number; height?: number }
+}): { x: number; y: number; width: number; height: number; scale: number } | null {
+  const isUsableDimension = (value: number | undefined): value is number =>
+    typeof value === 'number' && Number.isFinite(value) && value > 0
+
+  // Why: Page.captureScreenshot clip coordinates are in CSS pixels. On HiDPI
+  // Electron guests, `contentSize` can reflect device pixels, which makes
+  // Chromium tile the page into a duplicated 2x2 grid. Prefer cssContentSize
+  // and only fall back to contentSize when older Chromium builds omit it.
+  const pageSize = metrics.cssContentSize ?? metrics.contentSize
+  const pageWidth = pageSize?.width
+  const pageHeight = pageSize?.height
+
+  if (isUsableDimension(pageWidth) && isUsableDimension(pageHeight)) {
+    return { x: 0, y: 0, width: Math.ceil(pageWidth), height: Math.ceil(pageHeight), scale: 1 }
+  }
+  return null
+}
+
+/**
+ * Issues one CDP command through `webContents.debugger` and rejects if it has
+ * not settled within `SCREENSHOT_TIMEOUT_MS`.
+ *
+ * @param webContents - Target whose debugger receives the command.
+ * @param method - CDP method name.
+ * @param params - Command params; `undefined` is sent as an empty object.
+ * @param timeoutMessage - Message of the Error raised on timeout.
+ * @returns The command result, cast to `T`.
+ * @throws The command's own rejection, or `Error(timeoutMessage)` on timeout.
+ */
+async function sendCommandWithTimeout<T>(
+  webContents: WebContents,
+  method: string,
+  params: Record<string, unknown> | undefined,
+  timeoutMessage: string
+): Promise<T> {
+  const command = webContents.debugger.sendCommand(method, params ?? {}) as Promise<T>
+
+  let timer: NodeJS.Timeout | undefined
+  const deadline = new Promise<T>((_resolve, reject) => {
+    timer = setTimeout(() => reject(new Error(timeoutMessage)), SCREENSHOT_TIMEOUT_MS)
+  })
+
+  try {
+    return await Promise.race([command, deadline])
+  } finally {
+    // Always release the timer so a finished command leaves nothing pending.
+    clearTimeout(timer)
+  }
+}
+
+/**
+ * Captures the whole page (not just the viewport) through CDP.
+ *
+ * Reads the page size from `Page.getLayoutMetrics`, then asks
+ * `Page.captureScreenshot` for a beyond-viewport capture clipped to that size.
+ *
+ * @param webContents - Target with an attached debugger.
+ * @param format - Image format to request; defaults to `'png'`.
+ * @returns The base64 image data and the format that was requested.
+ * @throws If the target is destroyed, the debugger is not attached, the page
+ *   bounds cannot be determined, or a CDP command fails or times out.
+ */
+export async function captureFullPageScreenshot(
+  webContents: WebContents,
+  format: 'png' | 'jpeg' = 'png'
+): Promise<{ data: string; format: 'png' | 'jpeg' }> {
+  if (webContents.isDestroyed()) {
+    throw new Error('WebContents destroyed')
+  }
+  if (!webContents.debugger.isAttached()) {
+    throw new Error('Debugger not attached')
+  }
+
+  try {
+    webContents.invalidate()
+  } catch {
+    // Some guest teardown paths reject repaint requests. Fall through to CDP.
+  }
+
+  type LayoutMetrics = {
+    cssContentSize?: { width?: number; height?: number }
+    contentSize?: { width?: number; height?: number }
+  }
+  const layoutMetrics = await sendCommandWithTimeout<LayoutMetrics>(
+    webContents,
+    'Page.getLayoutMetrics',
+    undefined,
+    SCREENSHOT_TIMEOUT_MESSAGE
+  )
+
+  const clip = getLayoutClip(layoutMetrics)
+  if (clip === null) {
+    throw new Error('Unable to determine full-page screenshot bounds')
+  }
+
+  const captureParams = { format, captureBeyondViewport: true, clip }
+  const capture = await sendCommandWithTimeout<{ data: string }>(
+    webContents,
+    'Page.captureScreenshot',
+    captureParams,
+    SCREENSHOT_TIMEOUT_MESSAGE
+  )
+
+  return { data: capture.data, format }
+}
+
+// Why: Electron's capturePage() is unreliable on webview guests — the compositor
+// may not produce frames when the webview panel is inactive, unfocused, or in a
+// split-pane layout. Instead, use the debugger's Page.captureScreenshot which
+// renders server-side in the Blink compositor and doesn't depend on OS-level
+// window focus or display state. Guard with a timeout so agent-browser doesn't
+// hang on its 30s CDP timeout if the debugger stalls.
+/**
+ * Captures a screenshot through CDP and reports the outcome through callbacks.
+ *
+ * Exactly one of `onResult` / `onError` is invoked. If the CDP command has not
+ * settled after `SCREENSHOT_TIMEOUT_MS`, a `capturePage()` fallback is tried;
+ * when that cannot satisfy the request either, `onError` receives the timeout
+ * message.
+ *
+ * @param webContents - Target with an attached debugger.
+ * @param params - Caller's `Page.captureScreenshot` params. Only `format`,
+ *   `quality`, `clip`, `captureBeyondViewport` and `fromSurface` are forwarded.
+ * @param onResult - Receives the CDP result, or the fallback `{ data }` payload.
+ * @param onError - Receives a human-readable failure message.
+ */
+export function captureScreenshot(
+  webContents: WebContents,
+  params: Record<string, unknown> | undefined,
+  onResult: (result: unknown) => void,
+  onError: (message: string) => void
+): void {
+  if (webContents.isDestroyed()) {
+    onError('WebContents destroyed')
+    return
+  }
+  const dbg = webContents.debugger
+  if (!dbg.isAttached()) {
+    onError('Debugger not attached')
+    return
+  }
+
+  const screenshotParams: Record<string, unknown> = {}
+  if (params?.format) {
+    screenshotParams.format = params.format
+  }
+  // Why: 0 is a legal JPEG quality. A truthiness test would drop it and make
+  // the CDP path use the default quality while the capturePage() fallback
+  // honours the 0 — test for presence instead.
+  if (params?.quality != null) {
+    screenshotParams.quality = params.quality
+  }
+  if (params?.clip) {
+    screenshotParams.clip = params.clip
+  }
+  if (params?.captureBeyondViewport != null) {
+    screenshotParams.captureBeyondViewport = params.captureBeyondViewport
+  }
+  if (params?.fromSurface != null) {
+    screenshotParams.fromSurface = params.fromSurface
+  }
+
+  // Guards the callbacks: whichever path finishes first wins, later ones no-op.
+  let settled = false
+  // Why: a compositor invalidate is cheap and can recover guest instances that
+  // are visible but have not produced a fresh frame since being reclaimed into
+  // the active browser tab.
+  try {
+    webContents.invalidate()
+  } catch {
+    // Some guest teardown paths reject repaint requests. Fall through to CDP.
+  }
+  const timer = setTimeout(async () => {
+    if (settled) {
+      return
+    }
+    try {
+      const image = await webContents.capturePage()
+      // The CDP command may have settled while capturePage() was pending.
+      if (settled) {
+        return
+      }
+      const fallback = encodeNativeImageScreenshot(image, params)
+      if (fallback) {
+        settled = true
+        onResult(fallback)
+        return
+      }
+    } catch {
+      // Fall through to the original timeout error below.
+    }
+
+    if (!settled) {
+      settled = true
+      onError(SCREENSHOT_TIMEOUT_MESSAGE)
+    }
+  }, SCREENSHOT_TIMEOUT_MS)
+
+  dbg
+    .sendCommand('Page.captureScreenshot', screenshotParams)
+    .then((result) => {
+      if (!settled) {
+        settled = true
+        clearTimeout(timer)
+        onResult(result)
+      }
+    })
+    .catch((err) => {
+      if (!settled) {
+        settled = true
+        clearTimeout(timer)
+        // Non-Error rejections still yield a string message.
+        onError(err instanceof Error ? err.message : String(err))
+      }
+    })
+}
--- a/src/main/browser/cdp-ws-proxy.test.ts
+++ b/src/main/browser/cdp-ws-proxy.test.ts
@ -0,0 +1,311 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
+import WebSocket from 'ws'
+import { CdpWsProxy } from './cdp-ws-proxy'
+
+// Replace the `electron` module with a minimal stub for this test file.
+vi.mock('electron', () => ({
+  webContents: { fromId: vi.fn() }
+}))
+
+// Signature of the handlers the mock debugger stores for `on(...)`.
+type DebuggerListener = (...args: unknown[]) => void
+
+/**
+ * Builds a fake `webContents` whose debugger records listeners, plus an
+ * `emit` helper that replays a debugger event to those listeners.
+ */
+function createMockWebContents() {
+  const listeners = new Map<string, DebuggerListener[]>()
+
+  const addListener = (event: string, handler: DebuggerListener): void => {
+    if (!listeners.has(event)) {
+      listeners.set(event, [])
+    }
+    listeners.get(event)!.push(handler)
+  }
+
+  const dropListener = (event: string, handler: DebuggerListener): void => {
+    const remaining = (listeners.get(event) ?? []).filter((candidate) => candidate !== handler)
+    listeners.set(event, remaining)
+  }
+
+  const debuggerObj = {
+    isAttached: vi.fn(() => false),
+    attach: vi.fn(),
+    detach: vi.fn(),
+    sendCommand: vi.fn(async () => ({})),
+    on: vi.fn(addListener),
+    removeListener: vi.fn(dropListener)
+  }
+
+  const webContents = {
+    debugger: debuggerObj,
+    isDestroyed: () => false,
+    focus: vi.fn(),
+    getTitle: vi.fn(() => 'Example'),
+    getURL: vi.fn(() => 'https://example.com')
+  }
+
+  return {
+    webContents,
+    listeners,
+    // Invoke every handler currently registered for `event`.
+    emit(event: string, ...args: unknown[]) {
+      const registered = listeners.get(event) ?? []
+      for (const handler of registered) {
+        handler(...args)
+      }
+    }
+  }
+}
+
+// End-to-end tests: each case starts a real proxy on a loopback port, talks to
+// it with a real `ws` client, and backs it with the mock debugger above.
+describe('CdpWsProxy', () => {
+  let mock: ReturnType<typeof createMockWebContents>
+  let proxy: CdpWsProxy
+  let endpoint: string
+
+  beforeEach(async () => {
+    mock = createMockWebContents()
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    proxy = new CdpWsProxy(mock.webContents as any)
+    endpoint = await proxy.start()
+  })
+
+  afterEach(async () => {
+    await proxy.stop()
+  })
+
+  // Opens a client connection to the proxy under test.
+  function connect(): Promise<WebSocket> {
+    return new Promise((resolve, reject) => {
+      const ws = new WebSocket(endpoint)
+      // Why: without an error path a refused connection would leave the test
+      // hanging until the runner's timeout instead of failing immediately.
+      const onConnectError = (err: Error): void => reject(err)
+      ws.once('error', onConnectError)
+      ws.on('open', () => {
+        ws.off('error', onConnectError)
+        resolve(ws)
+      })
+    })
+  }
+
+  // Sends one message and resolves with the next message the client receives.
+  function sendAndReceive(
+    ws: WebSocket,
+    msg: Record<string, unknown>
+  ): Promise<Record<string, unknown>> {
+    return new Promise((resolve) => {
+      ws.once('message', (data) => resolve(JSON.parse(data.toString())))
+      ws.send(JSON.stringify(msg))
+    })
+  }
+
+  it('starts on a random port and returns ws:// URL', () => {
+    expect(endpoint).toMatch(/^ws:\/\/127\.0\.0\.1:\d+$/)
+    expect(proxy.getPort()).toBeGreaterThan(0)
+  })
+
+  it('attaches debugger on start', () => {
+    expect(mock.webContents.debugger.attach).toHaveBeenCalledWith('1.3')
+  })
+
+  // ── CDP message ID correlation ──
+
+  it('correlates CDP request/response IDs', async () => {
+    mock.webContents.debugger.sendCommand.mockResolvedValueOnce({ tree: 'nodes' })
+
+    const client = await connect()
+    const response = await sendAndReceive(client, {
+      id: 42,
+      method: 'Accessibility.getFullAXTree',
+      params: {}
+    })
+
+    expect(response.id).toBe(42)
+    expect(response.result).toEqual({ tree: 'nodes' })
+    client.close()
+  })
+
+  it('returns error response when sendCommand fails', async () => {
+    mock.webContents.debugger.sendCommand.mockRejectedValueOnce(new Error('Node not found'))
+
+    const client = await connect()
+    const response = await sendAndReceive(client, {
+      id: 7,
+      method: 'DOM.describeNode',
+      params: { nodeId: 999 }
+    })
+
+    expect(response.id).toBe(7)
+    expect(response.error).toEqual({ code: -32000, message: 'Node not found' })
+    client.close()
+  })
+
+  // ── Concurrent requests get correct responses ──
+
+  it('handles concurrent requests with correct correlation', async () => {
+    let resolveFirst: (v: unknown) => void
+    const firstPromise = new Promise((r) => {
+      resolveFirst = r
+    })
+
+    mock.webContents.debugger.sendCommand
+      .mockImplementationOnce(async () => {
+        await firstPromise
+        return { result: 'slow' }
+      })
+      .mockResolvedValueOnce({ result: 'fast' })
+
+    const client = await connect()
+
+    const responses: Record<string, unknown>[] = []
+    client.on('message', (data) => {
+      responses.push(JSON.parse(data.toString()))
+    })
+
+    client.send(JSON.stringify({ id: 1, method: 'DOM.enable', params: {} }))
+    await new Promise((r) => setTimeout(r, 10))
+    client.send(JSON.stringify({ id: 2, method: 'Page.enable', params: {} }))
+
+    await new Promise((r) => setTimeout(r, 20))
+    resolveFirst!(undefined)
+    await new Promise((r) => setTimeout(r, 20))
+
+    expect(responses).toHaveLength(2)
+    const resp1 = responses.find((r) => r.id === 1)
+    const resp2 = responses.find((r) => r.id === 2)
+    expect(resp1?.result).toEqual({ result: 'slow' })
+    expect(resp2?.result).toEqual({ result: 'fast' })
+
+    client.close()
+  })
+
+  it('does not deliver a late response from a closed client to a newer websocket', async () => {
+    let resolveSlowCommand: ((value: { result: string }) => void) | null = null
+    mock.webContents.debugger.sendCommand
+      .mockImplementationOnce(
+        () =>
+          new Promise((resolve) => {
+            resolveSlowCommand = resolve
+          })
+      )
+      .mockResolvedValueOnce({ result: 'new-client' })
+
+    const firstClient = await connect()
+    firstClient.send(JSON.stringify({ id: 1, method: 'DOM.enable', params: {} }))
+    await new Promise((resolve) => setTimeout(resolve, 10))
+
+    // Connecting again makes the proxy close the first client.
+    const secondClient = await connect()
+    const responses: Record<string, unknown>[] = []
+    secondClient.on('message', (data) => {
+      responses.push(JSON.parse(data.toString()))
+    })
+
+    secondClient.send(JSON.stringify({ id: 2, method: 'Page.enable', params: {} }))
+    await new Promise((resolve) => setTimeout(resolve, 20))
+
+    resolveSlowCommand!({ result: 'old-client' })
+    await new Promise((resolve) => setTimeout(resolve, 20))
+
+    expect(responses).toEqual([{ id: 2, result: { result: 'new-client' } }])
+
+    secondClient.close()
+  })
+
+  // ── sessionId envelope translation ──
+
+  it('forwards sessionId to sendCommand for OOPIF support', async () => {
+    mock.webContents.debugger.sendCommand.mockResolvedValueOnce({})
+
+    const client = await connect()
+    await sendAndReceive(client, {
+      id: 1,
+      method: 'DOM.enable',
+      params: {},
+      sessionId: 'oopif-session-123'
+    })
+
+    expect(mock.webContents.debugger.sendCommand).toHaveBeenCalledWith(
+      'DOM.enable',
+      {},
+      'oopif-session-123'
+    )
+    client.close()
+  })
+
+  // ── Event forwarding ──
+
+  it('forwards CDP events from debugger to client', async () => {
+    const client = await connect()
+
+    const eventPromise = new Promise<Record<string, unknown>>((resolve) => {
+      client.on('message', (data) => resolve(JSON.parse(data.toString())))
+    })
+
+    mock.emit('message', {}, 'Console.messageAdded', { entry: { text: 'hello' } })
+
+    const event = await eventPromise
+    expect(event.method).toBe('Console.messageAdded')
+    expect(event.params).toEqual({ entry: { text: 'hello' } })
+    client.close()
+  })
+
+  it('forwards sessionId in events when present', async () => {
+    const client = await connect()
+
+    const eventPromise = new Promise<Record<string, unknown>>((resolve) => {
+      client.on('message', (data) => resolve(JSON.parse(data.toString())))
+    })
+
+    mock.emit('message', {}, 'DOM.nodeInserted', { node: {} }, 'iframe-session-456')
+
+    const event = await eventPromise
+    expect(event.sessionId).toBe('iframe-session-456')
+    client.close()
+  })
+
+  // ── Focus handling ──
+
+  it('does not focus the guest for Runtime.evaluate polling commands', async () => {
+    const client = await connect()
+
+    await sendAndReceive(client, {
+      id: 9,
+      method: 'Runtime.evaluate',
+      params: { expression: 'document.readyState' }
+    })
+
+    expect(mock.webContents.focus).not.toHaveBeenCalled()
+    client.close()
+  })
+
+  it('still focuses the guest for Input.insertText', async () => {
+    const client = await connect()
+
+    await sendAndReceive(client, {
+      id: 10,
+      method: 'Input.insertText',
+      params: { text: 'hello' }
+    })
+
+    expect(mock.webContents.focus).toHaveBeenCalledTimes(1)
+    client.close()
+  })
+
+  // ── Cleanup ──
+
+  it('detaches debugger and closes server on stop', async () => {
+    const client = await connect()
+    await proxy.stop()
+
+    expect(mock.webContents.debugger.detach).toHaveBeenCalled()
+    expect(proxy.getPort()).toBeGreaterThan(0) // port stays set but server is closed
+
+    await new Promise<void>((resolve) => {
+      client.on('close', () => resolve())
+      if (client.readyState === WebSocket.CLOSED) {
+        resolve()
+      }
+    })
+  })
+
+  // Why: stop() closes the client socket, so a command that completes
+  // afterwards has nowhere to be delivered. The proxy drops the reply rather
+  // than rejecting anything, which is what this test pins down.
+  it('drops the response for a request still inflight at stop', async () => {
+    let resolveCommand: (v: unknown) => void
+    mock.webContents.debugger.sendCommand.mockImplementation(
+      () =>
+        new Promise((r) => {
+          resolveCommand = r as (v: unknown) => void
+        })
+    )
+
+    const client = await connect()
+    const responses: Record<string, unknown>[] = []
+    client.on('message', (data) => {
+      responses.push(JSON.parse(data.toString()))
+    })
+    client.send(JSON.stringify({ id: 1, method: 'Page.enable', params: {} }))
+
+    await new Promise((r) => setTimeout(r, 10))
+    await proxy.stop()
+
+    resolveCommand!({})
+    await new Promise((r) => setTimeout(r, 10))
+
+    expect(responses).toEqual([])
+    client.close()
+  })
+})
--- a/src/main/browser/cdp-ws-proxy.ts
+++ b/src/main/browser/cdp-ws-proxy.ts
@ -0,0 +1,301 @@
+import { WebSocketServer, WebSocket } from 'ws'
+import { createServer, type Server, type IncomingMessage, type ServerResponse } from 'http'
+import type { WebContents } from 'electron'
+import { captureScreenshot } from './cdp-screenshot'
+
+/**
+ * Loopback bridge that lets an external CDP client drive one Electron
+ * `WebContents` through `webContents.debugger`.
+ *
+ * The proxy listens on 127.0.0.1 with an OS-assigned port, answers the
+ * `/json/version` and `/json/list` discovery endpoints plus a handful of
+ * `Target.*` / `Browser.*` commands itself, and forwards all other commands to
+ * the debugger. Only one WebSocket client is served at a time: a new
+ * connection closes the previous one.
+ */
+export class CdpWsProxy {
+  private httpServer: Server | null = null
+  private wss: WebSocketServer | null = null
+  // The single client currently being served, if any.
+  private client: WebSocket | null = null
+  private port = 0
+  private debuggerMessageHandler: ((...args: unknown[]) => void) | null = null
+  private debuggerDetachHandler: ((...args: unknown[]) => void) | null = null
+  private attached = false
+  // Why: agent-browser filters events by sessionId from Target.attachToTarget.
+  private clientSessionId: string | undefined = undefined
+
+  constructor(private readonly webContents: WebContents) {}
+
+  /**
+   * Attaches the debugger and starts the HTTP/WebSocket server.
+   *
+   * @returns The `ws://127.0.0.1:<port>` endpoint clients should connect to.
+   * @throws If the debugger cannot be attached or the server fails to bind.
+   */
+  async start(): Promise<string> {
+    await this.attachDebugger()
+    return new Promise<string>((resolve, reject) => {
+      this.httpServer = createServer((req, res) => this.handleHttpRequest(req, res))
+      this.wss = new WebSocketServer({ server: this.httpServer })
+      this.wss.on('connection', (ws) => {
+        // A newer connection replaces (and closes) the previous client.
+        if (this.client) {
+          this.client.close()
+        }
+        this.client = ws
+        ws.on('message', (data) => this.handleClientMessage(ws, data.toString()))
+        ws.on('close', () => {
+          if (this.client === ws) {
+            this.client = null
+          }
+        })
+      })
+      this.httpServer.listen(0, '127.0.0.1', () => {
+        const addr = this.httpServer!.address()
+        if (typeof addr === 'object' && addr) {
+          this.port = addr.port
+          resolve(`ws://127.0.0.1:${this.port}`)
+        } else {
+          reject(new Error('Failed to bind proxy server'))
+        }
+      })
+      this.httpServer.on('error', reject)
+    })
+  }
+
+  /**
+   * Detaches the debugger and closes the client and both servers. The close
+   * calls are issued but not awaited. `getPort()` keeps returning the last
+   * bound port afterwards.
+   */
+  async stop(): Promise<void> {
+    this.detachDebugger()
+    if (this.client) {
+      this.client.close()
+      this.client = null
+    }
+    if (this.wss) {
+      this.wss.close()
+      this.wss = null
+    }
+    if (this.httpServer) {
+      this.httpServer.close()
+      this.httpServer = null
+    }
+  }
+
+  /** Port the proxy bound to, or 0 if `start()` has not bound one yet. */
+  getPort(): number {
+    return this.port
+  }
+
+  // Serialises `payload` to `client`, silently skipping sockets that are not open.
+  private send(payload: unknown, client = this.client): void {
+    if (client?.readyState === WebSocket.OPEN) {
+      client.send(JSON.stringify(payload))
+    }
+  }
+
+  private sendResult(clientId: number, result: unknown, client = this.client): void {
+    this.send({ id: clientId, result }, client)
+  }
+
+  private sendError(clientId: number, message: string, client = this.client): void {
+    this.send({ id: clientId, error: { code: -32000, message } }, client)
+  }
+
+  // Describes the proxied page as a single synthetic CDP target.
+  private buildTargetInfo(): Record<string, unknown> {
+    const destroyed = this.webContents.isDestroyed()
+    return {
+      targetId: 'orca-proxy-target',
+      type: 'page',
+      title: destroyed ? '' : this.webContents.getTitle(),
+      url: destroyed ? '' : this.webContents.getURL(),
+      attached: true,
+      canAccessOpener: false
+    }
+  }
+
+  // Serves the `/json/version` and `/json[/list]` discovery endpoints; 404 otherwise.
+  private handleHttpRequest(req: IncomingMessage, res: ServerResponse): void {
+    const url = req.url ?? ''
+    if (url === '/json/version' || url === '/json/version/') {
+      res.writeHead(200, { 'Content-Type': 'application/json' })
+      res.end(
+        JSON.stringify({
+          Browser: 'Orca/CdpWsProxy',
+          'Protocol-Version': '1.3',
+          webSocketDebuggerUrl: `ws://127.0.0.1:${this.port}`
+        })
+      )
+      return
+    }
+    if (url === '/json' || url === '/json/' || url === '/json/list' || url === '/json/list/') {
+      res.writeHead(200, { 'Content-Type': 'application/json' })
+      res.end(
+        JSON.stringify([
+          {
+            ...this.buildTargetInfo(),
+            id: 'orca-proxy-target',
+            webSocketDebuggerUrl: `ws://127.0.0.1:${this.port}`
+          }
+        ])
+      )
+      return
+    }
+    res.writeHead(404)
+    res.end()
+  }
+
+  // Attaches the debugger (if nobody has yet) and wires event forwarding.
+  private async attachDebugger(): Promise<void> {
+    if (this.attached) {
+      return
+    }
+    if (!this.webContents.debugger.isAttached()) {
+      try {
+        this.webContents.debugger.attach('1.3')
+      } catch {
+        throw new Error('Could not attach debugger. DevTools may already be open for this tab.')
+      }
+    }
+    this.attached = true
+    this.debuggerMessageHandler = (_event: unknown, ...rest: unknown[]) => {
+      const [method, params, sessionId] = rest as [
+        string,
+        Record<string, unknown>,
+        string | undefined
+      ]
+      // Why: Electron passes empty string (not undefined) for root-session events, but
+      // agent-browser filters events by the sessionId from Target.attachToTarget.
+      const msg: Record<string, unknown> = { method, params }
+      msg.sessionId = sessionId || this.clientSessionId
+      // `send` drops the event when no client is connected and open.
+      this.send(msg)
+    }
+    this.debuggerDetachHandler = () => {
+      this.attached = false
+      // Fire-and-forget: nothing awaits the detach event handler.
+      void this.stop()
+    }
+    this.webContents.debugger.on('message', this.debuggerMessageHandler as never)
+    this.webContents.debugger.on('detach', this.debuggerDetachHandler as never)
+  }
+
+  // Removes the event listeners and detaches the debugger if still attached.
+  private detachDebugger(): void {
+    if (this.debuggerMessageHandler) {
+      this.webContents.debugger.removeListener('message', this.debuggerMessageHandler as never)
+      this.debuggerMessageHandler = null
+    }
+    if (this.debuggerDetachHandler) {
+      this.webContents.debugger.removeListener('detach', this.debuggerDetachHandler as never)
+      this.debuggerDetachHandler = null
+    }
+    if (this.attached) {
+      try {
+        this.webContents.debugger.detach()
+      } catch {
+        /* already detached */
+      }
+      this.attached = false
+    }
+  }
+
+  // Handles one raw client frame: answers emulated commands locally and
+  // forwards everything else. Malformed frames and frames without both `id`
+  // and `method` are ignored.
+  private handleClientMessage(client: WebSocket, raw: string): void {
+    let parsed: unknown
+    try {
+      parsed = JSON.parse(raw)
+    } catch {
+      return
+    }
+    // Why: JSON.parse('null') succeeds, and reading `.id` off null would throw
+    // inside the ws 'message' listener instead of being ignored like other
+    // malformed frames.
+    if (typeof parsed !== 'object' || parsed === null) {
+      return
+    }
+    const msg = parsed as {
+      id?: number
+      method?: string
+      params?: Record<string, unknown>
+      sessionId?: string
+    }
+    if (msg.id == null || !msg.method) {
+      return
+    }
+    const clientId = msg.id
+
+    if (msg.method === 'Target.getTargets') {
+      this.sendResult(clientId, { targetInfos: [this.buildTargetInfo()] }, client)
+      return
+    }
+    if (msg.method === 'Target.getTargetInfo') {
+      this.sendResult(clientId, { targetInfo: this.buildTargetInfo() }, client)
+      return
+    }
+    if (msg.method === 'Target.setDiscoverTargets' || msg.method === 'Target.detachFromTarget') {
+      if (msg.method === 'Target.detachFromTarget') {
+        this.clientSessionId = undefined
+      }
+      this.sendResult(clientId, {}, client)
+      return
+    }
+    if (msg.method === 'Target.attachToTarget') {
+      this.clientSessionId = 'orca-proxy-session'
+      this.sendResult(clientId, { sessionId: this.clientSessionId }, client)
+      return
+    }
+    if (msg.method === 'Browser.getVersion') {
+      this.sendResult(
+        clientId,
+        {
+          protocolVersion: '1.3',
+          product: 'Orca/Electron',
+          userAgent: '',
+          jsVersion: ''
+        },
+        client
+      )
+      return
+    }
+    if (msg.method === 'Page.bringToFront') {
+      if (!this.webContents.isDestroyed()) {
+        this.webContents.focus()
+      }
+      this.sendResult(clientId, {}, client)
+      return
+    }
+    // Why: Page.captureScreenshot via debugger.sendCommand hangs on Electron webview guests.
+    if (msg.method === 'Page.captureScreenshot') {
+      this.handleScreenshot(client, clientId, msg.params)
+      return
+    }
+    // Why: Input.insertText can still require native focus in Electron webviews.
+    // Do not auto-focus generic Runtime.evaluate/callFunctionOn traffic: wait
+    // polling and read-only JS probes use those methods heavily, and focusing on
+    // every eval steals the user's foreground window while background automation
+    // is running.
+    if (msg.method === 'Input.insertText' && !this.webContents.isDestroyed()) {
+      this.webContents.focus()
+    }
+    // Why: agent-browser waits for network idle to detect navigation completion.
+    // Electron webview CDP subscriptions silently lapse after cross-process swaps.
+    if (msg.method === 'Page.navigate' && !this.webContents.isDestroyed()) {
+      void this.navigateWithLifecycleEnsured(client, clientId, msg.params ?? {}, msg.sessionId)
+      return
+    }
+    this.forwardCommand(client, clientId, msg.method, msg.params ?? {}, msg.sessionId)
+  }
+
+  // Forwards one command to the debugger and relays its result or error to
+  // `client`. The proxy's own synthetic session id maps to the root session
+  // (no sessionId); any other session id is passed through.
+  private forwardCommand(
+    client: WebSocket,
+    clientId: number,
+    method: string,
+    params: Record<string, unknown>,
+    msgSessionId?: string
+  ): void {
+    const sessionId =
+      msgSessionId && msgSessionId !== this.clientSessionId ? msgSessionId : undefined
+    const describeError = (err: unknown): string =>
+      err instanceof Error ? err.message : String(err)
+
+    let pending: Promise<unknown>
+    try {
+      pending = this.webContents.debugger.sendCommand(method, params, sessionId)
+    } catch (err) {
+      // Why: a synchronous throw here would otherwise escape the ws 'message'
+      // listener; answer the client with a CDP error like any other failure.
+      this.sendError(clientId, describeError(err), client)
+      return
+    }
+    pending
+      .then((result) => {
+        this.sendResult(clientId, result, client)
+      })
+      .catch((err: unknown) => {
+        this.sendError(clientId, describeError(err), client)
+      })
+  }
+
+  // Re-enables the Network/Page domains and lifecycle events on the root
+  // session (best-effort), then forwards the navigation itself.
+  private async navigateWithLifecycleEnsured(
+    client: WebSocket,
+    clientId: number,
+    params: Record<string, unknown>,
+    msgSessionId?: string
+  ): Promise<void> {
+    try {
+      const dbg = this.webContents.debugger
+      // Why: without Network.enable, agent-browser never sees network idle → goto times out.
+      await dbg.sendCommand('Network.enable', {})
+      await dbg.sendCommand('Page.enable', {})
+      await dbg.sendCommand('Page.setLifecycleEventsEnabled', { enabled: true })
+    } catch {
+      /* best-effort */
+    }
+    // Why: keep the caller's sessionId so a navigation addressed to a child
+    // session is routed the same way as every other forwarded command.
+    this.forwardCommand(client, clientId, 'Page.navigate', params, msgSessionId)
+  }
+
+  // Routes Page.captureScreenshot through captureScreenshot() and relays the outcome.
+  private handleScreenshot(
+    client: WebSocket,
+    clientId: number,
+    params?: Record<string, unknown>
+  ): void {
+    captureScreenshot(
+      this.webContents,
+      params,
+      (result) => this.sendResult(clientId, result, client),
+      (message) => this.sendError(clientId, message, client)
+    )
+  }
+}
--- a/src/main/browser/snapshot-engine.test.ts
+++ b/src/main/browser/snapshot-engine.test.ts
@ -0,0 +1,196 @@
+import { describe, expect, it, vi } from 'vitest'
+import { buildSnapshot, type CdpCommandSender } from './snapshot-engine'
+
+// Shape of the accessibility nodes these tests return from the stubbed
+// `Accessibility.getFullAXTree` command.
+type AXNode = {
+  nodeId: string
+  backendDOMNodeId?: number
+  role?: { type: string; value: string }
+  name?: { type: string; value: string }
+  properties?: { name: string; value: { type: string; value: unknown } }[]
+  childIds?: string[]
+  ignored?: boolean
+}
+
+/**
+ * Creates a stub CDP sender that accepts `Accessibility.enable`, answers
+ * `Accessibility.getFullAXTree` with `nodes`, and rejects any other method.
+ */
+function makeSender(nodes: AXNode[]): CdpCommandSender {
+  return vi.fn(async (method: string) => {
+    switch (method) {
+      case 'Accessibility.enable':
+        return {}
+      case 'Accessibility.getFullAXTree':
+        return { nodes }
+      default:
+        throw new Error(`Unexpected CDP method: ${method}`)
+    }
+  })
+}
+
+/**
+ * Builds one AX node fixture.
+ *
+ * @param id - Node id; also parsed as the backend DOM node id when
+ *   `opts.backendDOMNodeId` is not given.
+ * @param role - Role value.
+ * @param name - Computed accessible name.
+ * @param opts - Optional children, explicit backend id, ignored flag, properties.
+ */
+function node(
+  id: string,
+  role: string,
+  name: string,
+  opts?: {
+    childIds?: string[]
+    backendDOMNodeId?: number
+    ignored?: boolean
+    properties?: AXNode['properties']
+  }
+): AXNode {
+  const roleValue = { type: 'role', value: role }
+  const nameValue = { type: 'computedString', value: name }
+  const backendDOMNodeId = opts?.backendDOMNodeId ?? parseInt(id, 10)
+
+  return {
+    nodeId: id,
+    backendDOMNodeId,
+    role: roleValue,
+    name: nameValue,
+    childIds: opts?.childIds,
+    ignored: opts?.ignored,
+    properties: opts?.properties
+  }
+}
+
+describe('buildSnapshot', () => {
+  // Runs buildSnapshot against a sender that replays the given AX nodes.
+  const snapshotOf = (nodes: AXNode[]) => buildSnapshot(makeSender(nodes))
+  // Root WebArea node (id '1') that every non-empty fixture hangs off.
+  const page = (...childIds: string[]): AXNode => node('1', 'WebArea', 'page', { childIds })
+
+  it('returns empty snapshot for empty tree', async () => {
+    const { snapshot, refs, refMap } = await snapshotOf([])
+
+    expect(snapshot).toBe('')
+    expect(refs).toEqual([])
+    expect(refMap.size).toBe(0)
+  })
+
+  it('assigns refs to interactive elements', async () => {
+    const { snapshot, refs } = await snapshotOf([
+      page('2', '3'),
+      node('2', 'button', 'Submit', { backendDOMNodeId: 10 }),
+      node('3', 'link', 'Home', { backendDOMNodeId: 11 })
+    ])
+
+    expect(refs).toHaveLength(2)
+    expect(refs[0]).toEqual({ ref: '@e1', role: 'button', name: 'Submit' })
+    expect(refs[1]).toEqual({ ref: '@e2', role: 'link', name: 'Home' })
+    expect(snapshot).toContain('[@e1] button "Submit"')
+    expect(snapshot).toContain('[@e2] link "Home"')
+  })
+
+  it('renders text inputs with friendly role name', async () => {
+    const { snapshot } = await snapshotOf([
+      page('2'),
+      node('2', 'textbox', 'Email', { backendDOMNodeId: 10 })
+    ])
+
+    expect(snapshot).toContain('text input "Email"')
+  })
+
+  it('renders landmarks without refs', async () => {
+    const { snapshot, refs } = await snapshotOf([
+      page('2'),
+      node('2', 'navigation', 'Main Nav', { childIds: ['3'] }),
+      node('3', 'link', 'About', { backendDOMNodeId: 10 })
+    ])
+
+    expect(snapshot).toContain('[Main Nav]')
+    expect(refs).toHaveLength(1)
+    expect(refs[0].name).toBe('About')
+  })
+
+  it('renders headings without refs', async () => {
+    const { snapshot, refs } = await snapshotOf([page('2'), node('2', 'heading', 'Welcome')])
+
+    expect(snapshot).toContain('heading "Welcome"')
+    expect(refs).toHaveLength(0)
+  })
+
+  it('renders static text without refs', async () => {
+    const { snapshot, refs } = await snapshotOf([page('2'), node('2', 'staticText', 'Hello world')])
+
+    expect(snapshot).toContain('text "Hello world"')
+    expect(refs).toHaveLength(0)
+  })
+
+  it('skips generic/none/presentation roles', async () => {
+    const { snapshot, refs } = await snapshotOf([
+      page('2'),
+      node('2', 'generic', '', { childIds: ['3'] }),
+      node('3', 'button', 'OK', { backendDOMNodeId: 10 })
+    ])
+
+    expect(refs).toHaveLength(1)
+    expect(refs[0].name).toBe('OK')
+    expect(snapshot).not.toContain('generic')
+  })
+
+  it('skips ignored nodes but walks their children', async () => {
+    const { refs } = await snapshotOf([
+      page('2'),
+      node('2', 'group', 'ignored group', { childIds: ['3'], ignored: true }),
+      node('3', 'button', 'Deep', { backendDOMNodeId: 10 })
+    ])
+
+    expect(refs).toHaveLength(1)
+    expect(refs[0].name).toBe('Deep')
+  })
+
+  it('skips interactive elements without a name', async () => {
+    const { refs } = await snapshotOf([
+      page('2', '3'),
+      node('2', 'button', '', { backendDOMNodeId: 10 }),
+      node('3', 'button', 'Labeled', { backendDOMNodeId: 11 })
+    ])
+
+    expect(refs).toHaveLength(1)
+    expect(refs[0].name).toBe('Labeled')
+  })
+
+  it('populates refMap with backendDOMNodeId', async () => {
+    const { refMap } = await snapshotOf([
+      page('2'),
+      node('2', 'checkbox', 'Agree', { backendDOMNodeId: 42 })
+    ])
+
+    const entry = refMap.get('@e1')
+    expect(entry).toBeDefined()
+    expect(entry!.backendDOMNodeId).toBe(42)
+    expect(entry!.role).toBe('checkbox')
+    expect(entry!.name).toBe('Agree')
+  })
+
+  it('indents children under landmarks', async () => {
+    const { snapshot } = await snapshotOf([
+      page('2'),
+      node('2', 'main', '', { childIds: ['3'] }),
+      node('3', 'button', 'Action', { backendDOMNodeId: 10 })
+    ])
+
+    const lines = snapshot.split('\n')
+    const mainLine = lines.find((line) => line.includes('[Main Content]'))
+    const buttonLine = lines.find((line) => line.includes('Action'))
+    expect(mainLine).toBeDefined()
+    expect(buttonLine).toBeDefined()
+    expect(buttonLine!.startsWith('  ')).toBe(true)
+  })
+
+  it('handles a realistic page structure', async () => {
+    const { snapshot, refs } = await snapshotOf([
+      page('2', '3', '4'),
+      node('2', 'banner', '', { childIds: ['5'] }),
+      node('3', 'main', '', { childIds: ['6', '7', '8'] }),
+      node('4', 'contentinfo', '', {}),
+      node('5', 'link', 'Logo', { backendDOMNodeId: 10 }),
+      node('6', 'heading', 'Dashboard'),
+      node('7', 'textbox', 'Search', { backendDOMNodeId: 20 }),
+      node('8', 'button', 'Go', { backendDOMNodeId: 21 })
+    ])
+
+    expect(refs).toHaveLength(3)
+    expect(refs.map((r) => r.name)).toEqual(['Logo', 'Search', 'Go'])
+
+    for (const expected of ['[Header]', '[Main Content]', '[Footer]', 'heading "Dashboard"']) {
+      expect(snapshot).toContain(expected)
+    }
+  })
+})
--- a/src/main/browser/snapshot-engine.ts
+++ b/src/main/browser/snapshot-engine.ts
@ -0,0 +1,451 @@
+/* eslint-disable max-lines -- Why: snapshot building, AX tree walking, ref mapping, and cursor-interactive detection are tightly coupled and belong in one module. */
+import type { BrowserSnapshotRef } from '../../shared/runtime-types'
+
+// Sends one CDP command (for example 'Accessibility.getFullAXTree') with
+// optional params and resolves with the raw, untyped response.
+export type CdpCommandSender = (
+  method: string,
+  params?: Record<string, unknown>
+) => Promise<unknown>
+
+// The subset of a CDP accessibility-tree node that this module reads.
+type AXNode = {
+  nodeId: string
+  backendDOMNodeId?: number
+  role?: { type: string; value: string }
+  name?: { type: string; value: string }
+  properties?: { name: string; value: { type: string; value: unknown } }[]
+  // Ids of child nodes; resolved through a nodeId -> node map during the walk.
+  childIds?: string[]
+  ignored?: boolean
+}
+
+// One line of the rendered snapshot. `ref` is '' for lines that are not
+// interactive (landmarks, headings, text). `backendDOMNodeId` is 0 when the
+// AX node did not carry one. `depth` is the indentation level of the line.
+type SnapshotEntry = {
+  ref: string
+  role: string
+  name: string
+  backendDOMNodeId: number
+  depth: number
+}
+
+// What is remembered about a ref so the element can be located again later.
+// `name` is the raw name, without the "(2nd)"-style suffix shown in the
+// snapshot. `sessionId` is set only for refs found through an iframe session.
+export type RefEntry = {
+  backendDOMNodeId: number
+  role: string
+  name: string
+  sessionId?: string
+  // Why: when multiple elements share the same role+name, nth tracks which
+  // occurrence this ref represents (1-indexed). Used during stale ref recovery
+  // to disambiguate duplicates. Left undefined when the role+name is unique.
+  nth?: number
+}
+
+// Result of buildSnapshot: the rendered text (one entry per line), the refs
+// in the order they appear in that text, and a lookup from ref to RefEntry.
+export type SnapshotResult = {
+  snapshot: string
+  refs: BrowserSnapshotRef[]
+  refMap: Map<string, RefEntry>
+}
+
+// AX roles treated as interactive controls. Nodes with these roles are the
+// ones that can be assigned an `@eN` ref during the tree walk.
+const INTERACTIVE_ROLES = new Set([
+  'button',
+  'link',
+  'textbox',
+  'searchbox',
+  'combobox',
+  'checkbox',
+  'radio',
+  'switch',
+  'slider',
+  'spinbutton',
+  'menuitem',
+  'menuitemcheckbox',
+  'menuitemradio',
+  'tab',
+  'option',
+  'treeitem'
+])
+
+// AX roles rendered as bracketed section labels. Their children are emitted
+// one indentation level deeper.
+const LANDMARK_ROLES = new Set([
+  'banner',
+  'navigation',
+  'main',
+  'complementary',
+  'contentinfo',
+  'region',
+  'form',
+  'search'
+])
+
+// Matches the role string 'heading' exactly (anchored at both ends).
+const HEADING_PATTERN = /^heading$/
+
+// Roles that never produce a snapshot line themselves; the walk continues
+// into their children at the same depth.
+const SKIP_ROLES = new Set(['none', 'presentation', 'generic'])
+
+/**
+ * Builds a text snapshot of the page from its accessibility tree.
+ *
+ * Walks the main frame's AX tree, adds clickable DOM elements that the tree
+ * walk did not cover, then appends the AX trees of any iframe sessions. Every
+ * interactive element receives a ref of the form `@eN`, numbered from 1 in the
+ * order elements are discovered.
+ *
+ * @param sendCommand - Sends CDP commands to the main frame's session.
+ * @param iframeSessions - Optional map whose values are iframe CDP session ids.
+ * @param makeIframeSender - Creates a command sender bound to an iframe
+ *   session id. Iframes are only walked when this and `iframeSessions` are
+ *   both provided.
+ * @returns The rendered snapshot text, the refs in snapshot order, and a map
+ *   from ref to the data needed to locate the element again. All three are
+ *   empty when the main AX tree has no nodes.
+ */
+export async function buildSnapshot(
+  sendCommand: CdpCommandSender,
+  iframeSessions?: Map<string, string>,
+  makeIframeSender?: (sessionId: string) => CdpCommandSender
+): Promise<SnapshotResult> {
+  await sendCommand('Accessibility.enable')
+  const { nodes } = (await sendCommand('Accessibility.getFullAXTree')) as { nodes: AXNode[] }
+
+  const nodeById = new Map<string, AXNode>()
+  for (const node of nodes) {
+    nodeById.set(node.nodeId, node)
+  }
+
+  const entries: SnapshotEntry[] = []
+  let refCounter = 1
+
+  // The first node of the response is used as the tree root.
+  const root = nodes[0]
+  if (!root) {
+    return { snapshot: '', refs: [], refMap: new Map() }
+  }
+
+  walkTree(root, nodeById, 0, entries, () => refCounter++)
+
+  // Why: many modern SPAs use styled <div>s, <span>s, and custom elements as
+  // interactive controls without proper ARIA roles. These elements are invisible
+  // to the accessibility tree walk above but are clearly interactive (cursor:pointer,
+  // onclick, tabindex, contenteditable). This DOM query pass discovers them and
+  // promotes them to interactive refs so the agent can interact with them.
+  const cursorInteractiveEntries = await findCursorInteractiveElements(sendCommand, entries)
+  for (const cie of cursorInteractiveEntries) {
+    cie.ref = `@e${refCounter++}`
+    entries.push(cie)
+  }
+
+  // Why: cross-origin iframes have their own AX trees accessible only through
+  // their dedicated CDP session. Append their elements after the parent tree
+  // so the agent can see and interact with iframe content.
+  //
+  // Why a Map keyed by ref: the render loop below needs the session of every
+  // ref; a keyed lookup avoids rescanning a list once per ref.
+  const sessionIdByRef = new Map<string, string>()
+  if (iframeSessions && makeIframeSender && iframeSessions.size > 0) {
+    for (const sessionId of iframeSessions.values()) {
+      try {
+        const iframeSender = makeIframeSender(sessionId)
+        await iframeSender('Accessibility.enable')
+        const { nodes: iframeNodes } = (await iframeSender('Accessibility.getFullAXTree')) as {
+          nodes: AXNode[]
+        }
+        const iframeRoot = iframeNodes[0]
+        if (!iframeRoot) {
+          continue
+        }
+        const iframeNodeById = new Map<string, AXNode>()
+        for (const n of iframeNodes) {
+          iframeNodeById.set(n.nodeId, n)
+        }
+        // Why: the session is recorded at the moment each ref is allocated, so
+        // a ref that made it into `entries` always has its session even if the
+        // walk throws part-way through.
+        walkTree(iframeRoot, iframeNodeById, 1, entries, () => {
+          const refNumber = refCounter++
+          sessionIdByRef.set(`@e${refNumber}`, sessionId)
+          return refNumber
+        })
+      } catch {
+        // Iframe session may be stale — skip silently
+      }
+    }
+  }
+
+  const refMap = new Map<string, RefEntry>()
+  const refs: BrowserSnapshotRef[] = []
+  const lines: string[] = []
+
+  // Why: when multiple elements share the same role+name (e.g. 3 "Submit"
+  // buttons), the agent can't distinguish them from text alone. Appending a
+  // disambiguation suffix like "(2nd)" lets the agent refer to duplicates.
+  // The first occurrence keeps its plain name; only later ones get a suffix.
+  const nameCounts = new Map<string, number>()
+  const nameOccurrence = new Map<string, number>()
+  for (const entry of entries) {
+    if (entry.ref) {
+      const key = `${entry.role}:${entry.name}`
+      nameCounts.set(key, (nameCounts.get(key) ?? 0) + 1)
+    }
+  }
+
+  for (const entry of entries) {
+    const indent = '  '.repeat(entry.depth)
+    if (entry.ref) {
+      const key = `${entry.role}:${entry.name}`
+      const total = nameCounts.get(key) ?? 1
+      let displayName = entry.name
+      const nth = (nameOccurrence.get(key) ?? 0) + 1
+      nameOccurrence.set(key, nth)
+      if (total > 1 && nth > 1) {
+        displayName = `${entry.name} (${ordinal(nth)})`
+      }
+      lines.push(`${indent}[${entry.ref}] ${entry.role} "${displayName}"`)
+      refs.push({ ref: entry.ref, role: entry.role, name: displayName })
+      // refMap keeps the raw name plus `nth`; the suffix is display-only.
+      refMap.set(entry.ref, {
+        backendDOMNodeId: entry.backendDOMNodeId,
+        role: entry.role,
+        name: entry.name,
+        sessionId: sessionIdByRef.get(entry.ref),
+        nth: total > 1 ? nth : undefined
+      })
+    } else {
+      lines.push(`${indent}${entry.role} "${entry.name}"`)
+    }
+  }
+
+  return { snapshot: lines.join('\n'), refs, refMap }
+}
+
+/**
+ * Recursively converts one AX node and its subtree into snapshot entries.
+ *
+ * Only interactive controls, headings, landmarks and static text can produce
+ * an entry; any other node is transparent and its children are walked at the
+ * same depth. Landmarks are the only nodes that increase the depth of their
+ * children. A heading, static-text or interactive node that produces an entry
+ * ends the descent: its children are not visited.
+ *
+ * @param node - The AX node to process.
+ * @param nodeById - Lookup used to resolve `childIds`.
+ * @param depth - Indentation level for an entry produced by this node.
+ * @param entries - Output list; entries are appended in visit order.
+ * @param nextRef - Returns the next ref number; called once per interactive entry.
+ */
+function walkTree(
+  node: AXNode,
+  nodeById: Map<string, AXNode>,
+  depth: number,
+  entries: SnapshotEntry[],
+  nextRef: () => number
+): void {
+  // Ignored nodes emit nothing themselves, but their descendants are still walked.
+  if (node.ignored) {
+    walkChildren(node, nodeById, depth, entries, nextRef)
+    return
+  }
+
+  const role = node.role?.value ?? ''
+  const name = node.name?.value ?? ''
+
+  if (SKIP_ROLES.has(role)) {
+    walkChildren(node, nodeById, depth, entries, nextRef)
+    return
+  }
+
+  const isInteractive = INTERACTIVE_ROLES.has(role)
+  const isHeading = HEADING_PATTERN.test(role)
+  const isLandmark = LANDMARK_ROLES.has(role)
+  // Both casings of the static-text role are accepted.
+  const isStaticText = role === 'staticText' || role === 'StaticText'
+
+  if (!isInteractive && !isHeading && !isLandmark && !isStaticText) {
+    walkChildren(node, nodeById, depth, entries, nextRef)
+    return
+  }
+
+  // Unnamed nodes are only kept when they are landmarks (which have default
+  // labels). Everything past this point, other than a landmark, has a name.
+  if (!name && !isLandmark) {
+    walkChildren(node, nodeById, depth, entries, nextRef)
+    return
+  }
+
+  const hasFocusable = isInteractive && isFocusable(node)
+
+  if (isLandmark) {
+    entries.push({
+      ref: '',
+      role: formatLandmarkRole(role, name),
+      name: name || role,
+      backendDOMNodeId: node.backendDOMNodeId ?? 0,
+      depth
+    })
+    // Landmark children are indented one level below the landmark line.
+    walkChildren(node, nodeById, depth + 1, entries, nextRef)
+    return
+  }
+
+  if (isHeading) {
+    entries.push({
+      ref: '',
+      role: 'heading',
+      name,
+      backendDOMNodeId: node.backendDOMNodeId ?? 0,
+      depth
+    })
+    return
+  }
+
+  // Whitespace-only text produces no entry and falls through to the end.
+  if (isStaticText && name.trim().length > 0) {
+    entries.push({
+      ref: '',
+      role: 'text',
+      name: name.trim(),
+      backendDOMNodeId: node.backendDOMNodeId ?? 0,
+      depth
+    })
+    return
+  }
+
+  // A ref is issued when the control is focusable or has a backend DOM node id.
+  if (isInteractive && (hasFocusable || node.backendDOMNodeId)) {
+    const ref = `@e${nextRef()}`
+    entries.push({
+      ref,
+      role: formatInteractiveRole(role),
+      // NOTE(review): the '(unlabeled)' fallback cannot trigger here because
+      // unnamed non-landmark nodes returned earlier — confirm whether unnamed
+      // controls (e.g. icon-only buttons) were meant to receive refs.
+      name: name || '(unlabeled)',
+      backendDOMNodeId: node.backendDOMNodeId ?? 0,
+      depth
+    })
+    return
+  }
+
+  walkChildren(node, nodeById, depth, entries, nextRef)
+}
+
+/**
+ * Walks every child of `node` that can be resolved through `nodeById`,
+ * passing `depth` through unchanged. Child ids with no matching node are
+ * skipped; a node without `childIds` is a no-op.
+ */
+function walkChildren(
+  node: AXNode,
+  nodeById: Map<string, AXNode>,
+  depth: number,
+  entries: SnapshotEntry[],
+  nextRef: () => number
+): void {
+  const childIds = node.childIds || []
+  for (const id of childIds) {
+    const childNode = nodeById.get(id)
+    if (!childNode) {
+      continue
+    }
+    walkTree(childNode, nodeById, depth, entries, nextRef)
+  }
+}
+
+/**
+ * Reports whether a node should be treated as focusable. The answer is
+ * `false` only when the first property named 'focusable' is explicitly
+ * `false`; a node without properties, or without that property, counts as
+ * focusable.
+ */
+function isFocusable(node: AXNode): boolean {
+  const properties = node.properties || []
+  const focusableProp = properties.find((prop) => prop.name === 'focusable')
+  return focusableProp?.value.value !== false
+}
+
+// Display labels for interactive roles whose snapshot label differs from the
+// raw AX role. Roles not listed here are shown unchanged.
+const INTERACTIVE_ROLE_LABELS = new Map<string, string>([
+  ['textbox', 'text input'],
+  ['searchbox', 'text input'],
+  ['menuitem', 'menu item'],
+  ['menuitemcheckbox', 'menu item'],
+  ['menuitemradio', 'menu item'],
+  ['spinbutton', 'number input'],
+  ['treeitem', 'tree item']
+])
+
+/** Maps an interactive AX role to the label shown in the snapshot. */
+function formatInteractiveRole(role: string): string {
+  return INTERACTIVE_ROLE_LABELS.get(role) ?? role
+}
+
+// Default bracketed labels for landmarks that have no accessible name.
+const DEFAULT_LANDMARK_LABELS = new Map<string, string>([
+  ['banner', '[Header]'],
+  ['navigation', '[Navigation]'],
+  ['main', '[Main Content]'],
+  ['complementary', '[Sidebar]'],
+  ['contentinfo', '[Footer]'],
+  ['search', '[Search]']
+])
+
+/**
+ * Returns the bracketed label for a landmark: its own name when it has one,
+ * otherwise the default label for the role, otherwise the raw role.
+ */
+function formatLandmarkRole(role: string, name: string): string {
+  if (!name) {
+    return DEFAULT_LANDMARK_LABELS.get(role) ?? `[${role}]`
+  }
+  return `[${name}]`
+}
+
+/**
+ * Formats a number with its English ordinal suffix, e.g. 1st, 2nd, 3rd, 4th,
+ * 11th, 21st.
+ */
+function ordinal(n: number): string {
+  const suffixes = ['th', 'st', 'nd', 'rd']
+  const lastTwoDigits = n % 100
+  // For 21 and above the final digit picks the suffix; below 20 the first
+  // lookup misses and the value itself is used, so 11-13 fall back to 'th'.
+  const suffix =
+    suffixes[(lastTwoDigits - 20) % 10] || suffixes[lastTwoDigits] || suffixes[0]
+  return `${n}${suffix}`
+}
+
+// Why: finds DOM elements that are visually interactive (cursor:pointer, onclick,
+// tabindex, contenteditable) but lack standard ARIA roles. These are common in
+// modern SPAs where styled <div>s act as buttons.
+//
+// How: one page-side script collects up to 50 candidates, stores the elements
+// on a temporary window global and returns their text/tag as JSON. Each element
+// is then resolved to a backendNodeId via Runtime.evaluate + DOM.describeNode.
+// Elements whose backendNodeId is already present in `existingEntries` are
+// dropped. Returned entries have an empty `ref`; the caller assigns refs.
+//
+// Failures are non-fatal: whatever was collected so far is returned. The
+// temporary global is removed in a `finally` so it does not stay on the page
+// (holding DOM references) when parsing or resolution fails.
+async function findCursorInteractiveElements(
+  sendCommand: CdpCommandSender,
+  existingEntries: SnapshotEntry[]
+): Promise<SnapshotEntry[]> {
+  const existingNodeIds = new Set(existingEntries.map((e) => e.backendDOMNodeId))
+  const results: SnapshotEntry[] = []
+  // True once the page-side script has been evaluated, i.e. once the temporary
+  // global may exist and therefore needs cleaning up.
+  let pageScanRan = false
+
+  try {
+    // Single evaluate call that finds interactive elements and returns their info
+    // along with a way to reference them by index
+    const { result } = (await sendCommand('Runtime.evaluate', {
+      expression: `(() => {
+        const SKIP_ROLES = new Set(['button','link','textbox','checkbox','radio','tab',
+          'menuitem','option','switch','slider','combobox','searchbox','spinbutton','treeitem',
+          'menuitemcheckbox','menuitemradio']);
+        const SKIP_TAGS = new Set(['input','button','select','textarea','a']);
+        const seen = new Set();
+        const found = [];
+        const matchedElements = [];
+
+        function check(el) {
+          if (seen.has(el)) return;
+          seen.add(el);
+          const tag = el.tagName.toLowerCase();
+          if (SKIP_TAGS.has(tag)) return;
+          const role = el.getAttribute('role');
+          if (role && SKIP_ROLES.has(role)) return;
+          const rect = el.getBoundingClientRect();
+          if (rect.width === 0 || rect.height === 0) return;
+          const text = (el.ariaLabel || el.getAttribute('aria-label') || el.textContent || '').trim().slice(0, 80);
+          if (!text) return;
+          found.push({ text, tag });
+          matchedElements.push(el);
+          if (found.length >= 50) return;
+        }
+
+        document.querySelectorAll('[onclick], [tabindex]:not([tabindex="-1"]), [contenteditable="true"]').forEach(el => {
+          if (found.length < 50) check(el);
+        });
+        document.querySelectorAll('div, span, li, td, img, svg, label').forEach(el => {
+          if (found.length >= 50) return;
+          try {
+            if (window.getComputedStyle(el).cursor === 'pointer') check(el);
+          } catch {}
+        });
+
+        window.__orcaCursorInteractive = matchedElements;
+        return JSON.stringify(found);
+      })()`,
+      returnByValue: true
+    })) as { result: { value: string } }
+    pageScanRan = true
+
+    const elements = JSON.parse(result.value) as { text: string; tag: string }[]
+
+    for (let i = 0; i < elements.length; i++) {
+      try {
+        // Without returnByValue the element comes back as a remote object handle.
+        const { result: objResult } = (await sendCommand('Runtime.evaluate', {
+          expression: `window.__orcaCursorInteractive[${i}]`
+        })) as { result: { objectId?: string } }
+
+        if (!objResult.objectId) {
+          continue
+        }
+
+        const { node } = (await sendCommand('DOM.describeNode', {
+          objectId: objResult.objectId
+        })) as { node: { backendNodeId: number } }
+
+        // Already represented by an entry from the accessibility tree walk.
+        if (existingNodeIds.has(node.backendNodeId)) {
+          continue
+        }
+
+        results.push({
+          ref: '',
+          role: 'clickable',
+          name: elements[i].text,
+          backendDOMNodeId: node.backendNodeId,
+          depth: 0
+        })
+      } catch {
+        // One unresolvable element must not discard the others.
+        continue
+      }
+    }
+  } catch {
+    // DOM query failed — not critical, return whatever was collected
+  } finally {
+    if (pageScanRan) {
+      try {
+        // Clean up
+        await sendCommand('Runtime.evaluate', {
+          expression: 'delete window.__orcaCursorInteractive',
+          returnByValue: true
+        })
+      } catch {
+        // Best-effort: a failed cleanup must not lose the collected results.
+      }
+    }
+  }
+
+  return results
+}
--- a/src/main/index.ts
+++ b/src/main/index.ts
@ -35,6 +35,8 @@ import { CodexAccountService } from './codex-accounts/service'
 import { CodexRuntimeHomeService } from './codex-accounts/runtime-home-service'
 import { openCodeHookService } from './opencode/hook-service'
 import { StarNagService } from './star-nag/service'
+import { AgentBrowserBridge } from './browser/agent-browser-bridge'
+import { browserManager } from './browser/browser-manager'

 let mainWindow: BrowserWindow | null = null
 /** Whether a manual app.quit() (Cmd+Q, etc.) is in progress. Shared with the
@ -158,6 +160,7 @@ app.whenReady().then(async () => {
  starNag = new StarNagService(store, stats)
  starNag.start()
  starNag.registerIpcHandlers()
+  runtime.setAgentBrowserBridge(new AgentBrowserBridge(browserManager))
  nativeTheme.themeSource = store.getSettings().theme ?? 'system'
  registerAppMenu({
    onCheckForUpdates: () => checkForUpdatesFromMenu(),
@ -265,6 +268,9 @@ app.on('will-quit', () => {
  openCodeHookService.stop()
  starNag?.stop()
  stats?.flush()
+  // Why: agent-browser daemon processes would otherwise linger after Orca quits,
+  // holding ports and leaving stale session state on disk.
+  runtime?.getAgentBrowserBridge()?.destroyAllSessions()
  killAllPty()
  // Why: in daemon mode, killAllPty is a no-op (daemon sessions survive app
  // quit) but the client connection must be closed so sockets are released.
--- a/src/main/ipc/browser.test.ts
+++ b/src/main/ipc/browser.test.ts
@ -5,6 +5,8 @@ const {
  handleMock,
  registerGuestMock,
  unregisterGuestMock,
+  getGuestWebContentsIdMock,
+  getWorktreeIdForTabMock,
  openDevToolsMock,
  getDownloadPromptMock,
  acceptDownloadMock,
@ -16,6 +18,8 @@ const {
  handleMock: vi.fn(),
  registerGuestMock: vi.fn(),
  unregisterGuestMock: vi.fn(),
+  getGuestWebContentsIdMock: vi.fn(),
+  getWorktreeIdForTabMock: vi.fn(),
  openDevToolsMock: vi.fn().mockResolvedValue(true),
  getDownloadPromptMock: vi.fn(),
  acceptDownloadMock: vi.fn(),
@ -41,6 +45,8 @@ vi.mock('../browser/browser-manager', () => ({
  browserManager: {
    registerGuest: registerGuestMock,
    unregisterGuest: unregisterGuestMock,
+    getGuestWebContentsId: getGuestWebContentsIdMock,
+    getWorktreeIdForTab: getWorktreeIdForTabMock,
    openDevTools: openDevToolsMock,
    getDownloadPrompt: getDownloadPromptMock,
    acceptDownload: acceptDownloadMock,
@ -48,7 +54,7 @@ vi.mock('../browser/browser-manager', () => ({
  }
 }))

-import { registerBrowserHandlers } from './browser'
+import { registerBrowserHandlers, setAgentBrowserBridgeRef } from './browser'

 describe('registerBrowserHandlers', () => {
  beforeEach(() => {
@ -56,6 +62,8 @@ describe('registerBrowserHandlers', () => {
    handleMock.mockReset()
    registerGuestMock.mockReset()
    unregisterGuestMock.mockReset()
+    getGuestWebContentsIdMock.mockReset()
+    getWorktreeIdForTabMock.mockReset()
    openDevToolsMock.mockReset()
    getDownloadPromptMock.mockReset()
    acceptDownloadMock.mockReset()
@ -63,6 +71,7 @@ describe('registerBrowserHandlers', () => {
    showSaveDialogMock.mockReset()
    browserWindowFromWebContentsMock.mockReset()
    openDevToolsMock.mockResolvedValue(true)
+    setAgentBrowserBridgeRef(null)
  })

  it('rejects non-window callers', async () => {
@ -118,4 +127,31 @@ describe('registerBrowserHandlers', () => {
    })
    expect(result).toEqual({ ok: true })
  })
+
+  // Verifies that the 'browser:activeTabChanged' handler forwards the guest's
+  // webContents id together with the tab's worktree id to the bridge.
+  it('updates the bridge active tab for the owning worktree', async () => {
+    const onTabChangedMock = vi.fn()
+    getGuestWebContentsIdMock.mockReturnValue(4242)
+    getWorktreeIdForTabMock.mockReturnValue('wt-browser')
+
+    setAgentBrowserBridgeRef({ onTabChanged: onTabChangedMock } as never)
+    registerBrowserHandlers()
+
+    // Pull the registered handler out of the mocked ipcMain.handle calls.
+    const activeTabChangedHandler = handleMock.mock.calls.find(
+      ([channel]) => channel === 'browser:activeTabChanged'
+    )?.[1] as (event: { sender: Electron.WebContents }, args: { browserPageId: string }) => boolean
+
+    const result = activeTabChangedHandler(
+      {
+        sender: {
+          isDestroyed: () => false,
+          getType: () => 'window',
+          getURL: () => 'file:///renderer/index.html'
+        } as Electron.WebContents
+      },
+      { browserPageId: 'page-1' }
+    )
+
+    expect(result).toBe(true)
+    expect(onTabChangedMock).toHaveBeenCalledWith(4242, 'wt-browser')
+  })
 })
--- a/src/main/ipc/browser.ts
+++ b/src/main/ipc/browser.ts
@ -2,6 +2,7 @@
   trust boundary (isTrustedBrowserRenderer) and handler teardown stay consistent. */
 import { BrowserWindow, dialog, ipcMain } from 'electron'
 import { browserManager } from '../browser/browser-manager'
+import type { AgentBrowserBridge } from '../browser/agent-browser-bridge'
 import { browserSessionRegistry } from '../browser/browser-session-registry'
 import {
  pickCookieFile,
@ -28,11 +29,37 @@ import type {
 } from '../../shared/types'

 let trustedBrowserRendererWebContentsId: number | null = null
+let agentBrowserBridgeRef: AgentBrowserBridge | null = null
+
+// Why: CLI-driven tab creation must wait until the renderer mounts the webview
+// and calls registerGuest, so the tab has a webContentsId and is operable by
+// subsequent commands. This map holds one-shot resolvers keyed by browserPageId.
+const pendingTabRegistrations = new Map<string, () => void>()
+
+// Why: more than one caller can wait on the same browserPageId at the same
+// time. pendingTabRegistrations holds a single resolver per page, so the
+// individual waiters are tracked here and that single resolver fans out to
+// all of them.
+const tabRegistrationWaiters = new Map<string, Set<() => void>>()
+
+/**
+ * Resolves once the given browser page has a registered guest webContents.
+ *
+ * Resolves immediately when the page is already registered. Otherwise waits
+ * until the page's pending resolver is invoked, or rejects after `timeoutMs`.
+ * Concurrent waiters on the same page are all resolved together, and one
+ * waiter timing out does not cancel the others.
+ *
+ * @param browserPageId - Id of the browser page to wait for.
+ * @param timeoutMs - Maximum time to wait, in milliseconds (default 8000).
+ * @returns A promise that resolves on registration.
+ * @throws Rejects with Error('Tab registration timed out') on timeout.
+ */
+export function waitForTabRegistration(browserPageId: string, timeoutMs = 8_000): Promise<void> {
+  if (browserManager.getGuestWebContentsId(browserPageId) !== null) {
+    return Promise.resolve()
+  }
+  return new Promise<void>((resolve, reject) => {
+    let waiters = tabRegistrationWaiters.get(browserPageId)
+    if (!waiters) {
+      const created = new Set<() => void>()
+      waiters = created
+      tabRegistrationWaiters.set(browserPageId, created)
+      // One shared resolver per page: it notifies every waiter still pending.
+      pendingTabRegistrations.set(browserPageId, () => {
+        tabRegistrationWaiters.delete(browserPageId)
+        for (const notify of created) {
+          notify()
+        }
+      })
+    }
+    const pageWaiters = waiters
+
+    const timer = setTimeout(() => {
+      pageWaiters.delete(onRegistered)
+      // Only the last waiter to leave removes the shared resolver, so an
+      // earlier timeout cannot strand waiters that are still within their
+      // own deadline.
+      if (pageWaiters.size === 0 && tabRegistrationWaiters.get(browserPageId) === pageWaiters) {
+        tabRegistrationWaiters.delete(browserPageId)
+        pendingTabRegistrations.delete(browserPageId)
+      }
+      reject(new Error('Tab registration timed out'))
+    }, timeoutMs)
+
+    function onRegistered(): void {
+      clearTimeout(timer)
+      resolve()
+    }
+
+    pageWaiters.add(onRegistered)
+  })
+}

 export function setTrustedBrowserRendererWebContentsId(webContentsId: number | null): void {
  trustedBrowserRendererWebContentsId = webContentsId
 }

+// Stores the bridge instance that the browser IPC handlers in this module
+// notify. Passing null clears it, after which those notifications are skipped.
+export function setAgentBrowserBridgeRef(bridge: AgentBrowserBridge | null): void {
+  agentBrowserBridgeRef = bridge
+}
+
 function isTrustedBrowserRenderer(sender: Electron.WebContents): boolean {
  if (sender.isDestroyed() || sender.getType() !== 'window') {
    return false
@ -64,17 +91,39 @@ export function registerBrowserHandlers(): void {
  ipcMain.removeHandler('browser:cancelGrab')
  ipcMain.removeHandler('browser:captureSelectionScreenshot')
  ipcMain.removeHandler('browser:extractHoverPayload')
+  ipcMain.removeHandler('browser:activeTabChanged')

  ipcMain.handle(
    'browser:registerGuest',
-    (event, args: { browserPageId: string; workspaceId: string; webContentsId: number }) => {
+    (
+      event,
+      args: {
+        browserPageId: string
+        workspaceId: string
+        worktreeId: string
+        webContentsId: number
+      }
+    ) => {
      if (!isTrustedBrowserRenderer(event.sender)) {
        return false
      }
+      // Why: when Chromium swaps a guest's renderer process (navigation,
+      // crash recovery), the renderer re-registers the same browserPageId
+      // with a new webContentsId. The bridge must destroy the old session's
+      // proxy (its webContents is gone) and let the next command recreate it.
+      const previousWcId = browserManager.getGuestWebContentsId(args.browserPageId)
      browserManager.registerGuest({
        ...args,
        rendererWebContentsId: event.sender.id
      })
+      if (agentBrowserBridgeRef && previousWcId !== null && previousWcId !== args.webContentsId) {
+        agentBrowserBridgeRef.onProcessSwap(args.browserPageId, args.webContentsId, previousWcId)
+      }
+      const pendingResolve = pendingTabRegistrations.get(args.browserPageId)
+      if (pendingResolve) {
+        pendingTabRegistrations.delete(args.browserPageId)
+        pendingResolve()
+      }
      return true
    }
  )
@ -83,10 +132,39 @@ export function registerBrowserHandlers(): void {
    if (!isTrustedBrowserRenderer(event.sender)) {
      return false
    }
+    // Why: notify bridge before unregistering so it can destroy the session
+    // process and proxy. Must happen before unregisterGuest clears the mapping.
+    const wcId = browserManager.getGuestWebContentsId(args.browserPageId)
+    if (wcId !== null && agentBrowserBridgeRef) {
+      agentBrowserBridgeRef.onTabClosed(wcId)
+    }
    browserManager.unregisterGuest(args.browserPageId)
    return true
  })

+  // Why: keeps the bridge's active tab in sync with the renderer's UI state.
+  // Without this, a user switching tabs in the UI would leave the agent operating
+  // on the previous tab, which is confusing.
+  ipcMain.handle('browser:activeTabChanged', (event, args: { browserPageId: string }) => {
+    if (!isTrustedBrowserRenderer(event.sender)) {
+      return false
+    }
+    if (!agentBrowserBridgeRef) {
+      return false
+    }
+    const wcId = browserManager.getGuestWebContentsId(args.browserPageId)
+    if (wcId !== null) {
+      // Why: renderer tab changes are scoped to a worktree. If we only update
+      // the global active guest, later worktree-scoped commands can still
+      // resolve to the previously active page inside that worktree.
+      agentBrowserBridgeRef.onTabChanged(
+        wcId,
+        browserManager.getWorktreeIdForTab(args.browserPageId)
+      )
+    }
+    return true
+  })
+
  ipcMain.handle('browser:openDevTools', (event, args: { browserPageId: string }) => {
    if (!isTrustedBrowserRenderer(event.sender)) {
      return false
--- a/src/main/ipc/register-core-handlers.test.ts
+++ b/src/main/ipc/register-core-handlers.test.ts
@ -20,6 +20,7 @@ const {
  registerUpdaterHandlersMock,
  registerRateLimitHandlersMock,
  registerBrowserHandlersMock,
+  setAgentBrowserBridgeRefMock,
  setTrustedBrowserRendererWebContentsIdMock,
  registerFilesystemWatcherHandlersMock,
  registerAppHandlersMock,
@ -44,6 +45,7 @@ const {
  registerUpdaterHandlersMock: vi.fn(),
  registerRateLimitHandlersMock: vi.fn(),
  registerBrowserHandlersMock: vi.fn(),
+  setAgentBrowserBridgeRefMock: vi.fn(),
  setTrustedBrowserRendererWebContentsIdMock: vi.fn(),
  registerFilesystemWatcherHandlersMock: vi.fn(),
  registerAppHandlersMock: vi.fn(),
@ -129,7 +131,8 @@ vi.mock('../window/attach-main-window-services', () => ({

 vi.mock('./browser', () => ({
  registerBrowserHandlers: registerBrowserHandlersMock,
-  setTrustedBrowserRendererWebContentsId: setTrustedBrowserRendererWebContentsIdMock
+  setTrustedBrowserRendererWebContentsId: setTrustedBrowserRendererWebContentsIdMock,
+  setAgentBrowserBridgeRef: setAgentBrowserBridgeRefMock
 }))

 vi.mock('./app', () => ({
@ -159,6 +162,7 @@ describe('registerCoreHandlers', () => {
    registerUpdaterHandlersMock.mockReset()
    registerRateLimitHandlersMock.mockReset()
    registerBrowserHandlersMock.mockReset()
+    setAgentBrowserBridgeRefMock.mockReset()
    setTrustedBrowserRendererWebContentsIdMock.mockReset()
    registerFilesystemWatcherHandlersMock.mockReset()
    registerAppHandlersMock.mockReset()
@ -167,7 +171,7 @@ describe('registerCoreHandlers', () => {

  it('passes the store through to handler registrars that need it', () => {
    const store = { marker: 'store' }
-    const runtime = { marker: 'runtime' }
+    const runtime = { marker: 'runtime', getAgentBrowserBridge: () => null }
    const stats = { marker: 'stats' }
    const claudeUsage = { marker: 'claudeUsage' }
    const codexUsage = { marker: 'codexUsage' }
@ -211,7 +215,7 @@ describe('registerCoreHandlers', () => {
    // The first test already called registerCoreHandlers, so the module-level
    // guard is now set. beforeEach reset all mocks, so call counts are 0.
    const store2 = { marker: 'store2' }
-    const runtime2 = { marker: 'runtime2' }
+    const runtime2 = { marker: 'runtime2', getAgentBrowserBridge: () => null }
    const stats2 = { marker: 'stats2' }
    const claudeUsage2 = { marker: 'claudeUsage2' }
    const codexUsage2 = { marker: 'codexUsage2' }
--- a/src/main/ipc/register-core-handlers.ts
+++ b/src/main/ipc/register-core-handlers.ts
@ -15,7 +15,7 @@ import { registerStatsHandlers } from './stats'
 import { registerRateLimitHandlers } from './rate-limits'
 import { registerRuntimeHandlers } from './runtime'
 import { registerNotificationHandlers } from './notifications'
-import { setTrustedBrowserRendererWebContentsId } from './browser'
+import { setTrustedBrowserRendererWebContentsId, setAgentBrowserBridgeRef } from './browser'
 import { registerSessionHandlers } from './session'
 import { registerSettingsHandlers } from './settings'
 import { registerBrowserHandlers } from './browser'
@ -50,6 +50,7 @@ export function registerCoreHandlers(
  // if a channel is registered twice, so we guard to register only once and
  // just update the per-window web-contents ID on subsequent calls.
  setTrustedBrowserRendererWebContentsId(mainWindowWebContentsId)
+  setAgentBrowserBridgeRef(runtime.getAgentBrowserBridge())
  if (registered) {
    return
  }
--- a/src/main/runtime/orca-runtime.test.ts
+++ b/src/main/runtime/orca-runtime.test.ts
@ -764,4 +764,158 @@ describe('OrcaRuntimeService', () => {
      }
    ])
  })
+
+  // Covers how runtime browser commands combine the optional `page` and
+  // `worktree` selectors before delegating to the agent-browser bridge.
+  describe('browser page targeting', () => {
+    // With only `page` given, the bridge receives an undefined worktree id and
+    // no worktree listing is performed.
+    it('passes explicit page ids through without resolving the current worktree', async () => {
+      vi.mocked(listWorktrees).mockClear()
+      const runtime = createRuntime()
+      const snapshotMock = vi.fn().mockResolvedValue({
+        browserPageId: 'page-1',
+        snapshot: 'tree',
+        refs: [],
+        url: 'https://example.com',
+        title: 'Example'
+      })
+
+      runtime.setAgentBrowserBridge({
+        snapshot: snapshotMock
+      } as never)
+
+      const result = await runtime.browserSnapshot({ page: 'page-1' })
+
+      expect(result.browserPageId).toBe('page-1')
+      expect(snapshotMock).toHaveBeenCalledWith(undefined, 'page-1')
+      expect(listWorktrees).not.toHaveBeenCalled()
+    })
+
+    // With both selectors given, the worktree selector is resolved to an id
+    // and passed to the bridge alongside the page id.
+    it('resolves explicit worktree selectors when page ids are also provided', async () => {
+      vi.mocked(listWorktrees).mockClear()
+      const runtime = createRuntime()
+      const snapshotMock = vi.fn().mockResolvedValue({
+        browserPageId: 'page-1',
+        snapshot: 'tree',
+        refs: [],
+        url: 'https://example.com',
+        title: 'Example'
+      })
+
+      runtime.setAgentBrowserBridge({
+        snapshot: snapshotMock,
+        getRegisteredTabs: vi.fn(() => new Map([['page-1', 1]]))
+      } as never)
+
+      await runtime.browserSnapshot({
+        worktree: 'branch:feature/foo',
+        page: 'page-1'
+      })
+
+      expect(snapshotMock).toHaveBeenCalledWith(TEST_WORKTREE_ID, 'page-1')
+    })
+
+    // Tab switch and capture start accept a page id; the bridge calls carry
+    // the page id as the last argument with the preceding ones undefined.
+    it('routes tab switch and capture start by explicit page id', async () => {
+      const runtime = createRuntime()
+      const tabSwitchMock = vi.fn().mockResolvedValue({
+        switched: 2,
+        browserPageId: 'page-2'
+      })
+      const captureStartMock = vi.fn().mockResolvedValue({
+        capturing: true
+      })
+
+      runtime.setAgentBrowserBridge({
+        tabSwitch: tabSwitchMock,
+        captureStart: captureStartMock
+      } as never)
+
+      await expect(runtime.browserTabSwitch({ page: 'page-2' })).resolves.toEqual({
+        switched: 2,
+        browserPageId: 'page-2'
+      })
+      await expect(runtime.browserCaptureStart({ page: 'page-2' })).resolves.toEqual({
+        capturing: true
+      })
+      expect(tabSwitchMock).toHaveBeenCalledWith(undefined, undefined, 'page-2')
+      expect(captureStartMock).toHaveBeenCalledWith(undefined, 'page-2')
+    })
+
+    // An unresolvable worktree selector must fail the command rather than be
+    // ignored in favour of the page id.
+    it('does not silently drop invalid explicit worktree selectors for page-targeted commands', async () => {
+      vi.mocked(listWorktrees).mockResolvedValue(MOCK_GIT_WORKTREES)
+      const runtime = createRuntime()
+      const snapshotMock = vi.fn()
+
+      runtime.setAgentBrowserBridge({
+        snapshot: snapshotMock,
+        getRegisteredTabs: vi.fn(() => new Map([['page-1', 1]]))
+      } as never)
+
+      await expect(
+        runtime.browserSnapshot({
+          worktree: 'path:/tmp/missing-worktree',
+          page: 'page-1'
+        })
+      ).rejects.toThrow('selector_not_found')
+      expect(snapshotMock).not.toHaveBeenCalled()
+    })
+
+    // Same rule for commands that take no page id.
+    it('does not silently drop invalid explicit worktree selectors for non-page browser commands', async () => {
+      vi.mocked(listWorktrees).mockResolvedValue(MOCK_GIT_WORKTREES)
+      const runtime = createRuntime()
+      const tabListMock = vi.fn()
+
+      runtime.setAgentBrowserBridge({
+        tabList: tabListMock
+      } as never)
+
+      await expect(
+        runtime.browserTabList({
+          worktree: 'path:/tmp/missing-worktree'
+        })
+      ).rejects.toThrow('selector_not_found')
+      expect(tabListMock).not.toHaveBeenCalled()
+    })
+
+    // Closing a page id that is not among the registered tabs is an error.
+    it('rejects closing an unknown page id instead of treating it as success', async () => {
+      vi.mocked(listWorktrees).mockResolvedValue(MOCK_GIT_WORKTREES)
+      const runtime = createRuntime()
+
+      runtime.setAgentBrowserBridge({
+        getRegisteredTabs: vi.fn(() => new Map([['page-1', 1]]))
+      } as never)
+
+      await expect(
+        runtime.browserTabClose({
+          page: 'missing-page'
+        })
+      ).rejects.toThrow('Browser page missing-page was not found')
+    })
+
+    // The page exists, but not in the worktree named by the selector: the
+    // mock returns an empty tab map for worktree-b only.
+    it('rejects closing a page outside the explicitly scoped worktree', async () => {
+      vi.mocked(listWorktrees).mockResolvedValue([
+        ...MOCK_GIT_WORKTREES,
+        {
+          path: '/tmp/worktree-b',
+          head: 'def',
+          branch: 'feature/bar',
+          isBare: false,
+          isMainWorktree: false
+        }
+      ])
+      const runtime = createRuntime()
+      const getRegisteredTabsMock = vi.fn((worktreeId?: string) =>
+        worktreeId === `${TEST_REPO_ID}::/tmp/worktree-b` ? new Map() : new Map([['page-1', 1]])
+      )
+
+      runtime.setAgentBrowserBridge({
+        getRegisteredTabs: getRegisteredTabsMock
+      } as never)
+
+      await expect(
+        runtime.browserTabClose({
+          page: 'page-1',
+          worktree: 'path:/tmp/worktree-b'
+        })
+      ).rejects.toThrow('Browser page page-1 was not found in this worktree')
+      expect(getRegisteredTabsMock).toHaveBeenCalledWith(`${TEST_REPO_ID}::/tmp/worktree-b`)
+    })
+  })
 })
--- a/src/main/runtime/orca-runtime.ts
+++ b/src/main/runtime/orca-runtime.ts
--- a/src/main/runtime/runtime-rpc.ts
+++ b/src/main/runtime/runtime-rpc.ts
--- a/src/preload/api-types.d.ts
+++ b/src/preload/api-types.d.ts
@ -89,6 +89,7 @@ export type BrowserApi = {
  registerGuest: (args: {
    browserPageId: string
    workspaceId: string
+    worktreeId: string
    webContentsId: number
  }) => Promise<void>
  unregisterGuest: (args: { browserPageId: string }) => Promise<void>
@ -107,6 +108,10 @@ export type BrowserApi = {
  onContextMenuDismissed: (
    callback: (event: BrowserContextMenuDismissedEvent) => void
  ) => () => void
+  onNavigationUpdate: (
+    callback: (event: { browserPageId: string; url: string; title: string }) => void
+  ) => () => void
+  onActivateView: (callback: (data: { worktreeId: string }) => void) => () => void
  onOpenLinkInOrcaTab: (
    callback: (event: { browserPageId: string; url: string }) => void
  ) => () => void
@ -140,6 +145,7 @@ export type BrowserApi = {
    browserProfile?: string
  }) => Promise<BrowserCookieImportResult>
  sessionClearDefaultCookies: () => Promise<boolean>
+  notifyActiveTabChanged: (args: { browserPageId: string }) => Promise<boolean>
 }

 export type DetectedBrowserProfileInfo = {
@ -604,6 +610,14 @@ export type PreloadApi = {
    onOpenQuickOpen: (callback: () => void) => () => void
    onJumpToWorktreeIndex: (callback: (index: number) => void) => () => void
    onNewBrowserTab: (callback: () => void) => () => void
+    onRequestTabCreate: (
+      callback: (data: { requestId: string; url: string; worktreeId?: string }) => void
+    ) => () => void
+    replyTabCreate: (reply: { requestId: string; browserPageId?: string; error?: string }) => void
+    onRequestTabClose: (
+      callback: (data: { requestId: string; tabId: string | null; worktreeId?: string }) => void
+    ) => () => void
+    replyTabClose: (reply: { requestId: string; error?: string }) => void
    onNewTerminalTab: (callback: () => void) => () => void
    onFocusBrowserAddressBar: (callback: () => void) => () => void
    onFindInBrowserPage: (callback: () => void) => () => void
--- a/src/preload/index.ts
+++ b/src/preload/index.ts
@ -509,6 +509,7 @@ const api = {
    registerGuest: (args: {
      browserPageId: string
      workspaceId: string
+      worktreeId: string
      webContentsId: number
    }): Promise<void> => ipcRenderer.invoke('browser:registerGuest', args),

@ -666,6 +667,24 @@ const api = {
      return () => ipcRenderer.removeListener('browser:context-menu-dismissed', listener)
    },

+    onNavigationUpdate: ( // Subscribes to 'browser:navigation-update' messages; returns an unsubscribe function.
+      callback: (event: { browserPageId: string; url: string; title: string }) => void
+    ): (() => void) => {
+      const listener = (
+        _event: Electron.IpcRendererEvent, // the IPC event object is dropped; only the payload reaches the callback
+        data: { browserPageId: string; url: string; title: string }
+      ) => callback(data)
+      ipcRenderer.on('browser:navigation-update', listener)
+      return () => ipcRenderer.removeListener('browser:navigation-update', listener) // detaches exactly the listener registered above
+    },
+
+    onActivateView: (callback: (data: { worktreeId: string }) => void): (() => void) => { // Subscribes to 'browser:activateView'; returns an unsubscribe function.
+      const listener = (_event: Electron.IpcRendererEvent, data: { worktreeId: string }) => // the IPC event object is dropped; only the payload is forwarded
+        callback(data)
+      ipcRenderer.on('browser:activateView', listener)
+      return () => ipcRenderer.removeListener('browser:activateView', listener) // detaches exactly the listener registered above
+    },
+
    onOpenLinkInOrcaTab: (
      callback: (event: { browserPageId: string; url: string }) => void
    ): (() => void) => {
@ -757,7 +776,10 @@ const api = {
    > => ipcRenderer.invoke('browser:session:importFromBrowser', args),

    sessionClearDefaultCookies: (): Promise<boolean> =>
-      ipcRenderer.invoke('browser:session:clearDefaultCookies')
+      ipcRenderer.invoke('browser:session:clearDefaultCookies'),
+
+    notifyActiveTabChanged: (args: { browserPageId: string }): Promise<boolean> => // Forwards the newly active page id to main over the 'browser:activeTabChanged' invoke channel.
+      ipcRenderer.invoke('browser:activeTabChanged', args) // NOTE(review): meaning of the resolved boolean is defined by the main-process handler — confirm there
  },

  hooks: {
@ -1059,6 +1081,36 @@ const api = {
      ipcRenderer.on('ui:newBrowserTab', listener)
      return () => ipcRenderer.removeListener('ui:newBrowserTab', listener)
    },
+    onRequestTabCreate: ( // Subscribes to 'browser:requestTabCreate' requests; returns an unsubscribe function.
+      callback: (data: { requestId: string; url: string; worktreeId?: string }) => void
+    ): (() => void) => {
+      const listener = (
+        _event: Electron.IpcRendererEvent, // the IPC event object is dropped; only the payload reaches the callback
+        data: { requestId: string; url: string; worktreeId?: string }
+      ) => callback(data)
+      ipcRenderer.on('browser:requestTabCreate', listener)
+      return () => ipcRenderer.removeListener('browser:requestTabCreate', listener) // detaches exactly the listener registered above
+    },
+    replyTabCreate: (reply: { // Sends the renderer's answer to a tab-create request on 'browser:tabCreateReply'.
+      requestId: string // NOTE(review): presumably echoes the requestId from 'browser:requestTabCreate' so main can correlate — confirm
+      browserPageId?: string
+      error?: string
+    }): void => {
+      ipcRenderer.send('browser:tabCreateReply', reply) // one-way send; nothing is returned to the caller
+    },
+    onRequestTabClose: ( // Subscribes to 'browser:requestTabClose' requests; returns an unsubscribe function.
+      callback: (data: { requestId: string; tabId: string | null; worktreeId?: string }) => void
+    ): (() => void) => {
+      const listener = (
+        _event: Electron.IpcRendererEvent, // the IPC event object is dropped; only the payload reaches the callback
+        data: { requestId: string; tabId: string | null; worktreeId?: string }
+      ) => callback(data)
+      ipcRenderer.on('browser:requestTabClose', listener)
+      return () => ipcRenderer.removeListener('browser:requestTabClose', listener) // detaches exactly the listener registered above
+    },
+    replyTabClose: (reply: { requestId: string; error?: string }): void => { // Sends the renderer's answer to a tab-close request on 'browser:tabCloseReply'.
+      ipcRenderer.send('browser:tabCloseReply', reply) // one-way send; nothing is returned to the caller
+    },
    onNewTerminalTab: (callback: () => void): (() => void) => {
      const listener = (_event: Electron.IpcRendererEvent) => callback()
      ipcRenderer.on('ui:newTerminalTab', listener)
--- a/src/renderer/src/components/browser-pane/BrowserPane.tsx
+++ b/src/renderer/src/components/browser-pane/BrowserPane.tsx
@ -999,6 +999,7 @@ function BrowserPagePane({
        void window.api.browser.registerGuest({
          browserPageId: browserTab.id,
          workspaceId,
+          worktreeId,
          webContentsId
        })
      }
--- a/src/renderer/src/hooks/useIpcEvents.test.ts
+++ b/src/renderer/src/hooks/useIpcEvents.test.ts
@ -152,6 +152,10 @@ describe('useIpcEvents updater integration', () => {
          onJumpToWorktreeIndex: () => () => {},
          onActivateWorktree: () => () => {},
          onNewBrowserTab: () => () => {},
+          onRequestTabCreate: () => () => {},
+          replyTabCreate: () => {},
+          onRequestTabClose: () => () => {},
+          replyTabClose: () => {},
          onNewTerminalTab: () => () => {},
          onCloseActiveTab: () => () => {},
          onSwitchTab: () => () => {},
@ -171,7 +175,9 @@ describe('useIpcEvents updater integration', () => {
        },
        browser: {
          onGuestLoadFailed: () => () => {},
-          onOpenLinkInOrcaTab: () => () => {}
+          onOpenLinkInOrcaTab: () => () => {},
+          onNavigationUpdate: () => () => {},
+          onActivateView: () => () => {}
        },
        rateLimits: {
          get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),
@ -314,6 +320,10 @@ describe('useIpcEvents updater integration', () => {
          onJumpToWorktreeIndex: () => () => {},
          onActivateWorktree: () => () => {},
          onNewBrowserTab: () => () => {},
+          onRequestTabCreate: () => () => {},
+          replyTabCreate: () => {},
+          onRequestTabClose: () => () => {},
+          replyTabClose: () => {},
          onNewTerminalTab: () => () => {},
          onCloseActiveTab: () => () => {},
          onSwitchTab: () => () => {},
@ -330,7 +340,9 @@ describe('useIpcEvents updater integration', () => {
        },
        browser: {
          onGuestLoadFailed: () => () => {},
-          onOpenLinkInOrcaTab: () => () => {}
+          onOpenLinkInOrcaTab: () => () => {},
+          onNavigationUpdate: () => () => {},
+          onActivateView: () => () => {}
        },
        rateLimits: {
          get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),
@ -372,6 +384,500 @@ describe('useIpcEvents updater integration', () => {
  })
 })

+describe('useIpcEvents browser tab close routing', () => {
+  beforeEach(() => {
+    vi.resetModules() // each test re-imports './useIpcEvents' after registering its own vi.doMock factories
+    vi.unstubAllGlobals() // drop the `window` stub installed by the previous test
+  })
+
+  it('closes the active browser tab for the requested worktree when main does not provide a tab id', async () => {
+    const closeBrowserTab = vi.fn()
+    const closeBrowserPage = vi.fn()
+    const replyTabClose = vi.fn()
+    const tabCloseListenerRef: {
+      current:
+        | ((data: { requestId: string; tabId: string | null; worktreeId?: string }) => void)
+        | null
+    } = {
+      current: null
+    }
+
+    vi.doMock('react', async () => {
+      const actual = await vi.importActual<typeof ReactModule>('react')
+      return {
+        ...actual,
+        useEffect: (effect: () => void | (() => void)) => { // run effects synchronously so calling the hook outside React registers its listeners
+          effect()
+        }
+      }
+    })
+
+    vi.doMock('../store', () => ({
+      useAppStore: {
+        getState: () => ({
+          setUpdateStatus: vi.fn(),
+          fetchRepos: vi.fn(),
+          fetchWorktrees: vi.fn(),
+          setActiveView: vi.fn(),
+          activeModal: null,
+          closeModal: vi.fn(),
+          openModal: vi.fn(),
+          activeWorktreeId: 'wt-1', // active worktree differs from the 'wt-2' targeted below
+          activeView: 'terminal',
+          setActiveRepo: vi.fn(),
+          setActiveWorktree: vi.fn(),
+          revealWorktreeInSidebar: vi.fn(),
+          setIsFullScreen: vi.fn(),
+          updateBrowserTabPageState: vi.fn(),
+          activeTabType: 'browser',
+          editorFontZoomLevel: 0,
+          setEditorFontZoomLevel: vi.fn(),
+          setRateLimitsFromPush: vi.fn(),
+          setSshConnectionState: vi.fn(),
+          setSshTargetLabels: vi.fn(),
+          enqueueSshCredentialRequest: vi.fn(),
+          removeSshCredentialRequest: vi.fn(),
+          settings: { terminalFontSize: 13 },
+          activeBrowserTabId: 'workspace-global', // global active tab; must not be the one closed
+          activeBrowserTabIdByWorktree: {
+            'wt-1': 'workspace-global',
+            'wt-2': 'workspace-target'
+          },
+          browserTabsByWorktree: {
+            'wt-1': [{ id: 'workspace-global' }],
+            'wt-2': [{ id: 'workspace-target' }]
+          },
+          browserPagesByWorkspace: {},
+          closeBrowserTab,
+          closeBrowserPage
+        })
+      }
+    }))
+
+    vi.doMock('@/lib/ui-zoom', () => ({
+      applyUIZoom: vi.fn()
+    }))
+    vi.doMock('@/lib/worktree-activation', () => ({
+      activateAndRevealWorktree: vi.fn(),
+      ensureWorktreeHasInitialTerminal: vi.fn()
+    }))
+    vi.doMock('@/components/sidebar/visible-worktrees', () => ({
+      getVisibleWorktreeIds: () => []
+    }))
+    vi.doMock('@/lib/editor-font-zoom', () => ({
+      nextEditorFontZoomLevel: vi.fn(() => 0),
+      computeEditorFontSize: vi.fn(() => 13)
+    }))
+    vi.doMock('@/components/settings/SettingsConstants', () => ({
+      zoomLevelToPercent: vi.fn(() => 100),
+      ZOOM_MIN: -3,
+      ZOOM_MAX: 3
+    }))
+    vi.doMock('@/lib/zoom-events', () => ({
+      dispatchZoomLevelChanged: vi.fn()
+    }))
+
+    vi.stubGlobal('window', {
+      dispatchEvent: vi.fn(),
+      api: {
+        repos: { onChanged: () => () => {} },
+        worktrees: { onChanged: () => () => {} },
+        ui: {
+          onOpenSettings: () => () => {},
+          onToggleLeftSidebar: () => () => {},
+          onToggleRightSidebar: () => () => {},
+          onToggleWorktreePalette: () => () => {},
+          onOpenQuickOpen: () => () => {},
+          onJumpToWorktreeIndex: () => () => {},
+          onActivateWorktree: () => () => {},
+          onNewBrowserTab: () => () => {},
+          onRequestTabCreate: () => () => {},
+          replyTabCreate: () => {},
+          onRequestTabClose: (
+            listener: (data: {
+              requestId: string
+              tabId: string | null
+              worktreeId?: string
+            }) => void
+          ) => {
+            tabCloseListenerRef.current = listener // capture the handler so the test can invoke it directly
+            return () => {}
+          },
+          replyTabClose,
+          onNewTerminalTab: () => () => {},
+          onCloseActiveTab: () => () => {},
+          onSwitchTab: () => () => {},
+          onToggleStatusBar: () => () => {},
+          onFullscreenChanged: () => () => {},
+          onTerminalZoom: () => () => {},
+          getZoomLevel: () => 0,
+          set: vi.fn()
+        },
+        updater: {
+          getStatus: () => Promise.resolve({ state: 'idle' }),
+          onStatus: () => () => {},
+          onClearDismissal: () => () => {}
+        },
+        browser: {
+          onGuestLoadFailed: () => () => {},
+          onOpenLinkInOrcaTab: () => () => {},
+          onNavigationUpdate: () => () => {},
+          onActivateView: () => () => {}
+        },
+        rateLimits: {
+          get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),
+          onUpdate: () => () => {}
+        },
+        ssh: {
+          listTargets: () => Promise.resolve([]),
+          getState: () => Promise.resolve(null),
+          onStateChanged: () => () => {},
+          onCredentialRequest: () => () => {},
+          onCredentialResolved: () => () => {}
+        }
+      }
+    })
+
+    const { useIpcEvents } = await import('./useIpcEvents')
+    useIpcEvents()
+
+    expect(tabCloseListenerRef.current).toBeTypeOf('function')
+    tabCloseListenerRef.current?.({
+      requestId: 'req-1',
+      tabId: null, // no explicit target: the handler must resolve wt-2's active tab
+      worktreeId: 'wt-2'
+    })
+
+    expect(closeBrowserTab).toHaveBeenCalledWith('workspace-target')
+    expect(closeBrowserPage).not.toHaveBeenCalled()
+    expect(replyTabClose).toHaveBeenCalledWith({ requestId: 'req-1' })
+  })
+
+  it('closes only the requested browser page when a workspace has multiple pages', async () => {
+    const closeBrowserTab = vi.fn()
+    const closeBrowserPage = vi.fn()
+    const replyTabClose = vi.fn()
+    const tabCloseListenerRef: {
+      current:
+        | ((data: { requestId: string; tabId: string | null; worktreeId?: string }) => void)
+        | null
+    } = {
+      current: null
+    }
+
+    vi.doMock('react', async () => {
+      const actual = await vi.importActual<typeof ReactModule>('react')
+      return {
+        ...actual,
+        useEffect: (effect: () => void | (() => void)) => { // run effects synchronously so calling the hook outside React registers its listeners
+          effect()
+        }
+      }
+    })
+
+    vi.doMock('../store', () => ({
+      useAppStore: {
+        getState: () => ({
+          setUpdateStatus: vi.fn(),
+          fetchRepos: vi.fn(),
+          fetchWorktrees: vi.fn(),
+          setActiveView: vi.fn(),
+          activeModal: null,
+          closeModal: vi.fn(),
+          openModal: vi.fn(),
+          activeWorktreeId: 'wt-1',
+          activeView: 'terminal',
+          setActiveRepo: vi.fn(),
+          setActiveWorktree: vi.fn(),
+          revealWorktreeInSidebar: vi.fn(),
+          setIsFullScreen: vi.fn(),
+          updateBrowserTabPageState: vi.fn(),
+          activeTabType: 'browser',
+          editorFontZoomLevel: 0,
+          setEditorFontZoomLevel: vi.fn(),
+          setRateLimitsFromPush: vi.fn(),
+          setSshConnectionState: vi.fn(),
+          setSshTargetLabels: vi.fn(),
+          enqueueSshCredentialRequest: vi.fn(),
+          removeSshCredentialRequest: vi.fn(),
+          settings: { terminalFontSize: 13 },
+          activeBrowserTabId: 'workspace-1',
+          activeBrowserTabIdByWorktree: { 'wt-1': 'workspace-1' },
+          browserTabsByWorktree: {
+            'wt-1': [{ id: 'workspace-1' }]
+          },
+          browserPagesByWorkspace: {
+            'workspace-1': [ // two pages, so closing one must leave the workspace tab open
+              { id: 'page-1', workspaceId: 'workspace-1' },
+              { id: 'page-2', workspaceId: 'workspace-1' }
+            ]
+          },
+          closeBrowserTab,
+          closeBrowserPage
+        })
+      }
+    }))
+
+    vi.doMock('@/lib/ui-zoom', () => ({
+      applyUIZoom: vi.fn()
+    }))
+    vi.doMock('@/lib/worktree-activation', () => ({
+      activateAndRevealWorktree: vi.fn(),
+      ensureWorktreeHasInitialTerminal: vi.fn()
+    }))
+    vi.doMock('@/components/sidebar/visible-worktrees', () => ({
+      getVisibleWorktreeIds: () => []
+    }))
+    vi.doMock('@/lib/editor-font-zoom', () => ({
+      nextEditorFontZoomLevel: vi.fn(() => 0),
+      computeEditorFontSize: vi.fn(() => 13)
+    }))
+    vi.doMock('@/components/settings/SettingsConstants', () => ({
+      zoomLevelToPercent: vi.fn(() => 100),
+      ZOOM_MIN: -3,
+      ZOOM_MAX: 3
+    }))
+    vi.doMock('@/lib/zoom-events', () => ({
+      dispatchZoomLevelChanged: vi.fn()
+    }))
+
+    vi.stubGlobal('window', {
+      dispatchEvent: vi.fn(),
+      api: {
+        repos: { onChanged: () => () => {} },
+        worktrees: { onChanged: () => () => {} },
+        ui: {
+          onOpenSettings: () => () => {},
+          onToggleLeftSidebar: () => () => {},
+          onToggleRightSidebar: () => () => {},
+          onToggleWorktreePalette: () => () => {},
+          onOpenQuickOpen: () => () => {},
+          onJumpToWorktreeIndex: () => () => {},
+          onActivateWorktree: () => () => {},
+          onNewBrowserTab: () => () => {},
+          onRequestTabCreate: () => () => {},
+          replyTabCreate: () => {},
+          onRequestTabClose: (
+            listener: (data: {
+              requestId: string
+              tabId: string | null
+              worktreeId?: string
+            }) => void
+          ) => {
+            tabCloseListenerRef.current = listener // capture the handler so the test can invoke it directly
+            return () => {}
+          },
+          replyTabClose,
+          onNewTerminalTab: () => () => {},
+          onCloseActiveTab: () => () => {},
+          onSwitchTab: () => () => {},
+          onToggleStatusBar: () => () => {},
+          onFullscreenChanged: () => () => {},
+          onTerminalZoom: () => () => {},
+          getZoomLevel: () => 0,
+          set: vi.fn()
+        },
+        updater: {
+          getStatus: () => Promise.resolve({ state: 'idle' }),
+          onStatus: () => () => {},
+          onClearDismissal: () => () => {}
+        },
+        browser: {
+          onGuestLoadFailed: () => () => {},
+          onOpenLinkInOrcaTab: () => () => {},
+          onNavigationUpdate: () => () => {},
+          onActivateView: () => () => {}
+        },
+        rateLimits: {
+          get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),
+          onUpdate: () => () => {}
+        },
+        ssh: {
+          listTargets: () => Promise.resolve([]),
+          getState: () => Promise.resolve(null),
+          onStateChanged: () => () => {},
+          onCredentialRequest: () => () => {},
+          onCredentialResolved: () => () => {}
+        }
+      }
+    })
+
+    const { useIpcEvents } = await import('./useIpcEvents')
+    useIpcEvents()
+
+    tabCloseListenerRef.current?.({
+      requestId: 'req-2',
+      tabId: 'page-2' // explicit page id, no worktree scope
+    })
+
+    expect(closeBrowserPage).toHaveBeenCalledWith('page-2')
+    expect(closeBrowserTab).not.toHaveBeenCalled()
+    expect(replyTabClose).toHaveBeenCalledWith({ requestId: 'req-2' })
+  })
+
+  it('rejects explicit unknown browser page ids instead of reporting success', async () => {
+    const closeBrowserTab = vi.fn()
+    const closeBrowserPage = vi.fn()
+    const replyTabClose = vi.fn()
+    const tabCloseListenerRef: {
+      current:
+        | ((data: { requestId: string; tabId: string | null; worktreeId?: string }) => void)
+        | null
+    } = {
+      current: null
+    }
+
+    vi.doMock('react', async () => {
+      const actual = await vi.importActual<typeof ReactModule>('react')
+      return {
+        ...actual,
+        useEffect: (effect: () => void | (() => void)) => { // run effects synchronously so calling the hook outside React registers its listeners
+          effect()
+        }
+      }
+    })
+
+    vi.doMock('../store', () => ({
+      useAppStore: {
+        getState: () => ({
+          setUpdateStatus: vi.fn(),
+          fetchRepos: vi.fn(),
+          fetchWorktrees: vi.fn(),
+          setActiveView: vi.fn(),
+          activeModal: null,
+          closeModal: vi.fn(),
+          openModal: vi.fn(),
+          activeWorktreeId: 'wt-1',
+          activeView: 'terminal',
+          setActiveRepo: vi.fn(),
+          setActiveWorktree: vi.fn(),
+          revealWorktreeInSidebar: vi.fn(),
+          setIsFullScreen: vi.fn(),
+          updateBrowserTabPageState: vi.fn(),
+          activeTabType: 'browser',
+          editorFontZoomLevel: 0,
+          setEditorFontZoomLevel: vi.fn(),
+          setRateLimitsFromPush: vi.fn(),
+          setSshConnectionState: vi.fn(),
+          setSshTargetLabels: vi.fn(),
+          enqueueSshCredentialRequest: vi.fn(),
+          removeSshCredentialRequest: vi.fn(),
+          settings: { terminalFontSize: 13 },
+          activeBrowserTabId: 'workspace-1',
+          activeBrowserTabIdByWorktree: { 'wt-1': 'workspace-1' },
+          browserTabsByWorktree: {
+            'wt-1': [{ id: 'workspace-1' }]
+          },
+          browserPagesByWorkspace: {
+            'workspace-1': [{ id: 'page-1', workspaceId: 'workspace-1' }] // 'missing-page' is neither listed here nor a workspace id
+          },
+          closeBrowserTab,
+          closeBrowserPage
+        })
+      }
+    }))
+
+    vi.doMock('@/lib/ui-zoom', () => ({
+      applyUIZoom: vi.fn()
+    }))
+    vi.doMock('@/lib/worktree-activation', () => ({
+      activateAndRevealWorktree: vi.fn(),
+      ensureWorktreeHasInitialTerminal: vi.fn()
+    }))
+    vi.doMock('@/components/sidebar/visible-worktrees', () => ({
+      getVisibleWorktreeIds: () => []
+    }))
+    vi.doMock('@/lib/editor-font-zoom', () => ({
+      nextEditorFontZoomLevel: vi.fn(() => 0),
+      computeEditorFontSize: vi.fn(() => 13)
+    }))
+    vi.doMock('@/components/settings/SettingsConstants', () => ({
+      zoomLevelToPercent: vi.fn(() => 100),
+      ZOOM_MIN: -3,
+      ZOOM_MAX: 3
+    }))
+    vi.doMock('@/lib/zoom-events', () => ({
+      dispatchZoomLevelChanged: vi.fn()
+    }))
+
+    vi.stubGlobal('window', {
+      dispatchEvent: vi.fn(),
+      api: {
+        repos: { onChanged: () => () => {} },
+        worktrees: { onChanged: () => () => {} },
+        ui: {
+          onOpenSettings: () => () => {},
+          onToggleLeftSidebar: () => () => {},
+          onToggleRightSidebar: () => () => {},
+          onToggleWorktreePalette: () => () => {},
+          onOpenQuickOpen: () => () => {},
+          onJumpToWorktreeIndex: () => () => {},
+          onActivateWorktree: () => () => {},
+          onNewBrowserTab: () => () => {},
+          onRequestTabCreate: () => () => {},
+          replyTabCreate: () => {},
+          onRequestTabClose: (
+            listener: (data: {
+              requestId: string
+              tabId: string | null
+              worktreeId?: string
+            }) => void
+          ) => {
+            tabCloseListenerRef.current = listener // capture the handler so the test can invoke it directly
+            return () => {}
+          },
+          replyTabClose,
+          onNewTerminalTab: () => () => {},
+          onCloseActiveTab: () => () => {},
+          onSwitchTab: () => () => {},
+          onToggleStatusBar: () => () => {},
+          onFullscreenChanged: () => () => {},
+          onTerminalZoom: () => () => {},
+          getZoomLevel: () => 0,
+          set: vi.fn()
+        },
+        updater: {
+          getStatus: () => Promise.resolve({ state: 'idle' }),
+          onStatus: () => () => {},
+          onClearDismissal: () => () => {}
+        },
+        browser: {
+          onGuestLoadFailed: () => () => {},
+          onOpenLinkInOrcaTab: () => () => {},
+          onNavigationUpdate: () => () => {},
+          onActivateView: () => () => {}
+        },
+        rateLimits: {
+          get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),
+          onUpdate: () => () => {}
+        },
+        ssh: {
+          listTargets: () => Promise.resolve([]),
+          getState: () => Promise.resolve(null),
+          onStateChanged: () => () => {},
+          onCredentialRequest: () => () => {},
+          onCredentialResolved: () => () => {}
+        }
+      }
+    })
+
+    const { useIpcEvents } = await import('./useIpcEvents')
+    useIpcEvents()
+
+    tabCloseListenerRef.current?.({
+      requestId: 'req-3',
+      tabId: 'missing-page' // explicit id that matches nothing in the mocked store
+    })
+
+    expect(closeBrowserPage).not.toHaveBeenCalled()
+    expect(closeBrowserTab).not.toHaveBeenCalled()
+    expect(replyTabClose).toHaveBeenCalledWith({
+      requestId: 'req-3',
+      error: 'Browser tab missing-page not found'
+    })
+  })
+})
+
 describe('useIpcEvents shortcut hint clearing', () => {
  beforeEach(() => {
    vi.resetModules()
@ -485,6 +991,10 @@ describe('useIpcEvents shortcut hint clearing', () => {
          },
          onActivateWorktree: () => () => {},
          onNewBrowserTab: () => () => {},
+          onRequestTabCreate: () => () => {},
+          replyTabCreate: () => {},
+          onRequestTabClose: () => () => {},
+          replyTabClose: () => {},
          onNewTerminalTab: () => () => {},
          onCloseActiveTab: () => () => {},
          onSwitchTab: () => () => {},
@ -501,7 +1011,9 @@ describe('useIpcEvents shortcut hint clearing', () => {
        },
        browser: {
          onGuestLoadFailed: () => () => {},
-          onOpenLinkInOrcaTab: () => () => {}
+          onOpenLinkInOrcaTab: () => () => {},
+          onNavigationUpdate: () => () => {},
+          onActivateView: () => () => {}
        },
        rateLimits: {
          get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),
--- a/src/renderer/src/hooks/useIpcEvents.ts
+++ b/src/renderer/src/hooks/useIpcEvents.ts
@ -156,6 +156,28 @@ export function useIpcEvents(): void {
      })
    )

+    // Why: agent-browser drives navigation via CDP, bypassing Electron's webview
+    // event system. The renderer's did-navigate listener never fires for those
+    // navigations, so the Zustand store (address bar, tab title) stays stale.
+    // This IPC pushes the live URL/title from main after goto/click/back/reload.
+    unsubs.push(
+      window.api.browser.onNavigationUpdate(({ browserPageId, url, title }) => {
+        const store = useAppStore.getState() // read the store at event time rather than capturing it when the effect ran
+        store.setBrowserPageUrl(browserPageId, url)
+        store.updateBrowserPageState(browserPageId, { title, loading: false }) // also clears the page's loading flag; NOTE(review): presumably main only pushes once navigation has settled — confirm
+      })
+    )
+
+    // Why: browser webviews only start their guest process when the container
+    // has display != none. After app restart, activeTabType defaults to 'terminal'
+    // so persisted browser tabs never mount. The main process sends this IPC
+    // before browser commands so the webview can start and registerGuest fires.
+    unsubs.push(
+      window.api.browser.onActivateView(() => { // NOTE(review): the event's { worktreeId } payload is ignored here, so the tab type flips regardless of which worktree is active — confirm intended
+        useAppStore.getState().setActiveTabType('browser')
+      })
+    )
+
    unsubs.push(
      window.api.browser.onOpenLinkInOrcaTab(({ browserPageId, url }) => {
        const store = useAppStore.getState()
@ -187,6 +209,92 @@ export function useIpcEvents(): void {
      })
    )

+    // Why: CLI-driven tab creation sends a request with a url and an optional
+    // worktreeId. The renderer creates the tab and replies with its page ID so
+    // the main process can wait for registerGuest before returning to the CLI.
+    unsubs.push(
+      window.api.ui.onRequestTabCreate((data) => {
+        try {
+          const store = useAppStore.getState()
+          const worktreeId = data.worktreeId ?? store.activeWorktreeId // unscoped requests target the currently active worktree
+          if (!worktreeId) {
+            window.api.ui.replyTabCreate({ requestId: data.requestId, error: 'No active worktree' })
+            return
+          }
+          const workspace = store.createBrowserTab(worktreeId, data.url, { title: data.url }) // the url doubles as the initial title
+          // Why: registerGuest fires with the page ID (not workspace ID) as
+          // browserPageId. Return the page ID so waitForTabRegistration can
+          // correlate correctly.
+          const pages = useAppStore.getState().browserPagesByWorkspace[workspace.id] ?? [] // re-read state: `store` was captured before createBrowserTab ran
+          const browserPageId = pages[0]?.id ?? workspace.id // falls back to the workspace ID when no page is listed for it
+          window.api.ui.replyTabCreate({ requestId: data.requestId, browserPageId })
+        } catch (err) {
+          window.api.ui.replyTabCreate({ // every failure is reported back under the same requestId instead of being thrown
+            requestId: data.requestId,
+            error: err instanceof Error ? err.message : 'Tab creation failed'
+          })
+        }
+      })
+    )
+
+    unsubs.push(
+      window.api.ui.onRequestTabClose((data) => { // Handles main's request to close a browser tab/page; every path answers via replyTabClose with the same requestId.
+        try {
+          const store = useAppStore.getState()
+          const explicitTargetId = data.tabId ?? null
+          const tabToClose = // target precedence: explicit id → active tab of the given worktree → globally active tab
+            explicitTargetId ??
+            (data.worktreeId
+              ? (store.activeBrowserTabIdByWorktree?.[data.worktreeId] ?? null)
+              : store.activeBrowserTabId)
+          if (!tabToClose) {
+            window.api.ui.replyTabClose({
+              requestId: data.requestId,
+              error: 'No active browser tab to close'
+            })
+            return
+          }
+          // Why: the bridge stores tabs keyed by browserPageId (which is the page
+          // ID from registerGuest), but closeBrowserTab expects a workspace ID. If
+          // tabToClose is a page ID, close only that page unless it is the
+          // last page in its workspace. The CLI's `tab close --page` contract
+          // targets one browser page, not the entire workspace tab.
+          const isWorkspaceId = Object.values(store.browserTabsByWorktree)
+            .flat()
+            .some((ws) => ws.id === tabToClose)
+          if (!isWorkspaceId) {
+            const owningWorkspace = Object.entries(store.browserPagesByWorkspace).find(
+              ([, pages]) => pages.some((p) => p.id === tabToClose)
+            )
+            if (owningWorkspace) {
+              const [workspaceId, pages] = owningWorkspace
+              if (pages.length <= 1) {
+                store.closeBrowserTab(workspaceId) // last page: close the whole workspace tab
+              } else {
+                store.closeBrowserPage(tabToClose)
+              }
+              window.api.ui.replyTabClose({ requestId: data.requestId })
+              return
+            }
+          }
+          if (explicitTargetId) { // an explicit id that did not resolve to a page above is rejected rather than reported as success
+            window.api.ui.replyTabClose({
+              requestId: data.requestId,
+              error: `Browser tab ${explicitTargetId} not found`
+            })
+            return
+          }
+          store.closeBrowserTab(tabToClose) // implicit target taken from the store's active-tab fields
+          window.api.ui.replyTabClose({ requestId: data.requestId })
+        } catch (err) {
+          window.api.ui.replyTabClose({
+            requestId: data.requestId,
+            error: err instanceof Error ? err.message : 'Tab close failed'
+          })
+        }
+      })
+    )
+
    unsubs.push(
      window.api.ui.onNewTerminalTab(() => {
        const store = useAppStore.getState()
--- a/src/renderer/src/store/slices/browser.ts
+++ b/src/renderer/src/store/slices/browser.ts
@ -595,6 +595,17 @@ export const createBrowserSlice: StateCreator<AppState, [], [], BrowserSlice> =
      }
    })

+    // Why: notify the CDP bridge which guest webContents is now active so
+    // subsequent agent commands (snapshot, click, etc.) target the correct tab.
+    // registerGuest uses page IDs (not workspace IDs), so we resolve the active
+    // page within the workspace to find the correct browserPageId.
+    const workspace = findWorkspace(get().browserTabsByWorktree, tabId)
+    if (workspace?.activePageId && typeof window !== 'undefined' && window.api?.browser) {
+      window.api.browser
+        .notifyActiveTabChanged({ browserPageId: workspace.activePageId })
+        .catch(() => {})
+    }
+
    const item = Object.values(get().unifiedTabsByWorktree)
      .flat()
      .find((entry) => entry.contentType === 'browser' && entry.entityId === tabId)
@ -796,6 +807,12 @@ export const createBrowserSlice: StateCreator<AppState, [], [], BrowserSlice> =
      }
    })

+    // Why: switching the active page within a workspace changes which guest
+    // webContents the CDP bridge should target for agent commands.
+    if (typeof window !== 'undefined' && window.api?.browser) {
+      window.api.browser.notifyActiveTabChanged({ browserPageId: pageId }).catch(() => {})
+    }
+
    const workspace = findWorkspace(get().browserTabsByWorktree, workspaceId)
    if (!workspace) {
      return
--- a/src/shared/runtime-types.ts
+++ b/src/shared/runtime-types.ts
@ -1,3 +1,4 @@
+/* eslint-disable max-lines -- Why: shared type definitions for all runtime RPC methods live in one file for discoverability and import simplicity. */
 import type { TerminalPaneLayoutNode } from './types'
 import type { GitWorktreeInfo, Repo } from './types'

@ -152,3 +153,245 @@ export type RuntimeWorktreeListResult = {
  totalCount: number
  truncated: boolean
 }
+
+// ── Browser automation types ──
+
+export type BrowserSnapshotRef = { // Element type of BrowserSnapshotResult.refs.
+  ref: string // NOTE(review): presumably the handle later commands use to address this element — confirm
+  role: string // NOTE(review): looks like an accessibility role — confirm against the snapshot producer
+  name: string // NOTE(review): looks like the element's accessible name — confirm
+}
+
+export type BrowserSnapshotResult = { // Result payload for the browser snapshot command.
+  browserPageId: string // NOTE(review): presumably the page the snapshot was taken from — confirm
+  snapshot: string // snapshot carried as a single string; its format is not defined in this file
+  refs: BrowserSnapshotRef[] // per-element entries returned alongside the snapshot string
+  url: string
+  title: string
+}
+
+export type BrowserClickResult = { // Result payload for the browser click command.
+  clicked: string // NOTE(review): presumably identifies the element that was clicked — confirm
+}
+
+export type BrowserGotoResult = { // Result payload for the browser goto command; same fields as BrowserBackResult and BrowserReloadResult.
+  url: string // NOTE(review): presumably the page URL after navigation settles — confirm
+  title: string // NOTE(review): presumably the page title after navigation settles — confirm
+}
+
+export type BrowserFillResult = { // Result payload for the browser fill command.
+  filled: string // NOTE(review): presumably identifies the element that was filled — confirm
+}
+
+export type BrowserTypeResult = { // Result payload for the browser type command.
+  typed: boolean // boolean flag, unlike the string-valued fields on the click/fill results
+}
+
+export type BrowserSelectResult = { // Result payload for the browser select command; structurally identical to BrowserSelectAllResult.
+  selected: string // NOTE(review): unclear from here whether this is the chosen option or the target element — confirm
+}
+
+export type BrowserScrollResult = { // Result payload for the browser scroll command.
+  scrolled: 'up' | 'down' // only these two literal directions are representable (no horizontal values)
+}
+
+export type BrowserBackResult = { // Result payload for the browser back command; same fields as BrowserGotoResult.
+  url: string // NOTE(review): presumably the page URL after going back — confirm
+  title: string // NOTE(review): presumably the page title after going back — confirm
+}
+
+export type BrowserReloadResult = { // Result payload for the browser reload command; same fields as BrowserGotoResult.
+  url: string // NOTE(review): presumably the page URL after the reload — confirm
+  title: string // NOTE(review): presumably the page title after the reload — confirm
+}
+
+export type BrowserScreenshotResult = { // Result payload for the browser screenshot command.
+  data: string // NOTE(review): presumably the encoded image bytes (e.g. base64) — confirm encoding with the producer
+  format: 'png' | 'jpeg' // only these two literal formats are representable
+}
+
+export type BrowserEvalResult = { // Result payload for the browser eval command.
+  result: string // the evaluation result is carried as a string; how non-string values are serialized is not shown here
+  origin: string // NOTE(review): presumably the origin of the page the code ran in — confirm
+}
+
+export type BrowserTabInfo = { // Element type of BrowserTabListResult.tabs.
+  browserPageId: string
+  index: number // NOTE(review): zero- vs one-based is not established in this file — confirm
+  url: string
+  title: string
+  active: boolean // NOTE(review): presumably marks the tab that agent commands currently target — confirm
+}
+
+export type BrowserTabListResult = { // Result payload for the browser tab list command.
+  tabs: BrowserTabInfo[] // NOTE(review): ordering relative to BrowserTabInfo.index is not shown here — confirm
+}
+
+export type BrowserTabSwitchResult = { // Result payload for the browser tab switch command.
+  switched: number // NOTE(review): presumably the tab index switched to (cf. BrowserTabInfo.index) — confirm
+  browserPageId: string // NOTE(review): presumably the page ID of the newly active tab — confirm
+}
+
+export type BrowserHoverResult = { // Result payload for the browser hover command.
+  hovered: string // NOTE(review): presumably identifies the element that was hovered — confirm
+}
+
+export type BrowserDragResult = { // Result payload for the browser drag command.
+  dragged: { from: string; to: string } // NOTE(review): presumably the source and destination element identifiers — confirm
+}
+
+export type BrowserUploadResult = { // Result payload for the browser upload command.
+  uploaded: number // NOTE(review): presumably the number of files uploaded — confirm
+}
+
+export type BrowserWaitResult = { // Result payload for the browser wait command.
+  waited: boolean // NOTE(review): unclear from here whether false means timeout or is never produced — confirm
+}
+
+export type BrowserCheckResult = { // Result payload for the browser check command.
+  checked: boolean // NOTE(review): ambiguous — resulting checked state vs. "action performed"; confirm with the handler
+}
+
+export type BrowserFocusResult = { // Result payload for the browser focus command.
+  focused: string // NOTE(review): presumably identifies the element that received focus — confirm
+}
+
+export type BrowserClearResult = { // Result payload for the browser clear command.
+  cleared: string // NOTE(review): presumably identifies the element whose value was cleared — confirm
+}
+
+export type BrowserSelectAllResult = { // Result payload for the browser select-all command; structurally identical to BrowserSelectResult.
+  selected: string // NOTE(review): presumably identifies the element whose content was selected — confirm
+}
+
+export type BrowserKeypressResult = { // Result payload for the browser keypress command.
+  pressed: string // NOTE(review): presumably the key (or key combination) that was sent — confirm
+}
+
+export type BrowserPdfResult = { // Result payload for the browser PDF command.
+  data: string // NOTE(review): presumably the encoded PDF bytes (e.g. base64) — confirm encoding with the producer
+}
+
+// ── Cookie management types ──
+
+export type BrowserCookie = { // Element type of BrowserCookieGetResult.cookies; every field is required.
+  name: string
+  value: string
+  domain: string
+  path: string
+  expires: number // NOTE(review): unit and epoch (seconds vs. milliseconds, session sentinel) are not shown here — confirm
+  httpOnly: boolean
+  secure: boolean
+  sameSite: string // typed as a plain string, not a literal union; NOTE(review): confirm the allowed values
+}
+
+export type BrowserCookieGetResult = { // Result payload for the browser cookie get command.
+  cookies: BrowserCookie[] // NOTE(review): scope of the list (current page vs. whole session) is not shown here — confirm
+}
+
+export type BrowserCookieSetResult = { // Result payload for the browser cookie set command.
+  success: boolean // NOTE(review): unclear whether failure is reported as false or as a BrowserErrorCode — confirm
+}
+
+export type BrowserCookieDeleteResult = { // Result payload for the browser cookie delete command.
+  deleted: boolean // NOTE(review): unclear whether false means "no matching cookie" — confirm
+}
+
+// ── Viewport emulation types ──
+
+export type BrowserViewportResult = { // Result payload for the browser viewport emulation command.
+  width: number // NOTE(review): presumably CSS pixels — confirm
+  height: number // NOTE(review): presumably CSS pixels — confirm
+  deviceScaleFactor: number
+  mobile: boolean
+}
+
+// ── Geolocation types ──
+
+export type BrowserGeolocationResult = { // Result payload for the browser geolocation command.
+  latitude: number // NOTE(review): presumably decimal degrees — confirm
+  longitude: number // NOTE(review): presumably decimal degrees — confirm
+  accuracy: number // NOTE(review): unit is not shown here (presumably meters) — confirm
+}
+
+// ── Request interception types ──
+
+export type BrowserInterceptedRequest = { // Describes one intercepted request; no type in this part of the file references it.
+  id: string // NOTE(review): presumably the handle used to continue or fail the request — confirm
+  url: string
+  method: string
+  headers: Record<string, string> // string-to-string map, so repeated headers cannot be represented as arrays
+  resourceType: string // typed as a plain string, not a literal union
+}
+
+export type BrowserInterceptEnableResult = { // Result payload for enabling request interception.
+  enabled: boolean
+  patterns: string[] // NOTE(review): pattern syntax (glob vs. regex) is not defined in this file — confirm
+}
+
+export type BrowserInterceptDisableResult = { // Result payload for disabling request interception.
+  disabled: boolean // NOTE(review): unclear whether false means interception was not active — confirm
+}
+
+// ── Console/network capture types ──
+
+export type BrowserConsoleEntry = { // Element type of BrowserConsoleResult.entries.
+  level: string // typed as a plain string, not a literal union
+  text: string
+  timestamp: number // NOTE(review): unit and epoch are not shown here — confirm
+  url?: string // optional; may be absent from an entry
+  line?: number // optional; NOTE(review): zero- vs one-based is not shown here — confirm
+}
+
+export type BrowserConsoleResult = { // Result payload for reading captured console output.
+  entries: BrowserConsoleEntry[]
+  truncated: boolean // NOTE(review): presumably true when some entries were dropped — confirm which end is dropped
+}
+
+export type BrowserNetworkEntry = { // Element type of BrowserNetworkLogResult.entries.
+  url: string
+  method: string
+  status: number // required; NOTE(review): value used for requests that never received a response is not shown — confirm
+  mimeType: string
+  size: number // NOTE(review): unit and whether this is encoded or decoded size are not shown — confirm
+  timestamp: number // NOTE(review): unit and epoch are not shown here — confirm
+}
+
+export type BrowserNetworkLogResult = { // Result payload for reading the captured network log; same shape as BrowserConsoleResult.
+  entries: BrowserNetworkEntry[]
+  truncated: boolean // NOTE(review): presumably true when some entries were dropped — confirm
+}
+
+export type BrowserCaptureStartResult = { // Result payload for starting console/network capture.
+  capturing: boolean // NOTE(review): unclear whether false is ever returned — confirm
+}
+
+export type BrowserCaptureStopResult = { // Result payload for stopping console/network capture.
+  stopped: boolean // NOTE(review): unclear whether false means capture was not running — confirm
+}
+
+export type BrowserExecResult = { // Result payload for the browser exec command.
+  output: unknown // deliberately untyped: consumers must narrow before use; the producer's shape is not defined here
+}
+
+export type BrowserTabCreateResult = { // Result payload for the browser tab create command.
+  browserPageId: string // NOTE(review): presumably the page ID of the newly created tab — confirm
+}
+
+export type BrowserTabCloseResult = { // Result payload for the browser tab close command.
+  closed: boolean // NOTE(review): unclear whether failure is reported as false or as a BrowserErrorCode — confirm
+}
+
+export type BrowserErrorCode = // String-literal union of browser error codes; every member carries the `browser_` prefix.
+  | 'browser_no_tab'
+  | 'browser_tab_not_found'
+  | 'browser_tab_closed'
+  | 'browser_stale_ref' // NOTE(review): distinction from 'browser_ref_not_found' is not defined here — confirm
+  | 'browser_ref_not_found'
+  | 'browser_navigation_failed'
+  | 'browser_element_not_interactable'
+  | 'browser_eval_error'
+  | 'browser_cdp_error'
+  | 'browser_debugger_detached'
+  | 'browser_timeout'
+  | 'browser_error' // NOTE(review): presumably the generic fallback code — confirm