feat: computer use via agent-browser CDP bridge (#856)

This commit is contained in:
Jinwoo Hong 2026-04-20 23:56:14 -04:00 committed by GitHub
parent 22ae50e0ae
commit 7a9bc4ef6d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 13560 additions and 43 deletions

View file

@ -1,3 +1,6 @@
const { chmodSync, existsSync, readdirSync } = require('node:fs')
const { join } = require('node:path')
const isMacRelease = process.env.ORCA_MAC_RELEASE === '1' const isMacRelease = process.env.ORCA_MAC_RELEASE === '1'
/** @type {import('electron-builder').Configuration} */ /** @type {import('electron-builder').Configuration} */
@ -23,12 +26,34 @@ module.exports = {
// Why: daemon-entry.js is forked as a separate Node.js process and must be // Why: daemon-entry.js is forked as a separate Node.js process and must be
// accessible on disk (not inside the asar archive) for child_process.fork(). // accessible on disk (not inside the asar archive) for child_process.fork().
asarUnpack: ['out/cli/**', 'out/shared/**', 'out/main/daemon-entry.js', 'out/main/chunks/**', 'resources/**'], asarUnpack: ['out/cli/**', 'out/shared/**', 'out/main/daemon-entry.js', 'out/main/chunks/**', 'resources/**'],
afterPack: async (context) => {
const resourcesDir =
context.electronPlatformName === 'darwin'
? join(context.appOutDir, `${context.packager.appInfo.productFilename}.app`, 'Contents', 'Resources')
: join(context.appOutDir, 'resources')
if (!existsSync(resourcesDir)) {
return
}
for (const filename of readdirSync(resourcesDir)) {
if (!filename.startsWith('agent-browser-')) {
continue
}
// Why: the upstream package has inconsistent executable bits across
// platform binaries (notably darwin-x64). child_process.execFile needs
// the copied binary to be executable in packaged apps.
chmodSync(join(resourcesDir, filename), 0o755)
}
},
win: { win: {
executableName: 'Orca', executableName: 'Orca',
extraResources: [ extraResources: [
{ {
from: 'resources/win32/bin/orca.cmd', from: 'resources/win32/bin/orca.cmd',
to: 'bin/orca.cmd' to: 'bin/orca.cmd'
},
{
from: 'node_modules/agent-browser/bin/agent-browser-win32-x64.exe',
to: 'agent-browser-win32-x64.exe'
} }
] ]
}, },
@ -60,6 +85,10 @@ module.exports = {
{ {
from: 'resources/darwin/bin/orca', from: 'resources/darwin/bin/orca',
to: 'bin/orca' to: 'bin/orca'
},
{
from: 'node_modules/agent-browser/bin/agent-browser-darwin-${arch}',
to: 'agent-browser-darwin-${arch}'
} }
], ],
target: [ target: [
@ -84,6 +113,10 @@ module.exports = {
{ {
from: 'resources/linux/bin/orca', from: 'resources/linux/bin/orca',
to: 'bin/orca' to: 'bin/orca'
},
{
from: 'node_modules/agent-browser/bin/agent-browser-linux-${arch}',
to: 'agent-browser-linux-${arch}'
} }
], ],
target: ['AppImage', 'deb'], target: ['AppImage', 'deb'],

View file

@ -73,6 +73,7 @@
"@xterm/addon-webgl": "^0.19.0", "@xterm/addon-webgl": "^0.19.0",
"@xterm/headless": "^6.0.0", "@xterm/headless": "^6.0.0",
"@xterm/xterm": "^6.0.0", "@xterm/xterm": "^6.0.0",
"agent-browser": "~0.24.1",
"class-variance-authority": "^0.7.1", "class-variance-authority": "^0.7.1",
"clsx": "^2.1.1", "clsx": "^2.1.1",
"cmdk": "^1.1.1", "cmdk": "^1.1.1",
@ -99,6 +100,7 @@
"ssh2": "^1.17.0", "ssh2": "^1.17.0",
"tailwind-merge": "^3.5.0", "tailwind-merge": "^3.5.0",
"tw-animate-css": "^1.4.0", "tw-animate-css": "^1.4.0",
"ws": "^8.20.0",
"zod": "^4.3.6", "zod": "^4.3.6",
"zustand": "^5.0.12" "zustand": "^5.0.12"
}, },
@ -111,6 +113,7 @@
"@types/react": "^19.2.14", "@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3", "@types/react-dom": "^19.2.3",
"@types/ssh2": "^1.15.5", "@types/ssh2": "^1.15.5",
"@types/ws": "^8.18.1",
"@typescript/native-preview": "7.0.0-dev.20260406.1", "@typescript/native-preview": "7.0.0-dev.20260406.1",
"@vitejs/plugin-react": "^5.2.0", "@vitejs/plugin-react": "^5.2.0",
"electron": "^41.1.0", "electron": "^41.1.0",

View file

@ -106,6 +106,9 @@ importers:
'@xterm/xterm': '@xterm/xterm':
specifier: ^6.0.0 specifier: ^6.0.0
version: 6.0.0 version: 6.0.0
agent-browser:
specifier: ~0.24.1
version: 0.24.1
class-variance-authority: class-variance-authority:
specifier: ^0.7.1 specifier: ^0.7.1
version: 0.7.1 version: 0.7.1
@ -184,6 +187,9 @@ importers:
tw-animate-css: tw-animate-css:
specifier: ^1.4.0 specifier: ^1.4.0
version: 1.4.0 version: 1.4.0
ws:
specifier: ^8.20.0
version: 8.20.0
zod: zod:
specifier: ^4.3.6 specifier: ^4.3.6
version: 4.3.6 version: 4.3.6
@ -199,7 +205,7 @@ importers:
version: 1.59.1 version: 1.59.1
'@stablyai/playwright-test': '@stablyai/playwright-test':
specifier: ^2.1.13 specifier: ^2.1.13
version: 2.1.13(@playwright/test@1.59.1)(zod@4.3.6) version: 2.1.14(@playwright/test@1.59.1)(zod@4.3.6)
'@tailwindcss/vite': '@tailwindcss/vite':
specifier: ^4.2.2 specifier: ^4.2.2
version: 4.2.2(vite@7.3.1(@types/node@25.5.0)(jiti@2.6.1)(lightningcss@1.32.0)(yaml@2.8.3)) version: 4.2.2(vite@7.3.1(@types/node@25.5.0)(jiti@2.6.1)(lightningcss@1.32.0)(yaml@2.8.3))
@ -215,6 +221,9 @@ importers:
'@types/ssh2': '@types/ssh2':
specifier: ^1.15.5 specifier: ^1.15.5
version: 1.15.5 version: 1.15.5
'@types/ws':
specifier: ^8.18.1
version: 8.18.1
'@typescript/native-preview': '@typescript/native-preview':
specifier: 7.0.0-dev.20260406.1 specifier: 7.0.0-dev.20260406.1
version: 7.0.0-dev.20260406.1 version: 7.0.0-dev.20260406.1
@ -2205,8 +2214,8 @@ packages:
resolution: {integrity: sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ==} resolution: {integrity: sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ==}
engines: {node: '>=18'} engines: {node: '>=18'}
'@stablyai/playwright-base@2.1.13': '@stablyai/playwright-base@2.1.14':
resolution: {integrity: sha512-F8lc2qSfNZQ53WeWWDLLZSpu6f2ZCuiVgGP0P0+PGdO9swCKEwV0f+ti7a4MlmgMlHoCsf5tvddXIVpikhPRlQ==} resolution: {integrity: sha512-/iAgMW5tC0ETDo3mFyTzszRrD7rGFIT4fgDgtZxqa9vPhiTLix/1+GeOOBNY0uS+XRLFY0Uc/irsC3XProL47g==}
engines: {node: '>=18'} engines: {node: '>=18'}
peerDependencies: peerDependencies:
'@playwright/test': ^1.52.0 '@playwright/test': ^1.52.0
@ -2215,13 +2224,13 @@ packages:
zod: zod:
optional: true optional: true
'@stablyai/playwright-test@2.1.13': '@stablyai/playwright-test@2.1.14':
resolution: {integrity: sha512-VXy65GukMkIsHtTuYuLhSP3l3YMl21ePTXKI2xLRBCkgzhTLdzat0vHM5TEh7vh58lsxmHlruMFESjcaIeb25g==} resolution: {integrity: sha512-CAyVVnRdsyJg9pbK3Yq5L9lcvEabilFLb2RWeTQybKv7sDkEEqE2t1boXqBt3X6wQO6lsyhUHB9pc10wSwuc4Q==}
peerDependencies: peerDependencies:
'@playwright/test': ^1.52.0 '@playwright/test': ^1.52.0
'@stablyai/playwright@2.1.13': '@stablyai/playwright@2.1.14':
resolution: {integrity: sha512-PGE6hR5WTknfbEBz+KvhG9i2gukSYdie0at6SI0CnJPu13NvGBno1N0Fm/AePhtO5Kjn1mMWW5cRiknVP4bOwA==} resolution: {integrity: sha512-+SkphioOf+o2VWiM3KPm/fFTTjwNHUV5b2ZRPrLMTsW6bwmEvjo2FbVOUobNBqbopQBnntNLd8ZCG2gvw7rwtg==}
peerDependencies: peerDependencies:
'@playwright/test': ^1.52.0 '@playwright/test': ^1.52.0
@ -2751,6 +2760,9 @@ packages:
'@types/verror@1.10.11': '@types/verror@1.10.11':
resolution: {integrity: sha512-RlDm9K7+o5stv0Co8i8ZRGxDbrTxhJtgjqjFyVh/tXQyl/rYtTKlnTvZ88oSTeYREWurwx20Js4kTuKCsFkUtg==} resolution: {integrity: sha512-RlDm9K7+o5stv0Co8i8ZRGxDbrTxhJtgjqjFyVh/tXQyl/rYtTKlnTvZ88oSTeYREWurwx20Js4kTuKCsFkUtg==}
'@types/ws@8.18.1':
resolution: {integrity: sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==}
'@types/yauzl@2.10.3': '@types/yauzl@2.10.3':
resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==} resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==}
@ -2880,6 +2892,10 @@ packages:
resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==}
engines: {node: '>= 14'} engines: {node: '>= 14'}
agent-browser@0.24.1:
resolution: {integrity: sha512-csWJtYEQow52b+p93zVZfNrcNBwbxGCZDXDMNWl2ij2i0MFKubIzN+icUeX2/NrkZe5iIau8px+HQlxata2oPw==}
hasBin: true
ajv-formats@3.0.1: ajv-formats@3.0.1:
resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==}
peerDependencies: peerDependencies:
@ -6047,6 +6063,18 @@ packages:
wrappy@1.0.2: wrappy@1.0.2:
resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==}
ws@8.20.0:
resolution: {integrity: sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==}
engines: {node: '>=10.0.0'}
peerDependencies:
bufferutil: ^4.0.1
utf-8-validate: '>=5.0.2'
peerDependenciesMeta:
bufferutil:
optional: true
utf-8-validate:
optional: true
wsl-utils@0.3.1: wsl-utils@0.3.1:
resolution: {integrity: sha512-g/eziiSUNBSsdDJtCLB8bdYEUMj4jR7AGeUo96p/3dTafgjHhpF4RiCFPiRILwjQoDXx5MqkBr4fwWtR3Ky4Wg==} resolution: {integrity: sha512-g/eziiSUNBSsdDJtCLB8bdYEUMj4jR7AGeUo96p/3dTafgjHhpF4RiCFPiRILwjQoDXx5MqkBr4fwWtR3Ky4Wg==}
engines: {node: '>=20'} engines: {node: '>=20'}
@ -7897,7 +7925,7 @@ snapshots:
'@sindresorhus/merge-streams@4.0.0': {} '@sindresorhus/merge-streams@4.0.0': {}
'@stablyai/playwright-base@2.1.13(@playwright/test@1.59.1)(zod@4.3.6)': '@stablyai/playwright-base@2.1.14(@playwright/test@1.59.1)(zod@4.3.6)':
dependencies: dependencies:
'@playwright/test': 1.59.1 '@playwright/test': 1.59.1
jpeg-js: 0.4.4 jpeg-js: 0.4.4
@ -7906,18 +7934,18 @@ snapshots:
optionalDependencies: optionalDependencies:
zod: 4.3.6 zod: 4.3.6
'@stablyai/playwright-test@2.1.13(@playwright/test@1.59.1)(zod@4.3.6)': '@stablyai/playwright-test@2.1.14(@playwright/test@1.59.1)(zod@4.3.6)':
dependencies: dependencies:
'@playwright/test': 1.59.1 '@playwright/test': 1.59.1
'@stablyai/playwright': 2.1.13(@playwright/test@1.59.1)(zod@4.3.6) '@stablyai/playwright': 2.1.14(@playwright/test@1.59.1)(zod@4.3.6)
'@stablyai/playwright-base': 2.1.13(@playwright/test@1.59.1)(zod@4.3.6) '@stablyai/playwright-base': 2.1.14(@playwright/test@1.59.1)(zod@4.3.6)
transitivePeerDependencies: transitivePeerDependencies:
- zod - zod
'@stablyai/playwright@2.1.13(@playwright/test@1.59.1)(zod@4.3.6)': '@stablyai/playwright@2.1.14(@playwright/test@1.59.1)(zod@4.3.6)':
dependencies: dependencies:
'@playwright/test': 1.59.1 '@playwright/test': 1.59.1
'@stablyai/playwright-base': 2.1.13(@playwright/test@1.59.1)(zod@4.3.6) '@stablyai/playwright-base': 2.1.14(@playwright/test@1.59.1)(zod@4.3.6)
transitivePeerDependencies: transitivePeerDependencies:
- zod - zod
@ -8482,6 +8510,10 @@ snapshots:
'@types/verror@1.10.11': '@types/verror@1.10.11':
optional: true optional: true
'@types/ws@8.18.1':
dependencies:
'@types/node': 25.5.0
'@types/yauzl@2.10.3': '@types/yauzl@2.10.3':
dependencies: dependencies:
'@types/node': 25.5.0 '@types/node': 25.5.0
@ -8608,6 +8640,8 @@ snapshots:
agent-base@7.1.4: {} agent-base@7.1.4: {}
agent-browser@0.24.1: {}
ajv-formats@3.0.1(ajv@8.18.0): ajv-formats@3.0.1(ajv@8.18.0):
optionalDependencies: optionalDependencies:
ajv: 8.18.0 ajv: 8.18.0
@ -12374,6 +12408,8 @@ snapshots:
wrappy@1.0.2: {} wrappy@1.0.2: {}
ws@8.20.0: {}
wsl-utils@0.3.1: wsl-utils@0.3.1:
dependencies: dependencies:
is-wsl: 3.1.1 is-wsl: 3.1.1

View file

@ -1,6 +1,6 @@
--- ---
name: orca-cli name: orca-cli
description: Use the Orca CLI to orchestrate worktrees and live terminals through a running Orca editor. Use when an agent needs to create, inspect, update, or remove Orca worktrees; inspect repo state known to Orca; or read, send to, wait on, or stop Orca-managed terminals. Coding agents should also keep the current worktree comment updated with the latest meaningful work-in-progress checkpoint whenever useful; this is an expected default behavior, not a special trigger-only action. Triggers include "use orca cli", "manage Orca worktrees", "read Orca terminal", "reply to Claude Code in Orca", "create a worktree in Orca", "update Orca worktree comment", or any task where the agent should operate through Orca instead of talking to git worktrees and terminal processes directly. description: Use the Orca CLI to orchestrate worktrees, live terminals, and browser automation through a running Orca editor. Use when an agent needs to create, inspect, update, or remove Orca worktrees; inspect repo state known to Orca; read, send to, wait on, or stop Orca-managed terminals; or automate the built-in browser (navigate, snapshot, click, fill, screenshot). Coding agents should also keep the current worktree comment updated with the latest meaningful work-in-progress checkpoint whenever useful. Triggers include "use orca cli", "manage Orca worktrees", "read Orca terminal", "reply to Claude Code in Orca", "create a worktree in Orca", "update Orca worktree comment", "click on", "fill the form", "take a screenshot", "navigate to", "interact with the page", "snapshot the page", or any task where the agent should operate through Orca.
--- ---
# Orca CLI # Orca CLI
@ -167,6 +167,374 @@ Why: terminal handles are runtime-scoped and may go stale after reloads. If Orca
- If the user asks for CLI UX feedback, test the public `orca` command first. Only inspect `src/cli` or use `node out/cli/index.js` if the public command is missing or the task is explicitly about implementation internals. - If the user asks for CLI UX feedback, test the public `orca` command first. Only inspect `src/cli` or use `node out/cli/index.js` if the public command is missing or the task is explicitly about implementation internals.
- If a command fails, prefer retrying with the public `orca` command before concluding the CLI is broken, unless the failure already came from `orca` itself. - If a command fails, prefer retrying with the public `orca` command before concluding the CLI is broken, unless the failure already came from `orca` itself.
## Browser Automation
The `orca` CLI also drives the built-in Orca browser. The core workflow is a **snapshot-interact-re-snapshot** loop:
1. **Snapshot** the page to see interactive elements and their refs.
2. **Interact** using refs (`@e1`, `@e3`, etc.) to click, fill, or select.
3. **Re-snapshot** after interactions to see the updated page state.
```bash
orca goto --url https://example.com --json
orca snapshot --json
# Read the refs from the snapshot output
orca click --element @e3 --json
orca snapshot --json
```
### Element Refs
Refs like `@e1`, `@e5` are short identifiers assigned to interactive page elements during a snapshot. They are:
- **Assigned by snapshot**: Run `orca snapshot` to get current refs.
- **Scoped to one tab**: Refs from one tab are not valid in another.
- **Invalidated by navigation**: If the page navigates after a snapshot, refs become stale. Re-snapshot to get fresh refs.
- **Invalidated by tab switch**: Switching tabs with `orca tab switch` invalidates refs. Re-snapshot after switching.
If a ref is stale, the command returns `browser_stale_ref` — re-snapshot and retry.
### Worktree Scoping
Browser commands default to the **current worktree** — only tabs belonging to the agent's worktree are visible and targetable. Tab indices are relative to the filtered tab list.
```bash
# Default: operates on tabs in the current worktree
orca snapshot --json
# Explicitly target all worktrees (cross-worktree access)
orca snapshot --worktree all --json
# Tab indices are relative to the worktree-filtered list
orca tab list --json # Shows tabs [0], [1], [2] for this worktree
orca tab switch --index 1 --json # Switches to tab [1] within this worktree
```
If no tabs are open in the current worktree, commands return `browser_no_tab`.
### Stable Page Targeting
For single-agent flows, bare browser commands are fine: Orca will target the active browser tab in the current worktree.
For concurrent or multi-process browser automation, prefer a stable page id instead of ambient active-tab state:
1. Run `orca tab list --json`.
2. Read `tabs[].browserPageId` from the result.
3. Pass `--page <browserPageId>` to follow-up commands like `snapshot`, `click`, `goto`, `screenshot`, `tab switch`, or `tab close`.
Why: active-tab state and tab indices can change while another Orca CLI process is working. `browserPageId` pins the command to one concrete tab.
```bash
orca tab list --json
orca snapshot --page page-123 --json
orca click --page page-123 --element @e3 --json
orca screenshot --page page-123 --json
orca tab switch --page page-123 --json
orca tab close --page page-123 --json
```
If you also pass `--worktree`, Orca treats it as extra scoping/validation for that page id. Without `--page`, commands still fall back to the current worktree's active tab.
### Navigation
```bash
orca goto --url <url> [--json] # Navigate to URL, waits for page load
orca back [--json] # Go back in browser history
orca forward [--json] # Go forward in browser history
orca reload [--json] # Reload the current page
```
### Observation
```bash
orca snapshot [--page <browserPageId>] [--json] # Accessibility tree snapshot with element refs
orca screenshot [--page <browserPageId>] [--format <png|jpeg>] [--json] # Viewport screenshot (base64)
orca full-screenshot [--page <browserPageId>] [--format <png|jpeg>] [--json] # Full-page screenshot (base64)
orca pdf [--page <browserPageId>] [--json] # Export page as PDF (base64)
```
### Interaction
```bash
orca click --element <ref> [--page <browserPageId>] [--json] # Click an element by ref
orca dblclick --element <ref> [--page <browserPageId>] [--json] # Double-click an element
orca fill --element <ref> --value <text> [--page <browserPageId>] [--json] # Clear and fill an input
orca type --input <text> [--page <browserPageId>] [--json] # Type at current focus (no element targeting)
orca select --element <ref> --value <value> [--page <browserPageId>] [--json] # Select dropdown option
orca check --element <ref> [--page <browserPageId>] [--json] # Check a checkbox
orca uncheck --element <ref> [--page <browserPageId>] [--json] # Uncheck a checkbox
orca scroll --direction <up|down> [--amount <pixels>] [--page <browserPageId>] [--json] # Scroll viewport
orca scrollintoview --element <ref> [--page <browserPageId>] [--json] # Scroll element into view
orca hover --element <ref> [--page <browserPageId>] [--json] # Hover over an element
orca focus --element <ref> [--page <browserPageId>] [--json] # Focus an element
orca drag --from <ref> --to <ref> [--page <browserPageId>] [--json] # Drag from one element to another
orca clear --element <ref> [--page <browserPageId>] [--json] # Clear an input field
orca select-all --element <ref> [--page <browserPageId>] [--json] # Select all text in an element
orca keypress --key <key> [--page <browserPageId>] [--json] # Press a key (Enter, Tab, Escape, etc.)
orca upload --element <ref> --files <paths> [--page <browserPageId>] [--json] # Upload files to a file input
```
### Tab Management
```bash
orca tab list [--json] # List open browser tabs
orca tab switch (--index <n> | --page <browserPageId>) [--json] # Switch active tab (invalidates refs)
orca tab create [--url <url>] [--json] # Open a new browser tab
orca tab close [--index <n> | --page <browserPageId>] [--json] # Close a browser tab
```
### Wait / Synchronization
```bash
orca wait [--timeout <ms>] [--json] # Wait for timeout (default 1000ms)
orca wait --selector <css> [--state <visible|hidden>] [--timeout <ms>] [--json] # Wait for element
orca wait --text <string> [--timeout <ms>] [--json] # Wait for text to appear on page
orca wait --url <substring> [--timeout <ms>] [--json] # Wait for URL to contain substring
orca wait --load <networkidle|load|domcontentloaded> [--timeout <ms>] [--json] # Wait for load state
orca wait --fn <js-expression> [--timeout <ms>] [--json] # Wait for JS condition to be truthy
```
After any page-changing action, pick one:
- Wait for specific content: `orca wait --text "Dashboard" --json`
- Wait for URL change: `orca wait --url "/dashboard" --json`
- Wait for network idle (catch-all for SPA navigation): `orca wait --load networkidle --json`
- Wait for an element: `orca wait --selector ".results" --json`
Avoid bare `orca wait --timeout 2000` except when debugging — it makes scripts slow and flaky.
### Data Extraction
```bash
orca exec --command "get text @e1" [--json] # Get visible text of an element
orca exec --command "get html @e1" [--json] # Get innerHTML
orca exec --command "get value @e1" [--json] # Get input value
orca exec --command "get attr @e1 href" [--json] # Get element attribute
orca exec --command "get title" [--json] # Get page title
orca exec --command "get url" [--json] # Get current URL
orca exec --command "get count .item" [--json] # Count matching elements
```
### State Checks
```bash
orca exec --command "is visible @e1" [--json] # Check if element is visible
orca exec --command "is enabled @e1" [--json] # Check if element is enabled
orca exec --command "is checked @e1" [--json] # Check if checkbox is checked
```
### Page Inspection
```bash
orca eval --expression <js> [--json] # Evaluate JS in page context
```
### Cookie Management
```bash
orca cookie get [--url <url>] [--json] # List cookies
orca cookie set --name <n> --value <v> [--domain <d>] [--json] # Set a cookie
orca cookie delete --name <n> [--domain <d>] [--json] # Delete a cookie
```
### Emulation
```bash
orca viewport --width <w> --height <h> [--scale <n>] [--mobile] [--json]
orca geolocation --latitude <lat> --longitude <lng> [--accuracy <m>] [--json]
```
### Request Interception
```bash
orca intercept enable [--patterns <list>] [--json] # Start intercepting requests
orca intercept disable [--json] # Stop intercepting
orca intercept list [--json] # List paused requests
```
> **Note:** Per-request `intercept continue` and `intercept block` are not yet supported.
> They will be added once agent-browser supports per-request interception decisions.
### Console / Network Capture
```bash
orca capture start [--json] # Start capturing console + network
orca capture stop [--json] # Stop capturing
orca console [--limit <n>] [--json] # Read captured console entries
orca network [--limit <n>] [--json] # Read captured network entries
```
### Mouse Control
```bash
orca exec --command "mouse move 100 200" [--json] # Move mouse to coordinates
orca exec --command "mouse down left" [--json] # Press mouse button
orca exec --command "mouse up left" [--json] # Release mouse button
orca exec --command "mouse wheel 100" [--json] # Scroll wheel
```
### Keyboard
```bash
orca exec --command "keyboard inserttext \"text\"" [--json] # Insert text bypassing key events
orca exec --command "keyboard type \"text\"" [--json] # Raw keystrokes
orca exec --command "keydown Shift" [--json] # Hold key down
orca exec --command "keyup Shift" [--json] # Release key
```
### Frames (Iframes)
Iframes are auto-inlined in snapshots — refs inside iframes work transparently. For scoped interaction:
```bash
orca exec --command "frame @e3" [--json] # Switch to iframe by ref
orca exec --command "frame \"#iframe\"" [--json] # Switch to iframe by CSS selector
orca exec --command "frame main" [--json] # Return to main frame
```
### Semantic Locators (alternative to refs)
When refs aren't available or you want to skip a snapshot:
```bash
orca exec --command "find role button click --name \"Submit\"" [--json]
orca exec --command "find text \"Sign In\" click" [--json]
orca exec --command "find label \"Email\" fill \"user@test.com\"" [--json]
orca exec --command "find placeholder \"Search\" type \"query\"" [--json]
orca exec --command "find testid \"submit-btn\" click" [--json]
```
### Dialogs
`alert` and `beforeunload` are auto-accepted. For `confirm` and `prompt`:
```bash
orca exec --command "dialog status" [--json] # Check for pending dialog
orca exec --command "dialog accept" [--json] # Accept
orca exec --command "dialog accept \"text\"" [--json] # Accept with prompt input
orca exec --command "dialog dismiss" [--json] # Dismiss/cancel
```
### Extended Commands (Passthrough)
```bash
orca exec --command "<agent-browser command>" [--json]
```
The `exec` command provides access to agent-browser's full command surface. Useful for commands without typed Orca handlers:
```bash
orca exec --command "set device \"iPhone 14\"" --json # Emulate device
orca exec --command "set offline on" --json # Toggle offline mode
orca exec --command "set media dark" --json # Emulate color scheme
orca exec --command "network requests" --json # View tracked network requests
orca exec --command "help" --json # See all available commands
```
**Important:** Do not use `orca exec --command "tab ..."` for tab management. Use `orca tab list/create/close/switch` instead — those operate at the Orca level and keep the UI synchronized.
### `fill` vs `type`
- **`fill`** targets a specific element by ref, clears its value first, then enters text. Use for form fields.
- **`type`** types at whatever currently has focus. Use for search boxes or after clicking into an input.
If neither works on a custom input component, try:
```bash
orca focus --element @e1 --json
orca exec --command "keyboard inserttext \"text\"" --json # bypasses key events
```
### Browser Error Codes
| Error Code | Meaning | Recovery |
|-----------|---------|----------|
| `browser_no_tab` | No browser tab is open in this worktree | Open a tab, or use `--worktree all` to check other worktrees |
| `browser_stale_ref` | Ref is invalid (page changed since snapshot) | Run `orca snapshot` to get fresh refs |
| `browser_tab_not_found` | Tab index does not exist | Run `orca tab list` to see available tabs |
| `browser_error` | Error from the browser automation engine | Read the message for details; common causes: element not found, navigation timeout, JS error |
### Browser Worked Example
Agent fills a login form and verifies the dashboard loads:
```bash
# Navigate to the login page
orca goto --url https://app.example.com/login --json
# See what's on the page
orca snapshot --json
# Output includes:
# [@e1] text input "Email"
# [@e2] text input "Password"
# [@e3] button "Sign In"
# Fill the form
orca fill --element @e1 --value "user@example.com" --json
orca fill --element @e2 --value "s3cret" --json
# Submit
orca click --element @e3 --json
# Wait for the post-login navigation to settle
orca wait --load networkidle --json
# Verify the dashboard loaded
orca snapshot --json
# Output should show dashboard content, not the login form
```
### Browser Troubleshooting
**"Ref not found" / `browser_stale_ref`**
Page changed since the snapshot. Run `orca snapshot --json` again, then use the new refs.
**Element exists but not in snapshot**
It may be off-screen or not yet rendered. Try:
```bash
orca scroll --direction down --amount 1000 --json
orca snapshot --json
# or wait for it:
orca wait --text "..." --json
orca snapshot --json
```
**Click does nothing / overlay swallows the click**
Modals or cookie banners may be blocking. Snapshot, find the dismiss button, click it, then re-snapshot.
**Fill/type doesn't work on a custom input**
Some components intercept key events. Use `keyboard inserttext`:
```bash
orca focus --element @e1 --json
orca exec --command "keyboard inserttext \"text\"" --json
```
**`browser_no_tab` error**
No browser tab is open in the current worktree. Open one with `orca tab create --url <url> --json`.
### Auto-Switch Worktree
Browser commands automatically activate the target worktree in the Orca UI when needed. If the agent issues a browser command targeting a worktree that isn't currently active, Orca will switch to that worktree before executing the command.
### Tab Create Auto-Activation
When `orca tab create` opens a new tab, it is automatically set as the active tab for the worktree. Subsequent commands (`snapshot`, `click`, etc.) will target the newly created tab without needing an explicit `tab switch`.
### Browser Agent Guidance
- Always snapshot before interacting with elements.
- After navigation (`goto`, `back`, `forward`, `reload`, clicking a link), re-snapshot to get fresh refs.
- After switching tabs, re-snapshot.
- If you get `browser_stale_ref`, re-snapshot and retry with the new refs.
- Use `orca tab list` before `orca tab switch` to know which tabs exist.
- For concurrent browser workflows, prefer `orca tab list --json` and reuse `tabs[].browserPageId` with `--page` on later commands.
- Use `orca wait` to synchronize after actions that trigger async updates (form submits, SPA navigation, modals) instead of arbitrary sleeps.
- Use `orca eval` as an escape hatch for interactions not covered by other commands.
- Use `orca exec --command "help"` to discover extended commands.
- Worktree scoping is automatic — you'll only see tabs from your worktree by default.
- Bare browser commands without `--page` still target the current worktree's active tab, which is convenient but less robust for multi-process automation.
- Tab creation auto-activates the new tab — no need for `tab switch` after `tab create`.
- Browser commands auto-switch the active worktree if needed — no manual worktree activation required.
## Important Constraints ## Important Constraints
- Orca CLI only talks to a running Orca editor. - Orca CLI only talks to a running Orca editor.

View file

@ -1,3 +1,5 @@
/* oxlint-disable max-lines -- Why: CLI parsing behavior is exercised end-to-end
in one file so command and flag interactions stay visible in a single suite. */
import path from 'path' import path from 'path'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
@ -35,7 +37,24 @@ vi.mock('./runtime-client', () => {
} }
}) })
import { buildCurrentWorktreeSelector, main, normalizeWorktreeSelector } from './index' import {
buildCurrentWorktreeSelector,
COMMAND_SPECS,
main,
normalizeWorktreeSelector
} from './index'
import { RuntimeClientError } from './runtime-client'
describe('COMMAND_SPECS collision check', () => {
  it('has no duplicate command paths', () => {
    // Each spec's path segments are joined with a space to form its key; the
    // assertion fails on the first key that has already been recorded.
    const recordedPaths = new Set<string>()
    for (const { path: segments } of COMMAND_SPECS) {
      const commandPath = segments.join(' ')
      const alreadyRecorded = recordedPaths.has(commandPath)
      expect(alreadyRecorded, `Duplicate COMMAND_SPECS path: "${commandPath}"`).toBe(false)
      recordedPaths.add(commandPath)
    }
  })
})
describe('orca cli worktree awareness', () => { describe('orca cli worktree awareness', () => {
beforeEach(() => { beforeEach(() => {
@ -303,3 +322,312 @@ describe('orca cli worktree awareness', () => {
}) })
}) })
}) })
// Covers how `--page` interacts with worktree scoping for browser commands:
// an explicit page id alone must not trigger a worktree lookup, while
// `--worktree current` must still resolve the enclosing worktree first.
describe('orca cli browser page targeting', () => {
  // Working directory nested inside the fixture worktree below.
  const cwdInsideFeatureWorktree = '/tmp/repo/feature/src'

  // Wraps a result payload in the success envelope the mocked client resolves with.
  const okResponse = (id: string, result: Record<string, unknown>) => ({
    id,
    ok: true,
    result,
    _meta: {
      runtimeId: 'runtime-1'
    }
  })

  // `worktree.list` reply containing a single worktree rooted at /tmp/repo/feature.
  const worktreeListResponse = () =>
    okResponse('req_list', {
      worktrees: [
        {
          id: 'repo::/tmp/repo/feature',
          repoId: 'repo',
          path: '/tmp/repo/feature',
          branch: 'feature/foo',
          linkedIssue: null,
          git: {
            path: '/tmp/repo/feature',
            head: 'abc',
            branch: 'feature/foo',
            isBare: false,
            isMainWorktree: false
          },
          displayName: '',
          comment: ''
        }
      ],
      totalCount: 1,
      truncated: false
    })

  const snapshotResponse = () =>
    okResponse('req_snapshot', {
      browserPageId: 'page-1',
      snapshot: 'tree',
      refs: [],
      url: 'https://example.com',
      title: 'Example'
    })

  const tabSwitchResponse = () =>
    okResponse('req_switch', {
      switched: 2,
      browserPageId: 'page-2'
    })

  // Selector the CLI is expected to send once it has resolved the fixture worktree.
  const featureWorktreeSelector = () => `path:${path.resolve('/tmp/repo/feature')}`

  // Keeps `--json` output out of the test log.
  const silenceConsoleLog = () => {
    vi.spyOn(console, 'log').mockImplementation(() => {})
  }

  beforeEach(() => {
    callMock.mockReset()
  })

  afterEach(() => {
    vi.restoreAllMocks()
  })

  it('passes explicit page ids to snapshot without resolving the current worktree', async () => {
    callMock.mockResolvedValueOnce(snapshotResponse())
    silenceConsoleLog()

    await main(['snapshot', '--page', 'page-1', '--json'], '/tmp/not-an-orca-worktree')

    expect(callMock).toHaveBeenCalledTimes(1)
    expect(callMock).toHaveBeenCalledWith('browser.snapshot', {
      page: 'page-1'
    })
  })

  it('resolves current worktree only when --page is combined with --worktree current', async () => {
    callMock.mockResolvedValueOnce(worktreeListResponse())
    callMock.mockResolvedValueOnce(snapshotResponse())
    silenceConsoleLog()

    const argv = ['snapshot', '--page', 'page-1', '--worktree', 'current', '--json']
    await main(argv, cwdInsideFeatureWorktree)

    expect(callMock).toHaveBeenNthCalledWith(1, 'worktree.list', {
      limit: 10_000
    })
    expect(callMock).toHaveBeenNthCalledWith(2, 'browser.snapshot', {
      page: 'page-1',
      worktree: featureWorktreeSelector()
    })
  })

  it('passes page-targeted tab switches through without auto-scoping to the current worktree', async () => {
    callMock.mockResolvedValueOnce(tabSwitchResponse())
    silenceConsoleLog()

    await main(['tab', 'switch', '--page', 'page-2', '--json'], cwdInsideFeatureWorktree)

    expect(callMock).toHaveBeenCalledTimes(1)
    expect(callMock).toHaveBeenCalledWith('browser.tabSwitch', {
      index: undefined,
      page: 'page-2'
    })
  })

  it('still resolves the current worktree when tab switch --page is combined with --worktree current', async () => {
    callMock.mockResolvedValueOnce(worktreeListResponse())
    callMock.mockResolvedValueOnce(tabSwitchResponse())
    silenceConsoleLog()

    const argv = ['tab', 'switch', '--page', 'page-2', '--worktree', 'current', '--json']
    await main(argv, cwdInsideFeatureWorktree)

    expect(callMock).toHaveBeenNthCalledWith(1, 'worktree.list', {
      limit: 10_000
    })
    expect(callMock).toHaveBeenNthCalledWith(2, 'browser.tabSwitch', {
      index: undefined,
      page: 'page-2',
      worktree: featureWorktreeSelector()
    })
  })
})
// Covers the RPC timeout budget attached to `wait`, the error text printed for
// a runtime timeout, and flag pass-through for `viewport`.
describe('orca cli browser waits and viewport flags', () => {
  const cwdOutsideAnyWorktree = '/tmp/not-an-orca-worktree'

  // Wraps a result payload in the success envelope the mocked client resolves with.
  const okResponse = (id: string, result: Record<string, unknown>) => ({
    id,
    ok: true,
    result,
    _meta: {
      runtimeId: 'runtime-1'
    }
  })

  // Params expected on `browser.wait` for `--selector #ready`; only the
  // timeout differs between the cases below.
  const selectorWaitParams = (timeout: number | undefined) => ({
    selector: '#ready',
    timeout,
    text: undefined,
    url: undefined,
    load: undefined,
    fn: undefined,
    state: undefined,
    worktree: undefined
  })

  const silenceConsoleLog = () => {
    vi.spyOn(console, 'log').mockImplementation(() => {})
  }

  beforeEach(() => {
    callMock.mockReset()
    process.exitCode = undefined
  })

  afterEach(() => {
    vi.restoreAllMocks()
  })

  it('gives selector waits an explicit RPC timeout budget', async () => {
    callMock.mockResolvedValueOnce(okResponse('req_wait', { ok: true }))
    silenceConsoleLog()

    const argv = ['wait', '--selector', '#ready', '--worktree', 'all', '--json']
    await main(argv, cwdOutsideAnyWorktree)

    expect(callMock).toHaveBeenCalledWith('browser.wait', selectorWaitParams(undefined), {
      timeoutMs: 60_000
    })
  })

  it('extends selector wait RPC timeout when the user passes --timeout', async () => {
    callMock.mockResolvedValueOnce(okResponse('req_wait', { ok: true }))
    silenceConsoleLog()

    const argv = [
      'wait',
      '--selector',
      '#ready',
      '--timeout',
      '12000',
      '--worktree',
      'all',
      '--json'
    ]
    await main(argv, cwdOutsideAnyWorktree)

    expect(callMock).toHaveBeenCalledWith('browser.wait', selectorWaitParams(12000), {
      timeoutMs: 17000
    })
  })

  it('does not tell users Orca is down for a generic runtime timeout', async () => {
    const timeoutMessage = 'Timed out waiting for the Orca runtime to respond.'
    callMock.mockRejectedValueOnce(new RuntimeClientError('runtime_timeout', timeoutMessage))
    const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {})

    await main(['wait', '--selector', '#ready', '--worktree', 'all'], cwdOutsideAnyWorktree)

    // The runtime's own message must be printed verbatim.
    expect(errorSpy).toHaveBeenCalledWith(timeoutMessage)
  })

  it('passes the mobile viewport flag through to browser.viewport', async () => {
    const viewportResult = {
      width: 375,
      height: 812,
      deviceScaleFactor: 2,
      mobile: true
    }
    callMock.mockResolvedValueOnce(okResponse('req_viewport', viewportResult))
    silenceConsoleLog()

    const sizeFlags = ['--width', '375', '--height', '812', '--scale', '2', '--mobile']
    const argv = ['viewport', ...sizeFlags, '--worktree', 'all', '--json']
    await main(argv, cwdOutsideAnyWorktree)

    expect(callMock).toHaveBeenCalledWith('browser.viewport', {
      width: 375,
      height: 812,
      deviceScaleFactor: 2,
      mobile: true,
      worktree: undefined
    })
  })
})

File diff suppressed because it is too large Load diff

View file

@ -58,7 +58,10 @@ export class RuntimeClient {
private readonly userDataPath: string private readonly userDataPath: string
private readonly requestTimeoutMs: number private readonly requestTimeoutMs: number
constructor(userDataPath = getDefaultUserDataPath(), requestTimeoutMs = 15000) { // Why: browser commands trigger first-time session init (agent-browser connect +
// CDP proxy setup) which can take 15-30s. 60s accommodates cold start without
// being so large that genuine hangs go unnoticed.
constructor(userDataPath = getDefaultUserDataPath(), requestTimeoutMs = 60_000) {
this.userDataPath = userDataPath this.userDataPath = userDataPath
this.requestTimeoutMs = requestTimeoutMs this.requestTimeoutMs = requestTimeoutMs
} }
@ -383,6 +386,12 @@ export function getDefaultUserDataPath(
platform: NodeJS.Platform = process.platform, platform: NodeJS.Platform = process.platform,
homeDir = homedir() homeDir = homedir()
): string { ): string {
// Why: in dev mode, the Electron app writes runtime metadata to `orca-dev`
// instead of `orca` to avoid clobbering the production app's metadata. The
// CLI needs to find the same metadata file, so respect this env var override.
if (process.env.ORCA_USER_DATA_PATH) {
return process.env.ORCA_USER_DATA_PATH
}
if (platform === 'darwin') { if (platform === 'darwin') {
return join(homeDir, 'Library', 'Application Support', 'orca') return join(homeDir, 'Library', 'Application Support', 'orca')
} }

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -3,6 +3,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'
const { const {
shellOpenExternalMock, shellOpenExternalMock,
browserWindowFromWebContentsMock,
menuBuildFromTemplateMock, menuBuildFromTemplateMock,
guestOffMock, guestOffMock,
guestOnMock, guestOnMock,
@ -13,6 +14,7 @@ const {
screenGetCursorScreenPointMock screenGetCursorScreenPointMock
} = vi.hoisted(() => ({ } = vi.hoisted(() => ({
shellOpenExternalMock: vi.fn(), shellOpenExternalMock: vi.fn(),
browserWindowFromWebContentsMock: vi.fn(),
menuBuildFromTemplateMock: vi.fn(), menuBuildFromTemplateMock: vi.fn(),
guestOffMock: vi.fn(), guestOffMock: vi.fn(),
guestOnMock: vi.fn(), guestOnMock: vi.fn(),
@ -24,6 +26,9 @@ const {
})) }))
vi.mock('electron', () => ({ vi.mock('electron', () => ({
BrowserWindow: {
fromWebContents: browserWindowFromWebContentsMock
},
clipboard: { writeText: vi.fn() }, clipboard: { writeText: vi.fn() },
shell: { openExternal: shellOpenExternalMock }, shell: { openExternal: shellOpenExternalMock },
Menu: { Menu: {
@ -44,6 +49,7 @@ describe('browserManager', () => {
beforeEach(() => { beforeEach(() => {
shellOpenExternalMock.mockReset() shellOpenExternalMock.mockReset()
browserWindowFromWebContentsMock.mockReset()
menuBuildFromTemplateMock.mockReset() menuBuildFromTemplateMock.mockReset()
guestOffMock.mockReset() guestOffMock.mockReset()
guestOnMock.mockReset() guestOnMock.mockReset()
@ -148,6 +154,295 @@ describe('browserManager', () => {
expect(shellOpenExternalMock).toHaveBeenCalledWith('https://example.com/login') expect(shellOpenExternalMock).toHaveBeenCalledWith('https://example.com/login')
}) })
// Shared fixture for the ensureWebviewVisible cases below: builds a webview
// guest and its host renderer, routes webContentsFromIdMock lookups to them,
// then attaches policies and registers the guest with browserManager.
// Returns the guest webContents id to pass to ensureWebviewVisible.
const registerPageBackedGuest = (fixture: {
  guestId: number
  browserPageId: string
  workspaceId: string
  worktreeId: string
  executeJavaScript: ReturnType<typeof vi.fn>
}): number => {
  const { guestId, executeJavaScript, browserPageId, workspaceId, worktreeId } = fixture
  const guestContents = {
    id: guestId,
    isDestroyed: vi.fn(() => false),
    getType: vi.fn(() => 'webview'),
    setBackgroundThrottling: guestSetBackgroundThrottlingMock,
    setWindowOpenHandler: guestSetWindowOpenHandlerMock,
    on: guestOnMock,
    off: guestOffMock,
    openDevTools: guestOpenDevToolsMock
  }
  const rendererContents = {
    id: rendererWebContentsId,
    isDestroyed: vi.fn(() => false),
    executeJavaScript
  }
  webContentsFromIdMock.mockImplementation((id: number) => {
    if (id === guestContents.id) {
      return guestContents
    }
    if (id === rendererWebContentsId) {
      return rendererContents
    }
    return null
  })

  browserManager.attachGuestPolicies(guestContents as never)
  browserManager.registerGuest({
    browserPageId,
    workspaceId,
    worktreeId,
    webContentsId: guestContents.id,
    rendererWebContentsId
  })
  return guestContents.id
}

// Makes BrowserWindow.fromWebContents report a focused window.
const stubFocusedWindow = () => {
  browserWindowFromWebContentsMock.mockReturnValue({ isFocused: vi.fn(() => true) })
}

it('activates the owning browser workspace when ensuring a page-backed guest is visible', async () => {
  // First call answers the activation script, second answers the restore script.
  const rendererExec = vi.fn()
  rendererExec.mockResolvedValueOnce({
    prevTabType: 'terminal',
    prevActiveWorktreeId: 'wt-1',
    prevActiveBrowserWorkspaceId: 'workspace-prev',
    prevActiveBrowserPageId: 'page-prev',
    prevFocusedGroupTabId: 'tab-prev',
    targetWorktreeId: 'wt-1',
    targetBrowserWorkspaceId: 'workspace-1',
    targetBrowserPageId: 'page-1'
  })
  rendererExec.mockResolvedValueOnce(undefined)
  stubFocusedWindow()
  const guestId = registerPageBackedGuest({
    guestId: 707,
    browserPageId: 'page-1',
    workspaceId: 'workspace-1',
    worktreeId: 'wt-1',
    executeJavaScript: rendererExec
  })

  const restore = await browserManager.ensureWebviewVisible(guestId)

  const activationScript = rendererExec.mock.calls[0]?.[0]
  expect(activationScript).toContain('var browserWorkspaceId = "workspace-1";')
  expect(activationScript).toContain('var browserPageId = "page-1";')
  expect(activationScript).toContain('state.setActiveBrowserTab(browserWorkspaceId);')
  expect(activationScript).toContain(
    'state.setActiveBrowserPage(browserWorkspaceId, browserPageId);'
  )
  expect(activationScript).toContain('var targetWorktreeId = "wt-1";')
  restore()
})

it('restores the previously focused browser workspace after screenshot prep changes tabs', async () => {
  const rendererExec = vi.fn()
  rendererExec.mockResolvedValueOnce({
    prevTabType: 'browser',
    prevActiveWorktreeId: 'wt-prev',
    prevActiveBrowserWorkspaceId: 'workspace-prev',
    prevActiveBrowserPageId: 'page-prev',
    prevFocusedGroupTabId: 'tab-prev',
    targetWorktreeId: 'wt-target',
    targetBrowserWorkspaceId: 'workspace-target',
    targetBrowserPageId: 'page-target'
  })
  rendererExec.mockResolvedValueOnce(undefined)
  stubFocusedWindow()
  const guestId = registerPageBackedGuest({
    guestId: 708,
    browserPageId: 'page-target',
    workspaceId: 'workspace-target',
    worktreeId: 'wt-target',
    executeJavaScript: rendererExec
  })

  const restore = await browserManager.ensureWebviewVisible(guestId)
  restore()

  const restoreScript = rendererExec.mock.calls[1]?.[0]
  expect(restoreScript).toContain('state.setActiveWorktree("wt-prev");')
  expect(restoreScript).toContain('state.setActiveBrowserTab("workspace-prev");')
})

it('restores the previously active page when screenshot prep switches pages inside one workspace', async () => {
  const rendererExec = vi.fn()
  rendererExec.mockResolvedValueOnce({
    prevTabType: 'browser',
    prevActiveWorktreeId: 'wt-target',
    prevActiveBrowserWorkspaceId: 'workspace-target',
    prevActiveBrowserPageId: 'page-prev',
    prevFocusedGroupTabId: null,
    targetWorktreeId: 'wt-target',
    targetBrowserWorkspaceId: 'workspace-target',
    targetBrowserPageId: 'page-target'
  })
  rendererExec.mockResolvedValueOnce(undefined)
  stubFocusedWindow()
  const guestId = registerPageBackedGuest({
    guestId: 709,
    browserPageId: 'page-target',
    workspaceId: 'workspace-target',
    worktreeId: 'wt-target',
    executeJavaScript: rendererExec
  })

  const restore = await browserManager.ensureWebviewVisible(guestId)
  restore()

  const restoreScript = rendererExec.mock.calls[1]?.[0]
  expect(restoreScript).toContain('state.setActiveBrowserPage(')
  expect(restoreScript).toContain('"workspace-target"')
  expect(restoreScript).toContain('"page-prev"')
})

it('restores remembered browser workspace/page even when the visible pane was terminal', async () => {
  const rendererExec = vi.fn()
  rendererExec.mockResolvedValueOnce({
    prevTabType: 'terminal',
    prevActiveWorktreeId: 'wt-target',
    prevActiveBrowserWorkspaceId: 'workspace-prev',
    prevActiveBrowserPageId: 'page-prev',
    prevFocusedGroupTabId: 'tab-prev',
    targetWorktreeId: 'wt-target',
    targetBrowserWorkspaceId: 'workspace-target',
    targetBrowserPageId: 'page-target'
  })
  rendererExec.mockResolvedValueOnce(undefined)
  stubFocusedWindow()
  const guestId = registerPageBackedGuest({
    guestId: 7091,
    browserPageId: 'page-target',
    workspaceId: 'workspace-target',
    worktreeId: 'wt-target',
    executeJavaScript: rendererExec
  })

  const restore = await browserManager.ensureWebviewVisible(guestId)
  restore()

  const restoreScript = rendererExec.mock.calls[1]?.[0]
  expect(restoreScript).toContain('state.setActiveBrowserTab("workspace-prev");')
  expect(restoreScript).toContain('state.setActiveBrowserPage(')
  expect(restoreScript).toContain('"workspace-prev"')
  expect(restoreScript).toContain('"page-prev"')
  expect(restoreScript).toContain('state.activateTab("tab-prev");')
  expect(restoreScript).toContain('state.setActiveTabType("terminal");')
})

it('does not focus the Orca window while preparing a screenshot', async () => {
  // Only the activation script is answered; restore is never invoked here.
  const rendererExec = vi.fn()
  rendererExec.mockResolvedValueOnce({
    prevTabType: 'terminal',
    prevActiveWorktreeId: 'wt-1',
    prevActiveBrowserWorkspaceId: 'workspace-prev',
    prevActiveBrowserPageId: 'page-prev',
    prevFocusedGroupTabId: 'tab-prev',
    targetWorktreeId: 'wt-1',
    targetBrowserWorkspaceId: 'workspace-1',
    targetBrowserPageId: 'page-1'
  })
  const guestId = registerPageBackedGuest({
    guestId: 710,
    browserPageId: 'page-1',
    workspaceId: 'workspace-1',
    worktreeId: 'wt-1',
    executeJavaScript: rendererExec
  })

  await browserManager.ensureWebviewVisible(guestId)

  expect(browserWindowFromWebContentsMock).not.toHaveBeenCalled()
})
it('offers opening a link in another Orca browser tab from the guest context menu', () => { it('offers opening a link in another Orca browser tab from the guest context menu', () => {
const rendererSendMock = vi.fn() const rendererSendMock = vi.fn()
const guest = { const guest = {
@ -455,6 +750,101 @@ describe('browserManager', () => {
) )
}) })
it('retires stale guest mappings when a page re-registers after a process swap', () => {
  type DidFailLoadHandler = (
    event: unknown,
    errorCode: number,
    errorDescription: string,
    validatedUrl: string,
    isMainFrame: boolean
  ) => void

  // Each guest gets its own on/off mocks so listeners can be told apart.
  const buildGuest = (
    id: number,
    url: string,
    on: ReturnType<typeof vi.fn>,
    off: ReturnType<typeof vi.fn>
  ) => ({
    id,
    isDestroyed: vi.fn(() => false),
    getType: vi.fn(() => 'webview'),
    setBackgroundThrottling: guestSetBackgroundThrottlingMock,
    setWindowOpenHandler: guestSetWindowOpenHandlerMock,
    on,
    off,
    openDevTools: guestOpenDevToolsMock,
    getURL: vi.fn(() => url)
  })

  // Pulls the listener that was registered for 'did-fail-load', if any.
  const didFailLoadHandlerOf = (onMock: ReturnType<typeof vi.fn>) =>
    onMock.mock.calls.find(([event]) => event === 'did-fail-load')?.[1] as
      | DidFailLoadHandler
      | undefined

  const rendererSendMock = vi.fn()
  const oldGuestOnMock = vi.fn()
  const oldGuestOffMock = vi.fn()
  const newGuestOnMock = vi.fn()
  const newGuestOffMock = vi.fn()
  const oldGuest = buildGuest(501, 'https://old.example', oldGuestOnMock, oldGuestOffMock)
  const newGuest = buildGuest(502, 'https://new.example', newGuestOnMock, newGuestOffMock)

  webContentsFromIdMock.mockImplementation((id: number) => {
    if (id === oldGuest.id) {
      return oldGuest
    }
    if (id === newGuest.id) {
      return newGuest
    }
    if (id === rendererWebContentsId) {
      return { isDestroyed: vi.fn(() => false), send: rendererSendMock }
    }
    return null
  })

  // Register the same page id twice, first with the old guest and then the new one.
  for (const guest of [oldGuest, newGuest]) {
    browserManager.attachGuestPolicies(guest as never)
    browserManager.registerGuest({
      browserPageId: 'browser-1',
      webContentsId: guest.id,
      rendererWebContentsId
    })
  }

  const oldDidFailLoadHandler = didFailLoadHandlerOf(oldGuestOnMock)
  const newDidFailLoadHandler = didFailLoadHandlerOf(newGuestOnMock)

  // A late failure from the replaced guest must not reach the renderer.
  oldDidFailLoadHandler?.(null, -105, 'Old guest failed', 'https://old.example', true)
  expect(rendererSendMock).not.toHaveBeenCalled()

  newDidFailLoadHandler?.(null, -106, 'New guest failed', 'https://new.example', true)
  expect(rendererSendMock).toHaveBeenCalledWith('browser:guest-load-failed', {
    browserPageId: 'browser-1',
    loadError: {
      code: -106,
      description: 'New guest failed',
      validatedUrl: 'https://new.example'
    }
  })

  expect(oldGuestOffMock).toHaveBeenCalled()
  expect(browserManager.getGuestWebContentsId('browser-1')).toBe(newGuest.id)
})
it('does not forward ctrl/cmd+r or readline chords from browser guests', () => { it('does not forward ctrl/cmd+r or readline chords from browser guests', () => {
const rendererSendMock = vi.fn() const rendererSendMock = vi.fn()
const guest = { const guest = {

View file

@ -38,6 +38,7 @@ export type BrowserGuestRegistration = {
browserPageId?: string browserPageId?: string
browserTabId?: string browserTabId?: string
workspaceId?: string workspaceId?: string
worktreeId?: string
webContentsId: number webContentsId: number
rendererWebContentsId: number rendererWebContentsId: number
} }
@ -71,15 +72,20 @@ function safeOrigin(rawUrl: string): string {
} }
} }
class BrowserManager { export class BrowserManager {
private readonly webContentsIdByTabId = new Map<string, number>() private readonly webContentsIdByTabId = new Map<string, number>()
// Why: reverse map enables O(1) guest→tab lookups instead of O(N) linear // Why: reverse map enables O(1) guest→tab lookups instead of O(N) linear
// scans on every mouse event, load failure, permission, and popup event. // scans on every mouse event, load failure, permission, and popup event.
private readonly tabIdByWebContentsId = new Map<number, string>() private readonly tabIdByWebContentsId = new Map<number, string>()
// Why: guest registration is keyed by browser page id, but renderer
// visibility/focus state is keyed by browser workspace id. Screenshot prep
// has to bridge that mismatch to activate the right tab before capture.
private readonly workspaceIdByPageId = new Map<string, string>()
private readonly rendererWebContentsIdByTabId = new Map<string, number>() private readonly rendererWebContentsIdByTabId = new Map<string, number>()
private readonly contextMenuCleanupByTabId = new Map<string, () => void>() private readonly contextMenuCleanupByTabId = new Map<string, () => void>()
private readonly grabShortcutCleanupByTabId = new Map<string, () => void>() private readonly grabShortcutCleanupByTabId = new Map<string, () => void>()
private readonly shortcutForwardingCleanupByTabId = new Map<string, () => void>() private readonly shortcutForwardingCleanupByTabId = new Map<string, () => void>()
private readonly worktreeIdByTabId = new Map<string, string>()
private readonly policyAttachedGuestIds = new Set<number>() private readonly policyAttachedGuestIds = new Set<number>()
private readonly policyCleanupByGuestId = new Map<number, () => void>() private readonly policyCleanupByGuestId = new Map<number, () => void>()
private readonly pendingLoadFailuresByGuestId = new Map< private readonly pendingLoadFailuresByGuestId = new Map<
@ -108,12 +114,230 @@ class BrowserManager {
return renderer return renderer
} }
// Why: screenshot sessions target guest page ids, but Orca's visible browser
// chrome is keyed by workspace ids. If we activate the page id directly, the
// webview stays hidden under the terminal pane and Page.captureScreenshot
// times out even though the guest still exists.
//
// Switches the renderer's active worktree / browser workspace / browser page
// to the ones backing `guestWebContentsId`, and resolves to a callback that
// puts the previous selection back. The callback is a no-op when no page id
// or live renderer resolves for the guest, when the renderer script yields
// null, or when the previous selection already matched the target.
async ensureWebviewVisible(guestWebContentsId: number): Promise<() => void> {
const browserPageId = this.resolveBrowserTabIdForGuestWebContentsId(guestWebContentsId)
// No page id resolves for this guest, so there is nothing to activate.
if (!browserPageId) {
return () => {}
}
// Falls back to the page id itself when no workspace was recorded for it.
const browserWorkspaceId = this.workspaceIdByPageId.get(browserPageId) ?? browserPageId
const worktreeId = this.worktreeIdByTabId.get(browserPageId) ?? null
const renderer = this.resolveRendererForBrowserTab(browserPageId)
if (!renderer || renderer.isDestroyed()) {
return () => {}
}
// Runs inside the renderer against window.__store: records the current
// selection, activates the target worktree/workspace/page, and returns both
// the previous and the target ids. `prev` is null when the store is missing
// or the script rejects (see the .catch below).
const prev = await renderer
.executeJavaScript(
`(function() {
var store = window.__store;
if (!store) return null;
var state = store.getState();
var prevTabType = state.activeTabType;
var prevActiveWorktreeId = state.activeWorktreeId || null;
var prevActiveBrowserWorkspaceId = state.activeBrowserTabId || null;
var prevActiveBrowserPageId = null;
var prevFocusedGroupTabId = null;
var targetWorktreeId = ${JSON.stringify(worktreeId)};
var browserWorkspaceId = ${JSON.stringify(browserWorkspaceId)};
var browserPageId = ${JSON.stringify(browserPageId)};
var browserTabsByWorktree = state.browserTabsByWorktree || {};
if (prevActiveWorktreeId) {
var prevFocusedGroupId = (state.activeGroupIdByWorktree || {})[prevActiveWorktreeId];
var prevGroups = (state.groupsByWorktree || {})[prevActiveWorktreeId] || [];
for (var pg = 0; pg < prevGroups.length; pg++) {
if (prevGroups[pg].id === prevFocusedGroupId) {
prevFocusedGroupTabId = prevGroups[pg].activeTabId;
break;
}
}
}
if (prevActiveBrowserWorkspaceId) {
for (var prevWtId in browserTabsByWorktree) {
var prevBrowserTabs = browserTabsByWorktree[prevWtId] || [];
for (var pbt = 0; pbt < prevBrowserTabs.length; pbt++) {
if (prevBrowserTabs[pbt].id === prevActiveBrowserWorkspaceId) {
prevActiveBrowserPageId = prevBrowserTabs[pbt].activePageId || null;
break;
}
}
if (prevActiveBrowserPageId) break;
}
}
if (
targetWorktreeId &&
prevActiveWorktreeId !== targetWorktreeId &&
typeof state.setActiveWorktree === 'function'
) {
state.setActiveWorktree(targetWorktreeId);
state = store.getState();
}
var foundWorkspace = null;
for (var wtId in browserTabsByWorktree) {
var tabs = browserTabsByWorktree[wtId] || [];
for (var i = 0; i < tabs.length; i++) {
if (tabs[i].id === browserWorkspaceId) {
foundWorkspace = tabs[i];
if (!targetWorktreeId) {
targetWorktreeId = wtId;
}
break;
}
}
if (foundWorkspace) break;
}
var hasTargetPage = false;
var targetPages = (state.browserPagesByWorkspace || {})[browserWorkspaceId] || [];
for (var pageIndex = 0; pageIndex < targetPages.length; pageIndex++) {
if (targetPages[pageIndex].id === browserPageId) {
hasTargetPage = true;
break;
}
}
if (foundWorkspace) {
if (typeof state.setActiveBrowserTab === 'function') {
state.setActiveBrowserTab(browserWorkspaceId);
state = store.getState();
} else {
var allTabs = state.unifiedTabsByWorktree || {};
var found = null;
for (var unifiedWtId in allTabs) {
var unifiedTabs = allTabs[unifiedWtId] || [];
for (var unifiedIndex = 0; unifiedIndex < unifiedTabs.length; unifiedIndex++) {
if (
unifiedTabs[unifiedIndex].contentType === 'browser' &&
unifiedTabs[unifiedIndex].entityId === browserWorkspaceId
) {
found = unifiedTabs[unifiedIndex];
break;
}
}
if (found) break;
}
if (found) {
state.activateTab(found.id);
}
state.setActiveTabType('browser');
state = store.getState();
}
// Why: activating the workspace alone is not enough for screenshot
// capture when a browser workspace contains multiple pages. The
// compositor only paints the currently mounted page guest.
if (
hasTargetPage &&
foundWorkspace.activePageId !== browserPageId &&
typeof state.setActiveBrowserPage === 'function'
) {
state.setActiveBrowserPage(browserWorkspaceId, browserPageId);
state = store.getState();
}
}
return {
prevTabType: prevTabType,
prevActiveWorktreeId: prevActiveWorktreeId,
prevActiveBrowserWorkspaceId: prevActiveBrowserWorkspaceId,
prevActiveBrowserPageId: prevActiveBrowserPageId,
prevFocusedGroupTabId: prevFocusedGroupTabId,
targetWorktreeId: targetWorktreeId,
targetBrowserWorkspaceId: foundWorkspace ? browserWorkspaceId : null,
targetBrowserPageId: foundWorkspace && hasTargetPage ? browserPageId : null
};
})()`
)
.catch(() => null)
// A restore is scheduled when the previous view was not the browser pane,
// when a focused group tab was recorded, or when the previous
// worktree/workspace/page differs from the target reported by the script.
const needsRestore =
prev &&
(prev.prevTabType !== 'browser' ||
prev.prevActiveWorktreeId !== prev.targetWorktreeId ||
prev.prevFocusedGroupTabId !== null ||
prev.prevActiveBrowserWorkspaceId !== prev.targetBrowserWorkspaceId ||
prev.prevActiveBrowserPageId !== prev.targetBrowserPageId)
if (!needsRestore) {
return () => {}
}
// The restore callback is synchronous and fire-and-forget: it dispatches the
// restore script without awaiting it and swallows any rejection.
return () => {
// Re-checked at call time because the renderer can be destroyed between
// activation and restore.
if (!prev || !renderer || renderer.isDestroyed()) {
return
}
renderer
.executeJavaScript(
`(function() {
var store = window.__store;
if (!store) return;
var state = store.getState();
if (
${JSON.stringify(prev?.prevActiveWorktreeId)} &&
${JSON.stringify(prev?.prevActiveWorktreeId)} !==
${JSON.stringify(prev?.targetWorktreeId)} &&
typeof state.setActiveWorktree === 'function'
) {
state.setActiveWorktree(${JSON.stringify(prev?.prevActiveWorktreeId)});
state = store.getState();
}
if (
${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)} &&
${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)} !==
${JSON.stringify(prev?.targetBrowserWorkspaceId)} &&
typeof state.setActiveBrowserTab === 'function'
) {
state.setActiveBrowserTab(${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)});
state = store.getState();
}
if (
${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)} &&
${JSON.stringify(prev?.prevActiveBrowserPageId)} &&
${JSON.stringify(prev?.prevActiveBrowserPageId)} !==
${JSON.stringify(prev?.targetBrowserPageId)} &&
typeof state.setActiveBrowserPage === 'function'
) {
// Why: Orca remembers the last browser workspace/page even when
// the user is currently in terminal/editor view. Screenshot prep
// temporarily switches that hidden browser selection state, so
// restore it independently of the visible tab type.
state.setActiveBrowserPage(
${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)},
${JSON.stringify(prev?.prevActiveBrowserPageId)}
);
state = store.getState();
}
if (
${JSON.stringify(prev?.prevTabType)} !== 'browser' &&
${JSON.stringify(prev?.prevFocusedGroupTabId)}
) {
state.activateTab(${JSON.stringify(prev?.prevFocusedGroupTabId)});
}
if (${JSON.stringify(prev?.prevTabType)} !== 'browser') {
state.setActiveTabType(${JSON.stringify(prev?.prevTabType)});
}
})()`
)
.catch(() => {})
}
}
attachGuestPolicies(guest: Electron.WebContents): void { attachGuestPolicies(guest: Electron.WebContents): void {
if (this.policyAttachedGuestIds.has(guest.id)) { if (this.policyAttachedGuestIds.has(guest.id)) {
return return
} }
this.policyAttachedGuestIds.add(guest.id) this.policyAttachedGuestIds.add(guest.id)
guest.setBackgroundThrottling(true) // Why: background throttling must be disabled so agent-driven screenshots
// (Page.captureScreenshot via CDP proxy) can capture frames even when the
// Orca window is not the focused foreground app. With throttling enabled,
// the compositor stops producing frames and capturePage() returns empty.
guest.setBackgroundThrottling(false)
guest.setWindowOpenHandler(({ url }) => { guest.setWindowOpenHandler(({ url }) => {
const browserTabId = this.resolveBrowserTabIdForGuestWebContentsId(guest.id) const browserTabId = this.resolveBrowserTabIdForGuestWebContentsId(guest.id)
const browserUrl = normalizeBrowserNavigationUrl(url) const browserUrl = normalizeBrowserNavigationUrl(url)
@ -189,9 +413,30 @@ class BrowserManager {
}) })
} }
private retireStaleGuestWebContents(previousWebContentsId: number): void {
  // Why: a browser page can re-register with a new guest id after Chromium
  // swaps renderer processes. Late events from the dead guest must stop
  // resolving to the live page, or stale download/popup/permission callbacks
  // can be delivered to the wrong session after the swap.
  const staleGuestId = previousWebContentsId

  // Drop the reverse lookup first so the stale id no longer maps to a page.
  this.tabIdByWebContentsId.delete(staleGuestId)

  // Run the stored policy cleanup, if one was recorded, before forgetting it.
  const detachPolicies = this.policyCleanupByGuestId.get(staleGuestId)
  if (detachPolicies) {
    detachPolicies()
    this.policyCleanupByGuestId.delete(staleGuestId)
  }
  this.policyAttachedGuestIds.delete(staleGuestId)

  // Discard every per-guest queue still keyed by the stale id.
  const pendingQueues = [
    this.pendingLoadFailuresByGuestId,
    this.pendingPermissionEventsByGuestId,
    this.pendingPopupEventsByGuestId,
    this.pendingDownloadIdsByGuestId
  ]
  for (const queue of pendingQueues) {
    queue.delete(staleGuestId)
  }
}
registerGuest({ registerGuest({
browserPageId, browserPageId,
browserTabId: legacyBrowserTabId, browserTabId: legacyBrowserTabId,
workspaceId,
worktreeId,
webContentsId, webContentsId,
rendererWebContentsId rendererWebContentsId
}: BrowserGuestRegistration): void { }: BrowserGuestRegistration): void {
@ -231,9 +476,20 @@ class BrowserManager {
return return
} }
const previousWebContentsId = this.webContentsIdByTabId.get(browserTabId)
if (previousWebContentsId !== undefined && previousWebContentsId !== webContentsId) {
this.retireStaleGuestWebContents(previousWebContentsId)
}
this.webContentsIdByTabId.set(browserTabId, webContentsId) this.webContentsIdByTabId.set(browserTabId, webContentsId)
this.tabIdByWebContentsId.set(webContentsId, browserTabId) this.tabIdByWebContentsId.set(webContentsId, browserTabId)
if (workspaceId) {
this.workspaceIdByPageId.set(browserTabId, workspaceId)
}
this.rendererWebContentsIdByTabId.set(browserTabId, rendererWebContentsId) this.rendererWebContentsIdByTabId.set(browserTabId, rendererWebContentsId)
if (worktreeId) {
this.worktreeIdByTabId.set(browserTabId, worktreeId)
}
this.setupContextMenu(browserTabId, guest) this.setupContextMenu(browserTabId, guest)
this.setupGrabShortcut(browserTabId, guest) this.setupGrabShortcut(browserTabId, guest)
@ -292,6 +548,8 @@ class BrowserManager {
} }
this.webContentsIdByTabId.delete(browserTabId) this.webContentsIdByTabId.delete(browserTabId)
this.rendererWebContentsIdByTabId.delete(browserTabId) this.rendererWebContentsIdByTabId.delete(browserTabId)
this.workspaceIdByPageId.delete(browserTabId)
this.worktreeIdByTabId.delete(browserTabId)
} }
unregisterAll(): void { unregisterAll(): void {
@ -313,6 +571,7 @@ class BrowserManager {
} }
this.policyCleanupByGuestId.clear() this.policyCleanupByGuestId.clear()
this.tabIdByWebContentsId.clear() this.tabIdByWebContentsId.clear()
this.worktreeIdByTabId.clear()
this.pendingLoadFailuresByGuestId.clear() this.pendingLoadFailuresByGuestId.clear()
this.pendingPermissionEventsByGuestId.clear() this.pendingPermissionEventsByGuestId.clear()
this.pendingPopupEventsByGuestId.clear() this.pendingPopupEventsByGuestId.clear()
@ -323,6 +582,14 @@ class BrowserManager {
return this.webContentsIdByTabId.get(browserTabId) ?? null return this.webContentsIdByTabId.get(browserTabId) ?? null
} }
/**
 * Returns the manager's own map from browser tab id to guest webContents id.
 * The map itself is returned, not a copy.
 */
getWebContentsIdByTabId(): Map<string, number> {
  const { webContentsIdByTabId } = this
  return webContentsIdByTabId
}
/**
 * Looks up the worktree id stored for a browser tab.
 *
 * @returns the stored worktree id, or `undefined` when none is recorded.
 */
getWorktreeIdForTab(browserTabId: string): string | undefined {
  const worktreeId = this.worktreeIdByTabId.get(browserTabId)
  return worktreeId
}
notifyPermissionDenied(args: { notifyPermissionDenied(args: {
guestWebContentsId: number guestWebContentsId: number
permission: string permission: string

View file

@ -373,8 +373,12 @@ class BrowserSessionRegistry {
this.configuredPartitions.add(partition) this.configuredPartitions.add(partition)
const sess = session.fromPartition(partition) const sess = session.fromPartition(partition)
// Why: clipboard-read and clipboard-sanitized-write are required for agent-browser's
// clipboard commands to work. Without these, navigator.clipboard.writeText/readText
// throws NotAllowedError even when invoked via CDP with userGesture:true.
const autoGranted = new Set(['fullscreen', 'clipboard-read', 'clipboard-sanitized-write'])
sess.setPermissionRequestHandler((webContents, permission, callback) => { sess.setPermissionRequestHandler((webContents, permission, callback) => {
const allowed = permission === 'fullscreen' const allowed = autoGranted.has(permission)
if (!allowed) { if (!allowed) {
browserManager.notifyPermissionDenied({ browserManager.notifyPermissionDenied({
guestWebContentsId: webContents.id, guestWebContentsId: webContents.id,
@ -385,7 +389,7 @@ class BrowserSessionRegistry {
callback(allowed) callback(allowed)
}) })
sess.setPermissionCheckHandler((_webContents, permission) => { sess.setPermissionCheckHandler((_webContents, permission) => {
return permission === 'fullscreen' return autoGranted.has(permission)
}) })
sess.setDisplayMediaRequestHandler((_request, callback) => { sess.setDisplayMediaRequestHandler((_request, callback) => {
callback({ video: undefined, audio: undefined }) callback({ video: undefined, audio: undefined })

View file

@ -0,0 +1,533 @@
/* eslint-disable max-lines -- Why: integration test covering the full browser automation pipeline end-to-end. */
import { mkdtempSync } from 'fs'
import { tmpdir } from 'os'
import { join } from 'path'
import { createConnection } from 'net'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
// ── Electron mocks ──
// Why: the mock is created through vi.hoisted so it already exists when the
// 'electron' mock factory below references it.
const { webContentsFromIdMock } = vi.hoisted(() => ({
webContentsFromIdMock: vi.fn()
}))
// Replace the Electron surface with stubs; `webContents.fromId` is the only
// one each test reconfigures (see beforeEach).
vi.mock('electron', () => ({
webContents: { fromId: webContentsFromIdMock },
shell: { openExternal: vi.fn() },
ipcMain: { handle: vi.fn(), removeHandler: vi.fn(), on: vi.fn() },
app: { getPath: vi.fn(() => '/tmp'), isPackaged: false }
}))
// Worktree listing always resolves to an empty list in this suite.
vi.mock('../git/worktree', () => ({
listWorktrees: vi.fn().mockResolvedValue([])
}))
import { BrowserManager } from './browser-manager'
import { CdpBridge } from './cdp-bridge'
import { OrcaRuntimeService } from '../runtime/orca-runtime'
import { OrcaRuntimeRpcServer } from '../runtime/runtime-rpc'
import { readRuntimeMetadata } from '../runtime/runtime-metadata'
// ── CDP response builders ──
// Shape of one accessibility-tree node as served by the mocked
// `Accessibility.getFullAXTree` response in createMockGuest.
type AXNode = {
nodeId: string
backendDOMNodeId?: number
role?: { type: string; value: string }
name?: { type: string; value: string }
properties?: { name: string; value: { type: string; value: unknown } }[]
// Ids of child nodes, referring to other entries' `nodeId`.
childIds?: string[]
ignored?: boolean
}
/**
 * Builds one AX node fixture from an id, role and accessible name.
 *
 * `backendDOMNodeId` defaults to the numeric id times 100 unless
 * `opts.backendDOMNodeId` is provided; `childIds` is passed through as given.
 */
function axNode(
  id: string,
  role: string,
  name: string,
  opts?: { childIds?: string[]; backendDOMNodeId?: number }
): AXNode {
  const derivedBackendId = parseInt(id, 10) * 100
  const node: AXNode = {
    nodeId: id,
    backendDOMNodeId: opts?.backendDOMNodeId ?? derivedBackendId,
    role: { type: 'role', value: role },
    name: { type: 'computedString', value: name },
    childIds: opts?.childIds
  }
  return node
}
// AX tree served for the default page: a root WebArea with a heading, a
// static text node and one link.
const EXAMPLE_COM_TREE: AXNode[] = [
axNode('1', 'WebArea', 'Example Domain', { childIds: ['2', '3', '4'] }),
axNode('2', 'heading', 'Example Domain'),
axNode('3', 'staticText', 'This domain is for use in illustrative examples.'),
axNode('4', 'link', 'More information...', { backendDOMNodeId: 400 })
]
// AX tree served after navigating to a search.example.com URL: a navigation
// landmark with a link, plus a textbox and a button.
// NOTE(review): node '3' is listed in the childIds of both the WebArea and the
// navigation node — presumably intentional; confirm against the snapshot code.
const SEARCH_PAGE_TREE: AXNode[] = [
axNode('1', 'WebArea', 'Search', { childIds: ['2', '3', '4', '5'] }),
axNode('2', 'navigation', 'Main Nav', { childIds: ['3'] }),
axNode('3', 'link', 'Home', { backendDOMNodeId: 300 }),
axNode('4', 'textbox', 'Search query', { backendDOMNodeId: 400 }),
axNode('5', 'button', 'Search', { backendDOMNodeId: 500 })
]
// ── Mock WebContents factory ──
/**
 * Builds a fake webview guest whose `debugger.sendCommand` answers the CDP
 * methods used by this suite from in-memory page state (URL, title, AX tree).
 *
 * Navigating to a URL containing `search.example.com` switches the state to
 * the search page; a URL containing `nonexistent.invalid` reports a DNS error
 * and leaves the state untouched. Any CDP method not listed here throws.
 *
 * @returns the guest stub together with its `sendCommand` mock.
 */
function createMockGuest(id: number, url: string, title: string) {
  let currentUrl = url
  let currentTitle = title
  let currentTree = EXAMPLE_COM_TREE
  let navHistoryId = 1

  // CDP methods that are acknowledged with an empty result object.
  const acknowledgedMethods = new Set([
    'Page.enable',
    'DOM.enable',
    'Accessibility.enable',
    'DOM.scrollIntoViewIfNeeded',
    'Input.dispatchMouseEvent',
    'Input.insertText',
    'Input.dispatchKeyEvent',
    'DOM.focus',
    'DOM.setFileInputFiles',
    'Page.reload',
    'Network.enable',
    'Target.setAutoAttach',
    'Runtime.enable'
  ])

  const sendCommandMock = vi.fn(async (method: string, params?: Record<string, unknown>) => {
    if (acknowledgedMethods.has(method)) {
      return {}
    }

    switch (method) {
      case 'Accessibility.getFullAXTree':
        return { nodes: currentTree }
      case 'Page.getNavigationHistory':
        return {
          entries: [{ id: navHistoryId, url: currentUrl }],
          currentIndex: 0
        }
      case 'Page.navigate': {
        const targetUrl = (params as { url: string }).url
        if (targetUrl.includes('nonexistent.invalid')) {
          return { errorText: 'net::ERR_NAME_NOT_RESOLVED' }
        }
        navHistoryId++
        currentUrl = targetUrl
        const isSearchPage = targetUrl.includes('search.example.com')
        currentTitle = isSearchPage ? 'Search' : 'Example Domain'
        currentTree = isSearchPage ? SEARCH_PAGE_TREE : EXAMPLE_COM_TREE
        return {}
      }
      case 'Runtime.evaluate': {
        const expr = (params as { expression: string }).expression
        if (expr === 'document.readyState') {
          return { result: { value: 'complete' } }
        }
        if (expr === 'location.origin') {
          return { result: { value: new URL(currentUrl).origin } }
        }
        if (expr.includes('innerWidth')) {
          return { result: { value: JSON.stringify({ w: 1280, h: 720 }) } }
        }
        if (expr.includes('scrollBy') || expr.includes('dispatchEvent')) {
          return { result: { value: undefined } }
        }
        // Anything else is evaluated locally and stringified.
        // eslint-disable-next-line no-eval
        return { result: { value: String(eval(expr)), type: 'string' } }
      }
      case 'DOM.getBoxModel':
        return { model: { content: [100, 200, 300, 200, 300, 250, 100, 250] } }
      case 'DOM.describeNode':
        return { node: { nodeId: 1 } }
      case 'DOM.requestNode':
        return { nodeId: 1 }
      case 'DOM.resolveNode':
        return { object: { objectId: 'obj-1' } }
      case 'Runtime.callFunctionOn':
        return { result: { value: undefined } }
      case 'Page.captureScreenshot':
        return {
          data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='
        }
      default:
        throw new Error(`Unexpected CDP method: ${method}`)
    }
  })

  // Handlers registered through guest.debugger.on, grouped by event name.
  const debuggerListeners = new Map<string, ((...args: unknown[]) => void)[]>()

  const debuggerStub = {
    attach: vi.fn(),
    detach: vi.fn(),
    sendCommand: sendCommandMock,
    on: vi.fn((event: string, handler: (...args: unknown[]) => void) => {
      const registered = debuggerListeners.get(event) ?? []
      debuggerListeners.set(event, [...registered, handler])
    }),
    removeListener: vi.fn((event: string, handler: (...args: unknown[]) => void) => {
      const registered = debuggerListeners.get(event) ?? []
      const position = registered.indexOf(handler)
      if (position >= 0) {
        registered.splice(position, 1)
      }
    }),
    removeAllListeners: vi.fn((event: string) => {
      debuggerListeners.set(event, [])
    }),
    off: vi.fn()
  }

  const guest = {
    id,
    isDestroyed: vi.fn(() => false),
    getType: vi.fn(() => 'webview'),
    getURL: vi.fn(() => currentUrl),
    getTitle: vi.fn(() => currentTitle),
    setBackgroundThrottling: vi.fn(),
    setWindowOpenHandler: vi.fn(),
    on: vi.fn(),
    off: vi.fn(),
    debugger: debuggerStub
  }

  return { guest, sendCommandMock }
}
// ── RPC helper ──
/**
 * Sends one newline-delimited JSON request to the RPC endpoint and resolves
 * with the first newline-terminated JSON response line.
 *
 * @param endpoint socket path handed to `createConnection`.
 * @param request payload serialised as a single JSON line.
 * @returns the parsed first response line.
 * @throws (rejects) on socket errors, when the response line is not valid
 *   JSON, or when the peer closes before a complete line arrives.
 */
async function sendRequest(
  endpoint: string,
  request: Record<string, unknown>
): Promise<Record<string, unknown>> {
  return await new Promise((resolve, reject) => {
    const socket = createConnection(endpoint)
    let buffer = ''
    let responded = false
    socket.setEncoding('utf8')
    socket.on('error', reject)
    // Why: without this a server that hangs up early would leave the promise
    // pending until the test runner's own timeout fires.
    socket.once('close', () => {
      if (!responded) {
        reject(new Error('Connection closed before a complete response line was received'))
      }
    })
    socket.on('data', (chunk) => {
      if (responded) {
        // Only the first line is the response; ignore anything after it.
        return
      }
      buffer += chunk
      const newlineIndex = buffer.indexOf('\n')
      if (newlineIndex === -1) {
        return
      }
      responded = true
      socket.end()
      // Why: a throw inside an event handler is an uncaught exception, not a
      // rejection, so malformed JSON must be routed to reject explicitly.
      try {
        resolve(JSON.parse(buffer.slice(0, newlineIndex)) as Record<string, unknown>)
      } catch (error) {
        reject(error)
      }
    })
    socket.on('connect', () => {
      socket.write(`${JSON.stringify(request)}\n`)
    })
  })
}
// ── Tests ──
// Exercises the browser.* RPC methods through a started OrcaRuntimeRpcServer
// backed by a real BrowserManager and CdpBridge; only Electron's webContents
// and debugger are replaced by the mock guest.
describe('Browser automation pipeline (integration)', () => {
  let server: OrcaRuntimeRpcServer
  let endpoint: string
  let authToken: string

  const GUEST_WC_ID = 5001
  const RENDERER_WC_ID = 1

  beforeEach(async () => {
    const { guest } = createMockGuest(GUEST_WC_ID, 'https://example.com', 'Example Domain')
    webContentsFromIdMock.mockImplementation((id: number) => {
      if (id === GUEST_WC_ID) {
        return guest
      }
      return null
    })

    const browserManager = new BrowserManager()
    // Simulate the attach-time policy (normally done in will-attach-webview)
    browserManager.attachGuestPolicies(guest as never)
    browserManager.registerGuest({
      browserPageId: 'page-1',
      webContentsId: GUEST_WC_ID,
      rendererWebContentsId: RENDERER_WC_ID
    })

    const cdpBridge = new CdpBridge(browserManager)
    cdpBridge.setActiveTab(GUEST_WC_ID)

    const userDataPath = mkdtempSync(join(tmpdir(), 'browser-e2e-'))
    const runtime = new OrcaRuntimeService()
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    runtime.setAgentBrowserBridge(cdpBridge as any)

    server = new OrcaRuntimeRpcServer({ runtime, userDataPath })
    await server.start()

    // The server publishes its endpoint and auth token through runtime metadata.
    const metadata = readRuntimeMetadata(userDataPath)!
    endpoint = metadata.transport!.endpoint
    authToken = metadata.authToken!
  })

  afterEach(async () => {
    await server.stop()
  })

  /** Sends one authenticated RPC call to the per-test server. */
  async function rpc(method: string, params?: Record<string, unknown>) {
    const response = await sendRequest(endpoint, {
      id: `req_${method}`,
      authToken,
      method,
      ...(params ? { params } : {})
    })
    return response
  }

  // ── Snapshot ──
  it('takes a snapshot and returns refs for interactive elements', async () => {
    const res = await rpc('browser.snapshot')
    expect(res.ok).toBe(true)
    const result = res.result as {
      snapshot: string
      refs: { ref: string; role: string; name: string }[]
      url: string
      title: string
    }
    expect(result.url).toBe('https://example.com')
    expect(result.title).toBe('Example Domain')
    expect(result.snapshot).toContain('heading "Example Domain"')
    expect(result.snapshot).toContain('link "More information..."')
    expect(result.refs).toHaveLength(1)
    expect(result.refs[0]).toMatchObject({
      ref: '@e1',
      role: 'link',
      name: 'More information...'
    })
  })

  // ── Click ──
  it('clicks an element by ref after snapshot', async () => {
    await rpc('browser.snapshot')
    const res = await rpc('browser.click', { element: '@e1' })
    expect(res.ok).toBe(true)
    expect((res.result as { clicked: string }).clicked).toBe('@e1')
  })

  it('returns error when clicking without a prior snapshot', async () => {
    const res = await rpc('browser.click', { element: '@e1' })
    expect(res.ok).toBe(false)
    expect((res.error as { code: string }).code).toBe('browser_stale_ref')
  })

  it('returns error for non-existent ref', async () => {
    await rpc('browser.snapshot')
    const res = await rpc('browser.click', { element: '@e999' })
    expect(res.ok).toBe(false)
    expect((res.error as { code: string }).code).toBe('browser_ref_not_found')
  })

  // ── Navigation ──
  it('navigates to a URL and invalidates refs', async () => {
    await rpc('browser.snapshot')
    const gotoRes = await rpc('browser.goto', { url: 'https://search.example.com' })
    expect(gotoRes.ok).toBe(true)
    const gotoResult = gotoRes.result as { url: string; title: string }
    expect(gotoResult.url).toBe('https://search.example.com')
    expect(gotoResult.title).toBe('Search')

    // Old refs should be stale after navigation
    const clickRes = await rpc('browser.click', { element: '@e1' })
    expect(clickRes.ok).toBe(false)
    expect((clickRes.error as { code: string }).code).toBe('browser_stale_ref')

    // Re-snapshot should work and show new page
    const snapRes = await rpc('browser.snapshot')
    expect(snapRes.ok).toBe(true)
    const snapResult = snapRes.result as { snapshot: string; refs: { name: string }[] }
    expect(snapResult.snapshot).toContain('Search')
    expect(snapResult.refs.map((r) => r.name)).toContain('Search')
    expect(snapResult.refs.map((r) => r.name)).toContain('Home')
  })

  it('returns error for failed navigation', async () => {
    const res = await rpc('browser.goto', { url: 'https://nonexistent.invalid' })
    expect(res.ok).toBe(false)
    expect((res.error as { code: string }).code).toBe('browser_navigation_failed')
  })

  // ── Fill ──
  it('fills an input by ref', async () => {
    await rpc('browser.goto', { url: 'https://search.example.com' })
    await rpc('browser.snapshot')
    // @e2 should be the textbox "Search query" on the search page
    const res = await rpc('browser.fill', { element: '@e2', value: 'hello world' })
    expect(res.ok).toBe(true)
    expect((res.result as { filled: string }).filled).toBe('@e2')
  })

  // ── Type ──
  it('types text at current focus', async () => {
    const res = await rpc('browser.type', { input: 'some text' })
    expect(res.ok).toBe(true)
    expect((res.result as { typed: boolean }).typed).toBe(true)
  })

  // ── Select ──
  it('selects a dropdown option by ref', async () => {
    await rpc('browser.goto', { url: 'https://search.example.com' })
    await rpc('browser.snapshot')
    const res = await rpc('browser.select', { element: '@e2', value: 'option-1' })
    expect(res.ok).toBe(true)
    expect((res.result as { selected: string }).selected).toBe('@e2')
  })

  // ── Scroll ──
  it('scrolls the viewport', async () => {
    const res = await rpc('browser.scroll', { direction: 'down' })
    expect(res.ok).toBe(true)
    expect((res.result as { scrolled: string }).scrolled).toBe('down')

    const res2 = await rpc('browser.scroll', { direction: 'up', amount: 200 })
    expect(res2.ok).toBe(true)
    expect((res2.result as { scrolled: string }).scrolled).toBe('up')
  })

  // ── Reload ──
  it('reloads the page', async () => {
    const res = await rpc('browser.reload')
    expect(res.ok).toBe(true)
    expect((res.result as { url: string }).url).toBe('https://example.com')
  })

  // ── Screenshot ──
  it('captures a screenshot', async () => {
    const res = await rpc('browser.screenshot', { format: 'png' })
    expect(res.ok).toBe(true)
    const result = res.result as { data: string; format: string }
    expect(result.format).toBe('png')
    expect(result.data.length).toBeGreaterThan(0)
  })

  // ── Eval ──
  it('evaluates JavaScript in the page context', async () => {
    const res = await rpc('browser.eval', { expression: '2 + 2' })
    expect(res.ok).toBe(true)
    expect((res.result as { result: string }).result).toBe('4')
  })

  // ── Tab management ──
  it('lists open tabs', async () => {
    const res = await rpc('browser.tabList')
    expect(res.ok).toBe(true)
    const result = res.result as { tabs: { index: number; url: string; active: boolean }[] }
    expect(result.tabs).toHaveLength(1)
    expect(result.tabs[0]).toMatchObject({
      index: 0,
      url: 'https://example.com',
      active: true
    })
  })

  it('returns error for out-of-range tab switch', async () => {
    const res = await rpc('browser.tabSwitch', { index: 5 })
    expect(res.ok).toBe(false)
    expect((res.error as { code: string }).code).toBe('browser_tab_not_found')
  })

  // ── Full agent workflow simulation ──
  it('simulates a complete agent workflow: navigate → snapshot → interact → re-snapshot', async () => {
    // 1. Navigate to search page
    const gotoRes = await rpc('browser.goto', { url: 'https://search.example.com' })
    expect(gotoRes.ok).toBe(true)

    // 2. Snapshot the page
    const snap1 = await rpc('browser.snapshot')
    expect(snap1.ok).toBe(true)
    const snap1Result = snap1.result as {
      snapshot: string
      refs: { ref: string; role: string; name: string }[]
    }

    // Verify we see the search page structure
    expect(snap1Result.snapshot).toContain('[Main Nav]')
    expect(snap1Result.snapshot).toContain('text input "Search query"')
    expect(snap1Result.snapshot).toContain('button "Search"')

    // 3. Fill the search input
    const searchInput = snap1Result.refs.find((r) => r.name === 'Search query')
    expect(searchInput).toBeDefined()
    const fillRes = await rpc('browser.fill', {
      element: searchInput!.ref,
      value: 'integration testing'
    })
    expect(fillRes.ok).toBe(true)

    // 4. Click the search button
    const searchBtn = snap1Result.refs.find((r) => r.name === 'Search')
    expect(searchBtn).toBeDefined()
    const clickRes = await rpc('browser.click', { element: searchBtn!.ref })
    expect(clickRes.ok).toBe(true)

    // 5. Take a screenshot
    const ssRes = await rpc('browser.screenshot')
    expect(ssRes.ok).toBe(true)

    // 6. Check tab list
    const tabRes = await rpc('browser.tabList')
    expect(tabRes.ok).toBe(true)
    const tabs = (tabRes.result as { tabs: { url: string }[] }).tabs
    expect(tabs[0].url).toBe('https://search.example.com')
  })

  // ── No tab errors ──
  it('returns browser_no_tab when no tabs are registered', async () => {
    // Create a fresh setup with no registered tabs
    const emptyManager = new BrowserManager()
    const emptyBridge = new CdpBridge(emptyManager)
    const userDataPath2 = mkdtempSync(join(tmpdir(), 'browser-e2e-empty-'))
    const runtime2 = new OrcaRuntimeService()
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    runtime2.setAgentBrowserBridge(emptyBridge as any)
    const server2 = new OrcaRuntimeRpcServer({ runtime: runtime2, userDataPath: userDataPath2 })
    await server2.start()

    // Why: this server is not covered by afterEach, so stop it in `finally`;
    // otherwise a failing assertion would leave it listening.
    try {
      const metadata2 = readRuntimeMetadata(userDataPath2)!
      const res = await sendRequest(metadata2.transport!.endpoint, {
        id: 'req_no_tab',
        authToken: metadata2.authToken,
        method: 'browser.snapshot'
      })
      expect(res.ok).toBe(false)
      expect((res.error as { code: string }).code).toBe('browser_no_tab')
    } finally {
      await server2.stop()
    }
  })
})

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,246 @@
import { afterEach, describe, expect, it, vi } from 'vitest'
import { captureFullPageScreenshot, captureScreenshot } from './cdp-screenshot'
/**
 * Builds a WebContents stub that reports itself as alive with an attached
 * debugger. `capturePage` and `debugger.sendCommand` are bare mocks that each
 * test configures itself.
 */
function createMockWebContents() {
  const debuggerStub = {
    isAttached: vi.fn(() => true),
    sendCommand: vi.fn()
  }

  return {
    isDestroyed: vi.fn(() => false),
    invalidate: vi.fn(),
    capturePage: vi.fn(),
    debugger: debuggerStub
  }
}
// Covers the callback-style captureScreenshot: the direct CDP path and the
// capturePage() fallback that runs once the screenshot timeout elapses.
describe('captureScreenshot', () => {
afterEach(() => {
// Several tests install fake timers; always restore the real ones.
vi.useRealTimers()
})
it('invalidates the guest before forwarding Page.captureScreenshot', async () => {
const webContents = createMockWebContents()
webContents.debugger.sendCommand.mockResolvedValueOnce({ data: 'png-data' })
const onResult = vi.fn()
const onError = vi.fn()
captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
// Yield once so the resolved sendCommand promise can deliver its result.
await Promise.resolve()
expect(webContents.invalidate).toHaveBeenCalledTimes(1)
expect(webContents.debugger.sendCommand).toHaveBeenCalledWith('Page.captureScreenshot', {
format: 'png'
})
expect(onResult).toHaveBeenCalledWith({ data: 'png-data' })
expect(onError).not.toHaveBeenCalled()
})
it('falls back to capturePage when Page.captureScreenshot stalls', async () => {
vi.useFakeTimers()
const webContents = createMockWebContents()
// A promise that never settles simulates a stalled CDP command.
webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
webContents.capturePage.mockResolvedValueOnce({
isEmpty: () => false,
toPNG: () => Buffer.from('fallback-png')
})
const onResult = vi.fn()
const onError = vi.fn()
captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
// Advance to the 8000 ms screenshot timeout so the fallback runs.
await vi.advanceTimersByTimeAsync(8000)
expect(webContents.capturePage).toHaveBeenCalledTimes(1)
expect(onResult).toHaveBeenCalledWith({
data: Buffer.from('fallback-png').toString('base64')
})
expect(onError).not.toHaveBeenCalled()
})
it('crops the fallback image when the request includes a visible clip rect', async () => {
vi.useFakeTimers()
const croppedImage = {
isEmpty: () => false,
toPNG: () => Buffer.from('cropped-png')
}
const webContents = createMockWebContents()
webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
webContents.capturePage.mockResolvedValueOnce({
isEmpty: () => false,
getSize: () => ({ width: 400, height: 300 }),
crop: vi.fn(() => croppedImage),
toPNG: () => Buffer.from('full-png')
})
const onResult = vi.fn()
const onError = vi.fn()
captureScreenshot(
webContents as never,
{
format: 'png',
clip: { x: 10, y: 20, width: 30, height: 40, scale: 2 }
},
onResult,
onError
)
await vi.advanceTimersByTimeAsync(8000)
const fallbackImage = await webContents.capturePage.mock.results[0]?.value
// The clip rect is multiplied by its scale (2) before cropping.
expect(fallbackImage.crop).toHaveBeenCalledWith({ x: 20, y: 40, width: 60, height: 80 })
expect(onResult).toHaveBeenCalledWith({
data: Buffer.from('cropped-png').toString('base64')
})
expect(onError).not.toHaveBeenCalled()
})
it('keeps the timeout error when the request needs beyond-viewport pixels', async () => {
vi.useFakeTimers()
const webContents = createMockWebContents()
webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
webContents.capturePage.mockResolvedValueOnce({
isEmpty: () => false,
getSize: () => ({ width: 400, height: 300 }),
crop: vi.fn(),
toPNG: () => Buffer.from('full-png')
})
const onResult = vi.fn()
const onError = vi.fn()
captureScreenshot(
webContents as never,
{
format: 'png',
captureBeyondViewport: true,
clip: { x: 0, y: 0, width: 800, height: 1200, scale: 1 }
},
onResult,
onError
)
await vi.advanceTimersByTimeAsync(8000)
expect(onResult).not.toHaveBeenCalled()
expect(onError).toHaveBeenCalledWith(
'Screenshot timed out — the browser tab may not be visible or the window may not have focus.'
)
})
it('ignores the fallback result when CDP settles first after the timeout fires', async () => {
vi.useFakeTimers()
// Both promises are resolved by hand so the test controls which settles first.
let resolveCapturePage: ((value: unknown) => void) | null = null
let resolveSendCommand: ((value: unknown) => void) | null = null
const webContents = createMockWebContents()
webContents.debugger.sendCommand.mockImplementation(
() =>
new Promise((resolve) => {
resolveSendCommand = resolve
})
)
webContents.capturePage.mockImplementation(
() =>
new Promise((resolve) => {
resolveCapturePage = resolve
})
)
const onResult = vi.fn()
const onError = vi.fn()
captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
await vi.advanceTimersByTimeAsync(8000)
// The timeout has fired and capturePage is pending; let CDP win the race.
expect(resolveSendCommand).toBeTypeOf('function')
resolveSendCommand!({ data: 'cdp-png' })
await Promise.resolve()
expect(resolveCapturePage).toBeTypeOf('function')
resolveCapturePage!({
isEmpty: () => false,
getSize: () => ({ width: 100, height: 100 }),
crop: vi.fn(),
toPNG: () => Buffer.from('fallback-png')
})
await Promise.resolve()
expect(onResult).toHaveBeenCalledTimes(1)
expect(onResult).toHaveBeenCalledWith({ data: 'cdp-png' })
expect(onError).not.toHaveBeenCalled()
})
it('reports the original timeout when the fallback capture is unavailable', async () => {
vi.useFakeTimers()
const webContents = createMockWebContents()
webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
// An empty image counts as "no fallback available".
webContents.capturePage.mockResolvedValueOnce({
isEmpty: () => true,
toPNG: () => Buffer.from('unused')
})
const onResult = vi.fn()
const onError = vi.fn()
captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
await vi.advanceTimersByTimeAsync(8000)
expect(onResult).not.toHaveBeenCalled()
expect(onError).toHaveBeenCalledWith(
'Screenshot timed out — the browser tab may not be visible or the window may not have focus.'
)
})
})
// Covers how captureFullPageScreenshot derives the clip rect from
// Page.getLayoutMetrics before calling Page.captureScreenshot.
describe('captureFullPageScreenshot', () => {
it('uses cssContentSize so HiDPI pages are captured at the real page size', async () => {
const webContents = createMockWebContents()
webContents.debugger.sendCommand.mockImplementation((method: string) => {
if (method === 'Page.getLayoutMetrics') {
return Promise.resolve({
cssContentSize: { width: 640.25, height: 1280.75 },
contentSize: { width: 1280.5, height: 2561.5 }
})
}
if (method === 'Page.captureScreenshot') {
return Promise.resolve({ data: 'full-page-data' })
}
return Promise.resolve({})
})
await expect(captureFullPageScreenshot(webContents as never, 'png')).resolves.toEqual({
data: 'full-page-data',
format: 'png'
})
expect(webContents.debugger.sendCommand).toHaveBeenNthCalledWith(1, 'Page.getLayoutMetrics', {})
// Fractional CSS sizes are rounded up: 640.25 → 641, 1280.75 → 1281.
expect(webContents.debugger.sendCommand).toHaveBeenNthCalledWith(2, 'Page.captureScreenshot', {
format: 'png',
captureBeyondViewport: true,
clip: { x: 0, y: 0, width: 641, height: 1281, scale: 1 }
})
})
it('falls back to legacy contentSize when cssContentSize is unavailable', async () => {
const webContents = createMockWebContents()
webContents.debugger.sendCommand.mockImplementation((method: string) => {
if (method === 'Page.getLayoutMetrics') {
return Promise.resolve({
contentSize: { width: 800, height: 1600 }
})
}
if (method === 'Page.captureScreenshot') {
return Promise.resolve({ data: 'legacy-full-page-data' })
}
return Promise.resolve({})
})
await expect(captureFullPageScreenshot(webContents as never, 'jpeg')).resolves.toEqual({
data: 'legacy-full-page-data',
format: 'jpeg'
})
expect(webContents.debugger.sendCommand).toHaveBeenNthCalledWith(2, 'Page.captureScreenshot', {
format: 'jpeg',
captureBeyondViewport: true,
clip: { x: 0, y: 0, width: 800, height: 1600, scale: 1 }
})
})
})

View file

@ -0,0 +1,264 @@
import type { WebContents } from 'electron'
// How long a CDP screenshot-related command may run before it is treated as
// stalled.
const SCREENSHOT_TIMEOUT_MS = 8000
// Error text reported when that timeout elapses and no fallback image is usable.
const SCREENSHOT_TIMEOUT_MESSAGE =
'Screenshot timed out — the browser tab may not be visible or the window may not have focus.'
/**
 * Applies the request's `clip` rectangle to an image obtained from the
 * capturePage() fallback.
 *
 * @returns the image unchanged when no clip object was requested, the cropped
 *   image when the scaled clip lies fully inside the image, or `null` when the
 *   request cannot be honoured (beyond-viewport capture, malformed clip, or a
 *   clip that falls outside the image).
 */
function applyFallbackClip(
  image: Electron.NativeImage,
  params: Record<string, unknown> | undefined
): Electron.NativeImage | null {
  // Why: capturePage() only sees the painted viewport, so a viewport-sized
  // image must never be passed off as a beyond-viewport capture.
  if (params?.captureBeyondViewport) {
    return null
  }

  const requestedClip = params?.clip
  if (!requestedClip || typeof requestedClip !== 'object') {
    return image
  }

  const rect = requestedClip as Record<string, unknown>
  const numberOrNaN = (value: unknown): number => (typeof value === 'number' ? value : NaN)
  const left = numberOrNaN(rect.x)
  const top = numberOrNaN(rect.y)
  const clipWidth = numberOrNaN(rect.width)
  const clipHeight = numberOrNaN(rect.height)

  const hasFiniteGeometry =
    Number.isFinite(left) &&
    Number.isFinite(top) &&
    Number.isFinite(clipWidth) &&
    Number.isFinite(clipHeight)
  if (!hasFiniteGeometry || clipWidth <= 0 || clipHeight <= 0) {
    return null
  }

  // A missing, non-finite or non-positive scale is treated as 1.
  const rawScale = rect.scale
  const scale =
    typeof rawScale === 'number' && Number.isFinite(rawScale) && rawScale > 0 ? rawScale : 1

  const crop = {
    x: Math.round(left * scale),
    y: Math.round(top * scale),
    width: Math.round(clipWidth * scale),
    height: Math.round(clipHeight * scale)
  }

  const bounds = image.getSize()
  const startsOutside = crop.x < 0 || crop.y < 0
  const isDegenerate = crop.width <= 0 || crop.height <= 0
  const overflows =
    crop.x + crop.width > bounds.width || crop.y + crop.height > bounds.height
  if (startsOutside || isDegenerate || overflows) {
    return null
  }

  return image.crop(crop)
}
/**
 * Turns a capturePage() image into the `{ data }` shape of a
 * Page.captureScreenshot result (base64-encoded PNG or JPEG).
 *
 * @returns `null` when the image is empty, or when the requested clip cannot
 *   be applied or yields an empty image.
 */
function encodeNativeImageScreenshot(
  image: Electron.NativeImage,
  params: Record<string, unknown> | undefined
): { data: string } | null {
  const clipped = image.isEmpty() ? null : applyFallbackClip(image, params)
  if (!clipped || clipped.isEmpty()) {
    return null
  }

  // Anything other than an explicit 'jpeg' request is encoded as PNG.
  if (params?.format !== 'jpeg') {
    return { data: clipped.toPNG().toString('base64') }
  }

  // Clamp a numeric quality into 0..100; default to 90 otherwise.
  const requestedQuality = params?.quality
  const jpegQuality =
    typeof requestedQuality === 'number' && Number.isFinite(requestedQuality)
      ? Math.max(0, Math.min(100, Math.round(requestedQuality)))
      : 90
  return { data: clipped.toJPEG(jpegQuality).toString('base64') }
}
/**
 * Derives a full-page clip rectangle (origin 0,0, scale 1) from
 * Page.getLayoutMetrics output, rounding fractional sizes up.
 *
 * @returns `null` when the chosen size lacks a finite, positive width and
 *   height.
 */
function getLayoutClip(metrics: {
  cssContentSize?: { width?: number; height?: number }
  contentSize?: { width?: number; height?: number }
}): { x: number; y: number; width: number; height: number; scale: number } | null {
  const isUsableDimension = (value: unknown): value is number =>
    typeof value === 'number' && Number.isFinite(value) && value > 0

  // Why: Page.captureScreenshot clip coordinates are CSS pixels. On HiDPI
  // Electron guests `contentSize` can be in device pixels, which makes Chromium
  // tile the page into a duplicated 2x2 grid, so cssContentSize wins and
  // contentSize is only used when older Chromium builds omit it.
  const pageSize = metrics.cssContentSize ?? metrics.contentSize
  const pageWidth = pageSize?.width
  const pageHeight = pageSize?.height
  if (!isUsableDimension(pageWidth) || !isUsableDimension(pageHeight)) {
    return null
  }

  return {
    x: 0,
    y: 0,
    width: Math.ceil(pageWidth),
    height: Math.ceil(pageHeight),
    scale: 1
  }
}
/**
 * Sends a CDP command through the guest's debugger and rejects with
 * `timeoutMessage` if it has not settled within SCREENSHOT_TIMEOUT_MS.
 * The timer is always cleared once the race settles.
 */
async function sendCommandWithTimeout<T>(
  webContents: WebContents,
  method: string,
  params: Record<string, unknown> | undefined,
  timeoutMessage: string
): Promise<T> {
  let deadlineTimer: NodeJS.Timeout | undefined
  try {
    // The command is issued before the deadline is armed, so a synchronous
    // throw from sendCommand never leaves a timer behind.
    const command = webContents.debugger.sendCommand(method, params ?? {}) as Promise<T>
    const deadline = new Promise<T>((_, reject) => {
      deadlineTimer = setTimeout(() => reject(new Error(timeoutMessage)), SCREENSHOT_TIMEOUT_MS)
    })
    return await Promise.race([command, deadline])
  } finally {
    clearTimeout(deadlineTimer)
  }
}
/**
 * Captures the whole page (not just the viewport) through CDP.
 *
 * Reads the page size from Page.getLayoutMetrics, then requests a
 * beyond-viewport Page.captureScreenshot clipped to that size. Both commands
 * are subject to the screenshot timeout.
 *
 * @param format image encoding to request; defaults to 'png'.
 * @returns the base64 image data together with the format used.
 * @throws when the WebContents is destroyed, the debugger is not attached,
 *   the layout metrics yield no usable size, or a command fails or times out.
 */
export async function captureFullPageScreenshot(
  webContents: WebContents,
  format: 'png' | 'jpeg' = 'png'
): Promise<{ data: string; format: 'png' | 'jpeg' }> {
  if (webContents.isDestroyed()) {
    throw new Error('WebContents destroyed')
  }
  if (!webContents.debugger.isAttached()) {
    throw new Error('Debugger not attached')
  }

  try {
    webContents.invalidate()
  } catch {
    // Some guest teardown paths reject repaint requests. Fall through to CDP.
  }

  type LayoutMetrics = {
    cssContentSize?: { width?: number; height?: number }
    contentSize?: { width?: number; height?: number }
  }
  const layoutMetrics = await sendCommandWithTimeout<LayoutMetrics>(
    webContents,
    'Page.getLayoutMetrics',
    undefined,
    SCREENSHOT_TIMEOUT_MESSAGE
  )

  const clip = getLayoutClip(layoutMetrics)
  if (!clip) {
    throw new Error('Unable to determine full-page screenshot bounds')
  }

  const captureParams = { format, captureBeyondViewport: true, clip }
  const screenshot = await sendCommandWithTimeout<{ data: string }>(
    webContents,
    'Page.captureScreenshot',
    captureParams,
    SCREENSHOT_TIMEOUT_MESSAGE
  )
  return { data: screenshot.data, format }
}
// Why: Electron's capturePage() is unreliable on webview guests — the compositor
// may not produce frames when the webview panel is inactive, unfocused, or in a
// split-pane layout. Prefer the debugger's Page.captureScreenshot, which
// renders server-side in the Blink compositor and doesn't depend on OS-level
// window focus or display state. Guard with a timeout so agent-browser doesn't
// hang on its 30s CDP timeout if the debugger stalls; capturePage() is only
// tried as a last resort once that timeout has elapsed.
/**
 * Captures a screenshot of the guest and reports the outcome through
 * callbacks. Exactly one of `onResult` / `onError` is invoked.
 *
 * @param params Page.captureScreenshot options. Only `format`, `quality`,
 *   `clip`, `captureBeyondViewport` and `fromSurface` are forwarded.
 * @param onResult receives the CDP result, or `{ data }` from the
 *   capturePage() fallback.
 * @param onError receives a message when the guest is destroyed, the debugger
 *   is detached, the CDP command fails, or the timeout elapses without a
 *   usable fallback image.
 */
export function captureScreenshot(
  webContents: WebContents,
  params: Record<string, unknown> | undefined,
  onResult: (result: unknown) => void,
  onError: (message: string) => void
): void {
  if (webContents.isDestroyed()) {
    onError('WebContents destroyed')
    return
  }
  const dbg = webContents.debugger
  if (!dbg.isAttached()) {
    onError('Debugger not attached')
    return
  }

  const screenshotParams: Record<string, unknown> = {}
  if (params?.format) {
    screenshotParams.format = params.format
  }
  // Why: 0 is a valid quality, so test for presence rather than truthiness;
  // the fallback encoder already honours quality 0.
  if (params?.quality != null) {
    screenshotParams.quality = params.quality
  }
  if (params?.clip) {
    screenshotParams.clip = params.clip
  }
  if (params?.captureBeyondViewport != null) {
    screenshotParams.captureBeyondViewport = params.captureBeyondViewport
  }
  if (params?.fromSurface != null) {
    screenshotParams.fromSurface = params.fromSurface
  }

  // Set by whichever path (CDP or timeout/fallback) reports first.
  let settled = false

  // Why: a compositor invalidate is cheap and can recover guest instances that
  // are visible but have not produced a fresh frame since being reclaimed into
  // the active browser tab.
  try {
    webContents.invalidate()
  } catch {
    // Some guest teardown paths reject repaint requests. Fall through to CDP.
  }

  const timer = setTimeout(async () => {
    if (settled) {
      return
    }
    try {
      const image = await webContents.capturePage()
      // CDP may have settled while capturePage() was pending.
      if (settled) {
        return
      }
      const fallback = encodeNativeImageScreenshot(image, params)
      if (fallback) {
        if (settled) {
          return
        }
        settled = true
        onResult(fallback)
        return
      }
    } catch {
      // Fall through to the original timeout error below.
    }
    if (!settled) {
      settled = true
      onError(SCREENSHOT_TIMEOUT_MESSAGE)
    }
  }, SCREENSHOT_TIMEOUT_MS)

  dbg
    .sendCommand('Page.captureScreenshot', screenshotParams)
    .then((result) => {
      if (!settled) {
        settled = true
        clearTimeout(timer)
        onResult(result)
      }
    })
    .catch((err) => {
      if (!settled) {
        settled = true
        clearTimeout(timer)
        onError((err as Error).message)
      }
    })
}

View file

@ -0,0 +1,311 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
import WebSocket from 'ws'
import { CdpWsProxy } from './cdp-ws-proxy'
// Replace the electron module for this suite with a stub that exposes only
// webContents.fromId, so importing the proxy does not need a real Electron
// runtime (presumably pulled in transitively — verify against the imports).
vi.mock('electron', () => ({
webContents: { fromId: vi.fn() }
}))
// Shape of the handlers the mock debugger registers via on() and invokes via emit().
type DebuggerListener = (...args: unknown[]) => void
/**
 * Builds a minimal WebContents stand-in for the proxy tests.
 *
 * Returns the fake `webContents`, the `listeners` registry that its debugger
 * `on` / `removeListener` mocks maintain, and an `emit` helper that invokes
 * every handler currently registered for an event.
 */
function createMockWebContents() {
  const listeners = new Map<string, DebuggerListener[]>()

  const register = (event: string, handler: DebuggerListener) => {
    let registered = listeners.get(event)
    if (registered == null) {
      registered = []
    }
    // Mutate in place so an emit() already iterating this array sees additions.
    registered.push(handler)
    listeners.set(event, registered)
  }

  const unregister = (event: string, handler: DebuggerListener) => {
    const current = listeners.get(event) ?? []
    const remaining = current.filter((candidate) => candidate !== handler)
    listeners.set(event, remaining)
  }

  const webContents = {
    debugger: {
      isAttached: vi.fn(() => false),
      attach: vi.fn(),
      detach: vi.fn(),
      sendCommand: vi.fn(async () => ({})),
      on: vi.fn(register),
      removeListener: vi.fn(unregister)
    },
    isDestroyed: () => false,
    focus: vi.fn(),
    getTitle: vi.fn(() => 'Example'),
    getURL: vi.fn(() => 'https://example.com')
  }

  return {
    webContents,
    listeners,
    emit(event: string, ...args: unknown[]) {
      const targets = listeners.get(event) ?? []
      for (const target of targets) {
        target(...args)
      }
    }
  }
}
// Exercises CdpWsProxy end-to-end over a real loopback WebSocket, with the
// Electron debugger replaced by the mock from createMockWebContents().
describe('CdpWsProxy', () => {
  let mock: ReturnType<typeof createMockWebContents>
  let proxy: CdpWsProxy
  let endpoint: string

  beforeEach(async () => {
    mock = createMockWebContents()
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    proxy = new CdpWsProxy(mock.webContents as any)
    endpoint = await proxy.start()
  })

  afterEach(async () => {
    await proxy.stop()
  })

  // Opens a client socket to the proxy. Rejects on connection failure so a
  // broken server fails the test immediately instead of hanging until timeout.
  function connect(): Promise<WebSocket> {
    return new Promise((resolve, reject) => {
      const ws = new WebSocket(endpoint)
      ws.once('open', () => resolve(ws))
      ws.once('error', reject)
    })
  }

  // Sends one CDP message and resolves with the next message the client receives.
  function sendAndReceive(
    ws: WebSocket,
    msg: Record<string, unknown>
  ): Promise<Record<string, unknown>> {
    return new Promise((resolve) => {
      ws.once('message', (data) => resolve(JSON.parse(data.toString())))
      ws.send(JSON.stringify(msg))
    })
  }

  // Gives the socket round-trips and debugger promises time to settle.
  function wait(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms))
  }

  it('starts on a random port and returns ws:// URL', () => {
    expect(endpoint).toMatch(/^ws:\/\/127\.0\.0\.1:\d+$/)
    expect(proxy.getPort()).toBeGreaterThan(0)
  })

  it('attaches debugger on start', () => {
    expect(mock.webContents.debugger.attach).toHaveBeenCalledWith('1.3')
  })

  // ── CDP message ID correlation ──
  it('correlates CDP request/response IDs', async () => {
    mock.webContents.debugger.sendCommand.mockResolvedValueOnce({ tree: 'nodes' })
    const client = await connect()
    const response = await sendAndReceive(client, {
      id: 42,
      method: 'Accessibility.getFullAXTree',
      params: {}
    })
    expect(response.id).toBe(42)
    expect(response.result).toEqual({ tree: 'nodes' })
    client.close()
  })

  it('returns error response when sendCommand fails', async () => {
    mock.webContents.debugger.sendCommand.mockRejectedValueOnce(new Error('Node not found'))
    const client = await connect()
    const response = await sendAndReceive(client, {
      id: 7,
      method: 'DOM.describeNode',
      params: { nodeId: 999 }
    })
    expect(response.id).toBe(7)
    expect(response.error).toEqual({ code: -32000, message: 'Node not found' })
    client.close()
  })

  // ── Concurrent requests get correct responses ──
  it('handles concurrent requests with correct correlation', async () => {
    let resolveFirst: (v: unknown) => void
    const firstPromise = new Promise((r) => {
      resolveFirst = r
    })
    mock.webContents.debugger.sendCommand
      .mockImplementationOnce(async () => {
        await firstPromise
        return { result: 'slow' }
      })
      .mockResolvedValueOnce({ result: 'fast' })
    const client = await connect()
    const responses: Record<string, unknown>[] = []
    client.on('message', (data) => {
      responses.push(JSON.parse(data.toString()))
    })
    client.send(JSON.stringify({ id: 1, method: 'DOM.enable', params: {} }))
    await wait(10)
    client.send(JSON.stringify({ id: 2, method: 'Page.enable', params: {} }))
    await wait(20)
    resolveFirst!(undefined)
    await wait(20)
    expect(responses).toHaveLength(2)
    const resp1 = responses.find((r) => r.id === 1)
    const resp2 = responses.find((r) => r.id === 2)
    expect(resp1?.result).toEqual({ result: 'slow' })
    expect(resp2?.result).toEqual({ result: 'fast' })
    client.close()
  })

  it('does not deliver a late response from a closed client to a newer websocket', async () => {
    let resolveSlowCommand: ((value: { result: string }) => void) | null = null
    mock.webContents.debugger.sendCommand
      .mockImplementationOnce(
        () =>
          new Promise((resolve) => {
            resolveSlowCommand = resolve
          })
      )
      .mockResolvedValueOnce({ result: 'new-client' })
    const firstClient = await connect()
    firstClient.send(JSON.stringify({ id: 1, method: 'DOM.enable', params: {} }))
    await wait(10)
    // Connecting a second client makes the proxy close the first one.
    const secondClient = await connect()
    const responses: Record<string, unknown>[] = []
    secondClient.on('message', (data) => {
      responses.push(JSON.parse(data.toString()))
    })
    secondClient.send(JSON.stringify({ id: 2, method: 'Page.enable', params: {} }))
    await wait(20)
    resolveSlowCommand!({ result: 'old-client' })
    await wait(20)
    expect(responses).toEqual([{ id: 2, result: { result: 'new-client' } }])
    secondClient.close()
  })

  // ── sessionId envelope translation ──
  it('forwards sessionId to sendCommand for OOPIF support', async () => {
    mock.webContents.debugger.sendCommand.mockResolvedValueOnce({})
    const client = await connect()
    await sendAndReceive(client, {
      id: 1,
      method: 'DOM.enable',
      params: {},
      sessionId: 'oopif-session-123'
    })
    expect(mock.webContents.debugger.sendCommand).toHaveBeenCalledWith(
      'DOM.enable',
      {},
      'oopif-session-123'
    )
    client.close()
  })

  // ── Event forwarding ──
  it('forwards CDP events from debugger to client', async () => {
    const client = await connect()
    const eventPromise = new Promise<Record<string, unknown>>((resolve) => {
      client.on('message', (data) => resolve(JSON.parse(data.toString())))
    })
    mock.emit('message', {}, 'Console.messageAdded', { entry: { text: 'hello' } })
    const event = await eventPromise
    expect(event.method).toBe('Console.messageAdded')
    expect(event.params).toEqual({ entry: { text: 'hello' } })
    client.close()
  })

  it('forwards sessionId in events when present', async () => {
    const client = await connect()
    const eventPromise = new Promise<Record<string, unknown>>((resolve) => {
      client.on('message', (data) => resolve(JSON.parse(data.toString())))
    })
    mock.emit('message', {}, 'DOM.nodeInserted', { node: {} }, 'iframe-session-456')
    const event = await eventPromise
    expect(event.sessionId).toBe('iframe-session-456')
    client.close()
  })

  // ── Focus handling ──
  it('does not focus the guest for Runtime.evaluate polling commands', async () => {
    const client = await connect()
    await sendAndReceive(client, {
      id: 9,
      method: 'Runtime.evaluate',
      params: { expression: 'document.readyState' }
    })
    expect(mock.webContents.focus).not.toHaveBeenCalled()
    client.close()
  })

  it('still focuses the guest for Input.insertText', async () => {
    const client = await connect()
    await sendAndReceive(client, {
      id: 10,
      method: 'Input.insertText',
      params: { text: 'hello' }
    })
    expect(mock.webContents.focus).toHaveBeenCalledTimes(1)
    client.close()
  })

  // ── Cleanup ──
  it('detaches debugger and closes server on stop', async () => {
    const client = await connect()
    await proxy.stop()
    expect(mock.webContents.debugger.detach).toHaveBeenCalled()
    expect(proxy.getPort()).toBeGreaterThan(0) // port stays set but server is closed
    await new Promise<void>((resolve) => {
      client.on('close', () => resolve())
      if (client.readyState === WebSocket.CLOSED) {
        resolve()
      }
    })
  })

  it('rejects inflight requests on stop', async () => {
    let resolveCommand: (v: unknown) => void
    mock.webContents.debugger.sendCommand.mockImplementation(
      () =>
        new Promise((r) => {
          resolveCommand = r as (v: unknown) => void
        })
    )
    const client = await connect()
    const received: Record<string, unknown>[] = []
    client.on('message', (data) => {
      received.push(JSON.parse(data.toString()))
    })
    client.send(JSON.stringify({ id: 1, method: 'Page.enable', params: {} }))
    await wait(10)
    await proxy.stop()
    resolveCommand!({})
    await wait(20)
    // Why: stop() closes the client socket, so a result that resolves afterwards
    // must be dropped rather than written to the closing connection.
    expect(received).toEqual([])
    client.close()
  })
})

View file

@ -0,0 +1,301 @@
import { WebSocketServer, WebSocket } from 'ws'
import { createServer, type Server, type IncomingMessage, type ServerResponse } from 'http'
import type { WebContents } from 'electron'
import { captureScreenshot } from './cdp-screenshot'
/**
 * Exposes one Electron WebContents as a Chrome-DevTools-style endpoint.
 *
 * start() attaches the WebContents debugger and opens an HTTP + WebSocket
 * server on a random 127.0.0.1 port. A handful of Target.* / Browser.* /
 * Page.* methods are answered locally; everything else is forwarded to
 * `webContents.debugger.sendCommand`. Debugger events are relayed to the
 * single active client. Only one client is served at a time: a new connection
 * closes the previous one.
 */
export class CdpWsProxy {
  private httpServer: Server | null = null
  private wss: WebSocketServer | null = null
  private client: WebSocket | null = null
  private port = 0
  private debuggerMessageHandler: ((...args: unknown[]) => void) | null = null
  private debuggerDetachHandler: ((...args: unknown[]) => void) | null = null
  private attached = false
  // Why: agent-browser filters events by sessionId from Target.attachToTarget.
  private clientSessionId: string | undefined = undefined

  constructor(private readonly webContents: WebContents) {}

  /**
   * Attaches the debugger and starts listening.
   *
   * @returns the `ws://127.0.0.1:<port>` URL clients should connect to
   * @throws Error when the debugger cannot be attached or the server cannot bind
   */
  async start(): Promise<string> {
    await this.attachDebugger()
    return new Promise<string>((resolve, reject) => {
      this.httpServer = createServer((req, res) => this.handleHttpRequest(req, res))
      this.wss = new WebSocketServer({ server: this.httpServer })
      this.wss.on('connection', (ws) => {
        // Single-client policy: the newest connection wins.
        if (this.client) {
          this.client.close()
        }
        this.client = ws
        ws.on('message', (data) => this.handleClientMessage(ws, data.toString()))
        ws.on('close', () => {
          // Only clear the slot if it still belongs to this socket.
          if (this.client === ws) {
            this.client = null
          }
        })
      })
      // Port 0 lets the OS choose a free port; it is read back below.
      this.httpServer.listen(0, '127.0.0.1', () => {
        const addr = this.httpServer!.address()
        if (typeof addr === 'object' && addr) {
          this.port = addr.port
          resolve(`ws://127.0.0.1:${this.port}`)
        } else {
          reject(new Error('Failed to bind proxy server'))
        }
      })
      this.httpServer.on('error', reject)
    })
  }

  /**
   * Detaches the debugger and closes the client, WebSocket server and HTTP
   * server. Does not wait for the sockets to finish closing. Safe to call
   * more than once. The last bound port remains readable via getPort().
   */
  async stop(): Promise<void> {
    this.detachDebugger()
    if (this.client) {
      this.client.close()
      this.client = null
    }
    if (this.wss) {
      this.wss.close()
      this.wss = null
    }
    if (this.httpServer) {
      this.httpServer.close()
      this.httpServer = null
    }
  }

  /** Returns the port bound by start(), or 0 if start() has not succeeded yet. */
  getPort(): number {
    return this.port
  }

  // Serializes and sends a payload, silently dropping it when the target
  // socket is missing or no longer open.
  private send(payload: unknown, client = this.client): void {
    if (client?.readyState === WebSocket.OPEN) {
      client.send(JSON.stringify(payload))
    }
  }

  private sendResult(clientId: number, result: unknown, client = this.client): void {
    this.send({ id: clientId, result }, client)
  }

  // Every failure is reported with the same error code, -32000.
  private sendError(clientId: number, message: string, client = this.client): void {
    this.send({ id: clientId, error: { code: -32000, message } }, client)
  }

  // Describes the single synthetic target this proxy exposes. Title and URL
  // are blank once the WebContents has been destroyed.
  private buildTargetInfo(): Record<string, unknown> {
    const destroyed = this.webContents.isDestroyed()
    return {
      targetId: 'orca-proxy-target',
      type: 'page',
      title: destroyed ? '' : this.webContents.getTitle(),
      url: destroyed ? '' : this.webContents.getURL(),
      attached: true,
      canAccessOpener: false
    }
  }

  // Serves the /json/version and /json(/list) discovery endpoints; every
  // other path gets a 404.
  private handleHttpRequest(req: IncomingMessage, res: ServerResponse): void {
    const url = req.url ?? ''
    if (url === '/json/version' || url === '/json/version/') {
      res.writeHead(200, { 'Content-Type': 'application/json' })
      res.end(
        JSON.stringify({
          Browser: 'Orca/CdpWsProxy',
          'Protocol-Version': '1.3',
          webSocketDebuggerUrl: `ws://127.0.0.1:${this.port}`
        })
      )
      return
    }
    if (url === '/json' || url === '/json/' || url === '/json/list' || url === '/json/list/') {
      res.writeHead(200, { 'Content-Type': 'application/json' })
      res.end(
        JSON.stringify([
          {
            ...this.buildTargetInfo(),
            id: 'orca-proxy-target',
            webSocketDebuggerUrl: `ws://127.0.0.1:${this.port}`
          }
        ])
      )
      return
    }
    res.writeHead(404)
    res.end()
  }

  // Attaches the debugger (unless something else already did) and installs
  // the event-relay and detach listeners. No-op when already attached by us.
  private async attachDebugger(): Promise<void> {
    if (this.attached) {
      return
    }
    if (!this.webContents.debugger.isAttached()) {
      try {
        this.webContents.debugger.attach('1.3')
      } catch {
        throw new Error('Could not attach debugger. DevTools may already be open for this tab.')
      }
    }
    this.attached = true
    this.debuggerMessageHandler = (_event: unknown, ...rest: unknown[]) => {
      const [method, params, sessionId] = rest as [
        string,
        Record<string, unknown>,
        string | undefined
      ]
      if (!this.client || this.client.readyState !== WebSocket.OPEN) {
        return
      }
      // Why: Electron passes empty string (not undefined) for root-session events, but
      // agent-browser filters events by the sessionId from Target.attachToTarget.
      const msg: Record<string, unknown> = { method, params }
      msg.sessionId = sessionId || this.clientSessionId
      this.client.send(JSON.stringify(msg))
    }
    this.debuggerDetachHandler = () => {
      this.attached = false
      // stop() performs no awaited work, so it is intentionally not awaited.
      void this.stop()
    }
    this.webContents.debugger.on('message', this.debuggerMessageHandler as never)
    this.webContents.debugger.on('detach', this.debuggerDetachHandler as never)
  }

  // Removes both debugger listeners and detaches if this proxy still believes
  // it is attached.
  private detachDebugger(): void {
    if (this.debuggerMessageHandler) {
      this.webContents.debugger.removeListener('message', this.debuggerMessageHandler as never)
      this.debuggerMessageHandler = null
    }
    if (this.debuggerDetachHandler) {
      this.webContents.debugger.removeListener('detach', this.debuggerDetachHandler as never)
      this.debuggerDetachHandler = null
    }
    if (this.attached) {
      try {
        this.webContents.debugger.detach()
      } catch {
        /* already detached */
      }
      this.attached = false
    }
  }

  // Parses one client frame and either answers it locally or forwards it.
  // Frames that are not JSON objects with both `id` and `method` are ignored.
  private handleClientMessage(client: WebSocket, raw: string): void {
    let msg: {
      id?: number
      method?: string
      params?: Record<string, unknown>
      sessionId?: string
    } | null
    try {
      msg = JSON.parse(raw)
    } catch {
      return
    }
    // Why: JSON.parse accepts any JSON value. A bare `null` frame would make
    // the property reads below throw inside the socket's message listener.
    if (msg === null || typeof msg !== 'object') {
      return
    }
    if (msg.id == null || !msg.method) {
      return
    }
    const clientId = msg.id
    if (msg.method === 'Target.getTargets') {
      this.sendResult(clientId, { targetInfos: [this.buildTargetInfo()] }, client)
      return
    }
    if (msg.method === 'Target.getTargetInfo') {
      this.sendResult(clientId, { targetInfo: this.buildTargetInfo() }, client)
      return
    }
    if (msg.method === 'Target.setDiscoverTargets' || msg.method === 'Target.detachFromTarget') {
      if (msg.method === 'Target.detachFromTarget') {
        this.clientSessionId = undefined
      }
      this.sendResult(clientId, {}, client)
      return
    }
    if (msg.method === 'Target.attachToTarget') {
      this.clientSessionId = 'orca-proxy-session'
      this.sendResult(clientId, { sessionId: this.clientSessionId }, client)
      return
    }
    if (msg.method === 'Browser.getVersion') {
      this.sendResult(
        clientId,
        {
          protocolVersion: '1.3',
          product: 'Orca/Electron',
          userAgent: '',
          jsVersion: ''
        },
        client
      )
      return
    }
    if (msg.method === 'Page.bringToFront') {
      if (!this.webContents.isDestroyed()) {
        this.webContents.focus()
      }
      this.sendResult(clientId, {}, client)
      return
    }
    // Why: Page.captureScreenshot via debugger.sendCommand hangs on Electron webview guests.
    if (msg.method === 'Page.captureScreenshot') {
      this.handleScreenshot(client, clientId, msg.params)
      return
    }
    // Why: Input.insertText can still require native focus in Electron webviews.
    // Do not auto-focus generic Runtime.evaluate/callFunctionOn traffic: wait
    // polling and read-only JS probes use those methods heavily, and focusing on
    // every eval steals the user's foreground window while background automation
    // is running.
    if (msg.method === 'Input.insertText' && !this.webContents.isDestroyed()) {
      this.webContents.focus()
    }
    // Why: agent-browser waits for network idle to detect navigation completion.
    // Electron webview CDP subscriptions silently lapse after cross-process swaps.
    if (msg.method === 'Page.navigate' && !this.webContents.isDestroyed()) {
      void this.navigateWithLifecycleEnsured(client, clientId, msg.params ?? {})
      return
    }
    this.forwardCommand(client, clientId, msg.method, msg.params ?? {}, msg.sessionId)
  }

  // Forwards a command to the debugger and relays its result or error to the
  // client that issued it. The proxy's own synthetic session id is stripped so
  // only other session ids (e.g. iframe sessions) reach sendCommand.
  private forwardCommand(
    client: WebSocket,
    clientId: number,
    method: string,
    params: Record<string, unknown>,
    msgSessionId?: string
  ): void {
    // Why: the guest can be torn down between the client's request and this
    // call (notably after the awaits in navigateWithLifecycleEnsured). Report
    // that to the client instead of touching a destroyed WebContents.
    if (this.webContents.isDestroyed()) {
      this.sendError(clientId, 'WebContents destroyed', client)
      return
    }
    const sessionId =
      msgSessionId && msgSessionId !== this.clientSessionId ? msgSessionId : undefined
    let pending: Promise<unknown>
    try {
      pending = this.webContents.debugger.sendCommand(method, params, sessionId)
    } catch (err) {
      // A synchronous throw must not escape into the socket's message listener.
      this.sendError(clientId, err instanceof Error ? err.message : String(err), client)
      return
    }
    pending
      .then((result) => {
        this.sendResult(clientId, result, client)
      })
      .catch((err: Error) => {
        this.sendError(clientId, err.message, client)
      })
  }

  // Re-enables the Network / Page / lifecycle domains (best-effort) before
  // forwarding Page.navigate.
  private async navigateWithLifecycleEnsured(
    client: WebSocket,
    clientId: number,
    params: Record<string, unknown>
  ): Promise<void> {
    try {
      const dbg = this.webContents.debugger
      // Why: without Network.enable, agent-browser never sees network idle → goto times out.
      await dbg.sendCommand('Network.enable', {})
      await dbg.sendCommand('Page.enable', {})
      await dbg.sendCommand('Page.setLifecycleEventsEnabled', { enabled: true })
    } catch {
      /* best-effort */
    }
    this.forwardCommand(client, clientId, 'Page.navigate', params)
  }

  // Delegates screenshots to captureScreenshot and relays its callback
  // outcome to the requesting client.
  private handleScreenshot(
    client: WebSocket,
    clientId: number,
    params?: Record<string, unknown>
  ): void {
    captureScreenshot(
      this.webContents,
      params,
      (result) => this.sendResult(clientId, result, client),
      (message) => this.sendError(clientId, message, client)
    )
  }
}

View file

@ -0,0 +1,196 @@
import { describe, expect, it, vi } from 'vitest'
import { buildSnapshot, type CdpCommandSender } from './snapshot-engine'
// Test-local copy of the accessibility-node fields these fixtures populate
// (as returned under `nodes` by the stubbed Accessibility.getFullAXTree).
type AXNode = {
nodeId: string
backendDOMNodeId?: number
role?: { type: string; value: string }
name?: { type: string; value: string }
properties?: { name: string; value: { type: string; value: unknown } }[]
// Ids of child nodes; the fixtures reference other entries of the same array.
childIds?: string[]
ignored?: boolean
}
/**
 * Creates a stub CDP sender that serves `nodes` as the full accessibility
 * tree, acknowledges Accessibility.enable, and rejects any other method.
 */
function makeSender(nodes: AXNode[]): CdpCommandSender {
  return vi.fn(async (method: string) => {
    switch (method) {
      case 'Accessibility.enable':
        return {}
      case 'Accessibility.getFullAXTree':
        return { nodes }
      default:
        throw new Error(`Unexpected CDP method: ${method}`)
    }
  })
}
/**
 * Fixture factory for a single accessibility node.
 *
 * @param id - node id; also parsed (base 10) as the default backendDOMNodeId
 * @param role - value placed in `role.value`
 * @param name - value placed in `name.value`
 * @param opts - optional overrides copied onto the node as-is
 */
function node(
  id: string,
  role: string,
  name: string,
  opts?: {
    childIds?: string[]
    backendDOMNodeId?: number
    ignored?: boolean
    properties?: AXNode['properties']
  }
): AXNode {
  const overrides = opts ?? {}
  const domId = overrides.backendDOMNodeId ?? Number.parseInt(id, 10)
  const fixture: AXNode = {
    nodeId: id,
    backendDOMNodeId: domId,
    role: { type: 'role', value: role },
    name: { type: 'computedString', value: name },
    childIds: overrides.childIds,
    ignored: overrides.ignored,
    properties: overrides.properties
  }
  return fixture
}
// Behavioural spec for buildSnapshot, driven by canned accessibility trees.
describe('buildSnapshot', () => {
  // Runs the engine against a fixed AX tree served by the stub CDP sender.
  const snapshotOf = (tree: AXNode[]) => buildSnapshot(makeSender(tree))

  it('returns empty snapshot for empty tree', async () => {
    const { snapshot, refs, refMap } = await snapshotOf([])

    expect(snapshot).toBe('')
    expect(refs).toEqual([])
    expect(refMap.size).toBe(0)
  })

  it('assigns refs to interactive elements', async () => {
    const { snapshot, refs } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2', '3'] }),
      node('2', 'button', 'Submit', { backendDOMNodeId: 10 }),
      node('3', 'link', 'Home', { backendDOMNodeId: 11 })
    ])

    expect(refs).toHaveLength(2)
    const [first, second] = refs
    expect(first).toEqual({ ref: '@e1', role: 'button', name: 'Submit' })
    expect(second).toEqual({ ref: '@e2', role: 'link', name: 'Home' })
    expect(snapshot).toContain('[@e1] button "Submit"')
    expect(snapshot).toContain('[@e2] link "Home"')
  })

  it('renders text inputs with friendly role name', async () => {
    const { snapshot } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2'] }),
      node('2', 'textbox', 'Email', { backendDOMNodeId: 10 })
    ])

    expect(snapshot).toContain('text input "Email"')
  })

  it('renders landmarks without refs', async () => {
    const { snapshot, refs } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2'] }),
      node('2', 'navigation', 'Main Nav', { childIds: ['3'] }),
      node('3', 'link', 'About', { backendDOMNodeId: 10 })
    ])

    expect(snapshot).toContain('[Main Nav]')
    expect(refs).toHaveLength(1)
    expect(refs[0].name).toBe('About')
  })

  it('renders headings without refs', async () => {
    const { snapshot, refs } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2'] }),
      node('2', 'heading', 'Welcome')
    ])

    expect(snapshot).toContain('heading "Welcome"')
    expect(refs).toHaveLength(0)
  })

  it('renders static text without refs', async () => {
    const { snapshot, refs } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2'] }),
      node('2', 'staticText', 'Hello world')
    ])

    expect(snapshot).toContain('text "Hello world"')
    expect(refs).toHaveLength(0)
  })

  it('skips generic/none/presentation roles', async () => {
    const { snapshot, refs } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2'] }),
      node('2', 'generic', '', { childIds: ['3'] }),
      node('3', 'button', 'OK', { backendDOMNodeId: 10 })
    ])

    expect(refs).toHaveLength(1)
    expect(refs[0].name).toBe('OK')
    expect(snapshot).not.toContain('generic')
  })

  it('skips ignored nodes but walks their children', async () => {
    const { refs } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2'] }),
      node('2', 'group', 'ignored group', { childIds: ['3'], ignored: true }),
      node('3', 'button', 'Deep', { backendDOMNodeId: 10 })
    ])

    expect(refs).toHaveLength(1)
    expect(refs[0].name).toBe('Deep')
  })

  it('skips interactive elements without a name', async () => {
    const { refs } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2', '3'] }),
      node('2', 'button', '', { backendDOMNodeId: 10 }),
      node('3', 'button', 'Labeled', { backendDOMNodeId: 11 })
    ])

    expect(refs).toHaveLength(1)
    expect(refs[0].name).toBe('Labeled')
  })

  it('populates refMap with backendDOMNodeId', async () => {
    const { refMap } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2'] }),
      node('2', 'checkbox', 'Agree', { backendDOMNodeId: 42 })
    ])

    const mapped = refMap.get('@e1')
    expect(mapped).toBeDefined()
    expect(mapped!.backendDOMNodeId).toBe(42)
    expect(mapped!.role).toBe('checkbox')
    expect(mapped!.name).toBe('Agree')
  })

  it('indents children under landmarks', async () => {
    const { snapshot } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2'] }),
      node('2', 'main', '', { childIds: ['3'] }),
      node('3', 'button', 'Action', { backendDOMNodeId: 10 })
    ])

    const rendered = snapshot.split('\n')
    const landmarkLine = rendered.find((line) => line.includes('[Main Content]'))
    const childLine = rendered.find((line) => line.includes('Action'))
    expect(landmarkLine).toBeDefined()
    expect(childLine).toBeDefined()
    expect(childLine!.startsWith(' ')).toBe(true)
  })

  it('handles a realistic page structure', async () => {
    const { snapshot, refs } = await snapshotOf([
      node('1', 'WebArea', 'page', { childIds: ['2', '3', '4'] }),
      node('2', 'banner', '', { childIds: ['5'] }),
      node('3', 'main', '', { childIds: ['6', '7', '8'] }),
      node('4', 'contentinfo', '', {}),
      node('5', 'link', 'Logo', { backendDOMNodeId: 10 }),
      node('6', 'heading', 'Dashboard'),
      node('7', 'textbox', 'Search', { backendDOMNodeId: 20 }),
      node('8', 'button', 'Go', { backendDOMNodeId: 21 })
    ])

    expect(refs).toHaveLength(3)
    const refNames = refs.map(({ name }) => name)
    expect(refNames).toEqual(['Logo', 'Search', 'Go'])
    for (const label of ['[Header]', '[Main Content]', '[Footer]', 'heading "Dashboard"']) {
      expect(snapshot).toContain(label)
    }
  })
})

View file

@ -0,0 +1,451 @@
/* eslint-disable max-lines -- Why: snapshot building, AX tree walking, ref mapping, and cursor-interactive detection are tightly coupled and belong in one module. */
import type { BrowserSnapshotRef } from '../../shared/runtime-types'
// Function that issues one CDP command and resolves with its untyped result.
export type CdpCommandSender = (
method: string,
params?: Record<string, unknown>
) => Promise<unknown>
// Subset of an accessibility-tree node that the snapshot walk reads.
type AXNode = {
nodeId: string
backendDOMNodeId?: number
role?: { type: string; value: string }
name?: { type: string; value: string }
properties?: { name: string; value: { type: string; value: unknown } }[]
// Ids of child nodes, resolved through a nodeId → node map while walking.
childIds?: string[]
ignored?: boolean
}
// One rendered snapshot line. `ref` is '' for entries that get no ref
// (landmarks, headings, static text); `depth` is the indentation level.
type SnapshotEntry = {
ref: string
role: string
name: string
backendDOMNodeId: number
depth: number
}
// What a ref resolves to: stored in SnapshotResult.refMap keyed by the ref string.
export type RefEntry = {
backendDOMNodeId: number
role: string
name: string
// Set only for refs that came from an iframe's own CDP session.
sessionId?: string
// Why: when multiple elements share the same role+name, nth tracks which
// occurrence this ref represents (1-indexed). Used during stale ref recovery
// to disambiguate duplicates.
nth?: number
}
// Result of buildSnapshot: the rendered text, the list of refs in render
// order, and the ref → element lookup.
export type SnapshotResult = {
snapshot: string
refs: BrowserSnapshotRef[]
refMap: Map<string, RefEntry>
}
// AX roles that walkTree treats as interactive, i.e. candidates for an @e ref.
const INTERACTIVE_ROLES = new Set([
'button',
'link',
'textbox',
'searchbox',
'combobox',
'checkbox',
'radio',
'switch',
'slider',
'spinbutton',
'menuitem',
'menuitemcheckbox',
'menuitemradio',
'tab',
'option',
'treeitem'
])
// AX roles rendered as bracketed section labels; their children are walked
// one depth level deeper.
const LANDMARK_ROLES = new Set([
'banner',
'navigation',
'main',
'complementary',
'contentinfo',
'region',
'form',
'search'
])
// Matches exactly the role string 'heading'.
const HEADING_PATTERN = /^heading$/
// Roles that produce no entry of their own; walkTree only descends into their children.
const SKIP_ROLES = new Set(['none', 'presentation', 'generic'])
/**
 * Builds a text snapshot of the page from its accessibility tree.
 *
 * Steps: walk the main AX tree, append cursor-interactive DOM elements found
 * by findCursorInteractiveElements, append the AX tree of every iframe
 * session, then render one line per entry and record a ref → element map.
 *
 * @param sendCommand - CDP sender for the main frame
 * @param iframeSessions - frame id → CDP session id for iframes to include
 * @param makeIframeSender - builds a CDP sender bound to an iframe session;
 *   iframes are only processed when this and `iframeSessions` are both given
 * @returns rendered snapshot text, refs in render order, and the ref lookup map
 */
export async function buildSnapshot(
  sendCommand: CdpCommandSender,
  iframeSessions?: Map<string, string>,
  makeIframeSender?: (sessionId: string) => CdpCommandSender
): Promise<SnapshotResult> {
  await sendCommand('Accessibility.enable')
  const { nodes } = (await sendCommand('Accessibility.getFullAXTree')) as { nodes: AXNode[] }
  const nodeById = new Map<string, AXNode>()
  for (const node of nodes) {
    nodeById.set(node.nodeId, node)
  }
  const entries: SnapshotEntry[] = []
  let refCounter = 1
  // The first node of the returned list is used as the tree root.
  const root = nodes[0]
  if (!root) {
    return { snapshot: '', refs: [], refMap: new Map() }
  }
  walkTree(root, nodeById, 0, entries, () => refCounter++)

  // Why: many modern SPAs use styled <div>s, <span>s, and custom elements as
  // interactive controls without proper ARIA roles. These elements are invisible
  // to the accessibility tree walk above but are clearly interactive (cursor:pointer,
  // onclick, tabindex, contenteditable). This DOM query pass discovers them and
  // promotes them to interactive refs so the agent can interact with them.
  const cursorInteractiveEntries = await findCursorInteractiveElements(sendCommand, entries)
  for (const cie of cursorInteractiveEntries) {
    cie.ref = `@e${refCounter++}`
    entries.push(cie)
  }

  // Why: cross-origin iframes have their own AX trees accessible only through
  // their dedicated CDP session. Append their elements after the parent tree
  // so the agent can see and interact with iframe content.
  // Why a Map: the render loop below looks up a session for every ref, so a
  // keyed lookup avoids rescanning all iframe refs for each entry.
  const sessionByRef = new Map<string, string>()
  if (iframeSessions && makeIframeSender && iframeSessions.size > 0) {
    for (const sessionId of iframeSessions.values()) {
      try {
        const iframeSender = makeIframeSender(sessionId)
        await iframeSender('Accessibility.enable')
        const { nodes: iframeNodes } = (await iframeSender('Accessibility.getFullAXTree')) as {
          nodes: AXNode[]
        }
        if (iframeNodes.length === 0) {
          continue
        }
        const iframeNodeById = new Map<string, AXNode>()
        for (const n of iframeNodes) {
          iframeNodeById.set(n.nodeId, n)
        }
        const iframeRoot = iframeNodes[0]
        if (iframeRoot) {
          // Every ref number handed out during this walk belongs to this session.
          const startRef = refCounter
          walkTree(iframeRoot, iframeNodeById, 1, entries, () => refCounter++)
          for (let i = startRef; i < refCounter; i++) {
            sessionByRef.set(`@e${i}`, sessionId)
          }
        }
      } catch {
        // Iframe session may be stale — skip silently
      }
    }
  }

  const refMap = new Map<string, RefEntry>()
  const refs: BrowserSnapshotRef[] = []
  const lines: string[] = []
  // Why: when multiple elements share the same role+name (e.g. 3 "Submit"
  // buttons), the agent can't distinguish them from text alone. Appending a
  // disambiguation suffix like "(2nd)" lets the agent refer to duplicates.
  const nameCounts = new Map<string, number>()
  const nameOccurrence = new Map<string, number>()
  for (const entry of entries) {
    if (entry.ref) {
      const key = `${entry.role}:${entry.name}`
      nameCounts.set(key, (nameCounts.get(key) ?? 0) + 1)
    }
  }
  for (const entry of entries) {
    const indent = ' '.repeat(entry.depth)
    if (entry.ref) {
      const key = `${entry.role}:${entry.name}`
      const total = nameCounts.get(key) ?? 1
      let displayName = entry.name
      const nth = (nameOccurrence.get(key) ?? 0) + 1
      nameOccurrence.set(key, nth)
      // The first occurrence keeps its plain name; later ones get the suffix.
      if (total > 1 && nth > 1) {
        displayName = `${entry.name} (${ordinal(nth)})`
      }
      lines.push(`${indent}[${entry.ref}] ${entry.role} "${displayName}"`)
      refs.push({ ref: entry.ref, role: entry.role, name: displayName })
      // refMap keeps the undecorated name; `nth` carries the disambiguation.
      refMap.set(entry.ref, {
        backendDOMNodeId: entry.backendDOMNodeId,
        role: entry.role,
        name: entry.name,
        sessionId: sessionByRef.get(entry.ref),
        nth: total > 1 ? nth : undefined
      })
    } else {
      lines.push(`${indent}${entry.role} "${entry.name}"`)
    }
  }
  return { snapshot: lines.join('\n'), refs, refMap }
}
/**
 * Depth-first visit of one AX node, appending snapshot entries to `entries`.
 *
 * - ignored nodes, skipped roles, unclassified roles and unnamed
 *   non-landmarks contribute nothing themselves; only their children are visited
 * - landmarks emit a label and indent their children by one level
 * - headings and non-blank static text emit a ref-less line and stop
 * - interactive nodes that are focusable or have a backend DOM id get the
 *   next `@e<n>` ref and stop
 */
function walkTree(
  node: AXNode,
  nodeById: Map<string, AXNode>,
  depth: number,
  entries: SnapshotEntry[],
  nextRef: () => number
): void {
  const descend = (childDepth: number): void => {
    walkChildren(node, nodeById, childDepth, entries, nextRef)
  }
  const role = node.role?.value ?? ''
  const label = node.name?.value ?? ''

  if (node.ignored || SKIP_ROLES.has(role)) {
    descend(depth)
    return
  }

  const domId = node.backendDOMNodeId ?? 0

  // Landmarks are the only kind rendered even without an accessible name.
  if (LANDMARK_ROLES.has(role)) {
    entries.push({
      ref: '',
      role: formatLandmarkRole(role, label),
      name: label || role,
      backendDOMNodeId: domId,
      depth
    })
    descend(depth + 1)
    return
  }

  if (!label) {
    descend(depth)
    return
  }

  if (HEADING_PATTERN.test(role)) {
    entries.push({ ref: '', role: 'heading', name: label, backendDOMNodeId: domId, depth })
    return
  }

  if (role === 'staticText' || role === 'StaticText') {
    const text = label.trim()
    if (text.length > 0) {
      entries.push({ ref: '', role: 'text', name: text, backendDOMNodeId: domId, depth })
      return
    }
    // Whitespace-only text renders nothing, but its children are still visited.
    descend(depth)
    return
  }

  const referencable =
    INTERACTIVE_ROLES.has(role) && (isFocusable(node) || Boolean(node.backendDOMNodeId))
  if (referencable) {
    const ref = `@e${nextRef()}`
    entries.push({
      ref,
      role: formatInteractiveRole(role),
      name: label || '(unlabeled)',
      backendDOMNodeId: domId,
      depth
    })
    return
  }

  descend(depth)
}
/**
 * Visits each child of `node` with walkTree at the given depth, in childIds
 * order. Child ids that are missing from `nodeById` are skipped.
 */
function walkChildren(
  node: AXNode,
  nodeById: Map<string, AXNode>,
  depth: number,
  entries: SnapshotEntry[],
  nextRef: () => number
): void {
  const { childIds } = node
  if (!childIds) {
    return
  }
  for (let index = 0; index < childIds.length; index += 1) {
    const child = nodeById.get(childIds[index])
    if (!child) {
      continue
    }
    walkTree(child, nodeById, depth, entries, nextRef)
  }
}
/**
 * Reports whether a node should be treated as focusable. Only an explicit
 * `focusable` property whose value is exactly `false` makes it non-focusable;
 * a missing property list or missing property counts as focusable.
 */
function isFocusable(node: AXNode): boolean {
  const focusableProperty = node.properties?.find(({ name }) => name === 'focusable')
  return focusableProperty?.value.value !== false
}
/**
 * Maps an interactive ARIA role to the label shown in the snapshot.
 *
 * Text-entry, menu-item, spin-button and tree-item roles get a friendlier
 * name; every other role is returned unchanged.
 */
function formatInteractiveRole(role: string): string {
  if (role === 'textbox' || role === 'searchbox') {
    return 'text input'
  }
  if (role === 'menuitem' || role === 'menuitemcheckbox' || role === 'menuitemradio') {
    return 'menu item'
  }
  if (role === 'spinbutton') {
    return 'number input'
  }
  if (role === 'treeitem') {
    return 'tree item'
  }
  // Everything else (including 'combobox', which maps to itself) is shown verbatim.
  return role
}
/**
 * Builds the bracketed label for a landmark entry.
 *
 * A non-empty accessible `name` always wins. Otherwise well-known landmark
 * roles get a human-friendly label, and any other role is shown as-is.
 */
function formatLandmarkRole(role: string, name: string): string {
  const defaultLabels = new Map<string, string>([
    ['banner', 'Header'],
    ['navigation', 'Navigation'],
    ['main', 'Main Content'],
    ['complementary', 'Sidebar'],
    ['contentinfo', 'Footer'],
    ['search', 'Search']
  ])
  const label = name || (defaultLabels.get(role) ?? role)
  return `[${label}]`
}
/**
 * Formats a number with its English ordinal suffix (1st, 2nd, 3rd, 4th, ...).
 *
 * 11, 12 and 13 (and any value ending in them, e.g. 111) take "th". Negative
 * and non-integer values always take "th".
 */
function ordinal(n: number): string {
  const lastTwoDigits = n % 100
  const isTeen = lastTwoDigits >= 11 && lastTwoDigits <= 13
  const lastDigit = lastTwoDigits % 10

  let suffix = 'th'
  if (!isTeen) {
    if (lastDigit === 1) {
      suffix = 'st'
    } else if (lastDigit === 2) {
      suffix = 'nd'
    } else if (lastDigit === 3) {
      suffix = 'rd'
    }
  }
  return `${n}${suffix}`
}
// Why: finds DOM elements that are visually interactive (cursor:pointer, onclick,
// tabindex, contenteditable) but lack standard ARIA roles. These are common in
// modern SPAs where styled <div>s act as buttons. The page-side script stashes the
// matched elements on a temporary window global; each one is then resolved to a
// backendNodeId via CDP and returned as a 'clickable' SnapshotEntry.
//
// sendCommand: issues the CDP commands used here (Runtime.evaluate, DOM.describeNode).
// existingEntries: entries already collected; elements whose backendNodeId matches
//   one of their backendDOMNodeId values are skipped.
// Returns the additional entries (ref '', role 'clickable', depth 0). Errors are
// swallowed, so the promise resolves with whatever was collected before the failure.
async function findCursorInteractiveElements(
sendCommand: CdpCommandSender,
existingEntries: SnapshotEntry[]
): Promise<SnapshotEntry[]> {
// Backend node ids already present in the snapshot, used to avoid duplicates.
const existingNodeIds = new Set(existingEntries.map((e) => e.backendDOMNodeId))
const results: SnapshotEntry[] = []
try {
// Single evaluate call that finds interactive elements and returns their info
// along with a way to reference them by index.
// The script caps matches at 50, skips native interactive tags and elements whose
// role attribute is already an interactive role, skips zero-size elements, and
// skips elements with no label text (aria-label or textContent, trimmed to 80 chars).
// Matched elements are kept in window.__orcaCursorInteractive, index-aligned with
// the JSON array of { text, tag } that is returned by value.
const { result } = (await sendCommand('Runtime.evaluate', {
expression: `(() => {
const SKIP_ROLES = new Set(['button','link','textbox','checkbox','radio','tab',
'menuitem','option','switch','slider','combobox','searchbox','spinbutton','treeitem',
'menuitemcheckbox','menuitemradio']);
const SKIP_TAGS = new Set(['input','button','select','textarea','a']);
const seen = new Set();
const found = [];
const matchedElements = [];
function check(el) {
if (seen.has(el)) return;
seen.add(el);
const tag = el.tagName.toLowerCase();
if (SKIP_TAGS.has(tag)) return;
const role = el.getAttribute('role');
if (role && SKIP_ROLES.has(role)) return;
const rect = el.getBoundingClientRect();
if (rect.width === 0 || rect.height === 0) return;
const text = (el.ariaLabel || el.getAttribute('aria-label') || el.textContent || '').trim().slice(0, 80);
if (!text) return;
found.push({ text, tag });
matchedElements.push(el);
if (found.length >= 50) return;
}
document.querySelectorAll('[onclick], [tabindex]:not([tabindex="-1"]), [contenteditable="true"]').forEach(el => {
if (found.length < 50) check(el);
});
document.querySelectorAll('div, span, li, td, img, svg, label').forEach(el => {
if (found.length >= 50) return;
try {
if (window.getComputedStyle(el).cursor === 'pointer') check(el);
} catch {}
});
window.__orcaCursorInteractive = matchedElements;
return JSON.stringify(found);
})()`,
returnByValue: true
})) as { result: { value: string } }
const elements = JSON.parse(result.value) as { text: string; tag: string }[]
// Resolve each stashed element individually; a failure for one element only
// drops that element (inner catch) and does not abort the rest.
for (let i = 0; i < elements.length; i++) {
try {
// Evaluated without returnByValue: the objectId on the result is what
// DOM.describeNode needs to identify the element.
const { result: objResult } = (await sendCommand('Runtime.evaluate', {
expression: `window.__orcaCursorInteractive[${i}]`
})) as { result: { objectId?: string } }
if (!objResult.objectId) {
continue
}
const { node } = (await sendCommand('DOM.describeNode', {
objectId: objResult.objectId
})) as { node: { backendNodeId: number } }
// Already represented in the accessibility-derived entries; skip it.
if (existingNodeIds.has(node.backendNodeId)) {
continue
}
results.push({
ref: '',
role: 'clickable',
name: elements[i].text,
backendDOMNodeId: node.backendNodeId,
depth: 0
})
} catch {
continue
}
}
// Clean up the temporary page global.
// NOTE(review): this runs only when the steps above did not throw out of the
// outer try (e.g. JSON.parse failing would skip it) — confirm that is acceptable.
await sendCommand('Runtime.evaluate', {
expression: 'delete window.__orcaCursorInteractive',
returnByValue: true
})
} catch {
// DOM query (or cleanup) failed — not critical; fall through and return
// whatever entries were collected so far (possibly none).
}
return results
}

View file

@ -35,6 +35,8 @@ import { CodexAccountService } from './codex-accounts/service'
import { CodexRuntimeHomeService } from './codex-accounts/runtime-home-service' import { CodexRuntimeHomeService } from './codex-accounts/runtime-home-service'
import { openCodeHookService } from './opencode/hook-service' import { openCodeHookService } from './opencode/hook-service'
import { StarNagService } from './star-nag/service' import { StarNagService } from './star-nag/service'
import { AgentBrowserBridge } from './browser/agent-browser-bridge'
import { browserManager } from './browser/browser-manager'
let mainWindow: BrowserWindow | null = null let mainWindow: BrowserWindow | null = null
/** Whether a manual app.quit() (Cmd+Q, etc.) is in progress. Shared with the /** Whether a manual app.quit() (Cmd+Q, etc.) is in progress. Shared with the
@ -158,6 +160,7 @@ app.whenReady().then(async () => {
starNag = new StarNagService(store, stats) starNag = new StarNagService(store, stats)
starNag.start() starNag.start()
starNag.registerIpcHandlers() starNag.registerIpcHandlers()
runtime.setAgentBrowserBridge(new AgentBrowserBridge(browserManager))
nativeTheme.themeSource = store.getSettings().theme ?? 'system' nativeTheme.themeSource = store.getSettings().theme ?? 'system'
registerAppMenu({ registerAppMenu({
onCheckForUpdates: () => checkForUpdatesFromMenu(), onCheckForUpdates: () => checkForUpdatesFromMenu(),
@ -265,6 +268,9 @@ app.on('will-quit', () => {
openCodeHookService.stop() openCodeHookService.stop()
starNag?.stop() starNag?.stop()
stats?.flush() stats?.flush()
// Why: agent-browser daemon processes would otherwise linger after Orca quits,
// holding ports and leaving stale session state on disk.
runtime?.getAgentBrowserBridge()?.destroyAllSessions()
killAllPty() killAllPty()
// Why: in daemon mode, killAllPty is a no-op (daemon sessions survive app // Why: in daemon mode, killAllPty is a no-op (daemon sessions survive app
// quit) but the client connection must be closed so sockets are released. // quit) but the client connection must be closed so sockets are released.

View file

@ -5,6 +5,8 @@ const {
handleMock, handleMock,
registerGuestMock, registerGuestMock,
unregisterGuestMock, unregisterGuestMock,
getGuestWebContentsIdMock,
getWorktreeIdForTabMock,
openDevToolsMock, openDevToolsMock,
getDownloadPromptMock, getDownloadPromptMock,
acceptDownloadMock, acceptDownloadMock,
@ -16,6 +18,8 @@ const {
handleMock: vi.fn(), handleMock: vi.fn(),
registerGuestMock: vi.fn(), registerGuestMock: vi.fn(),
unregisterGuestMock: vi.fn(), unregisterGuestMock: vi.fn(),
getGuestWebContentsIdMock: vi.fn(),
getWorktreeIdForTabMock: vi.fn(),
openDevToolsMock: vi.fn().mockResolvedValue(true), openDevToolsMock: vi.fn().mockResolvedValue(true),
getDownloadPromptMock: vi.fn(), getDownloadPromptMock: vi.fn(),
acceptDownloadMock: vi.fn(), acceptDownloadMock: vi.fn(),
@ -41,6 +45,8 @@ vi.mock('../browser/browser-manager', () => ({
browserManager: { browserManager: {
registerGuest: registerGuestMock, registerGuest: registerGuestMock,
unregisterGuest: unregisterGuestMock, unregisterGuest: unregisterGuestMock,
getGuestWebContentsId: getGuestWebContentsIdMock,
getWorktreeIdForTab: getWorktreeIdForTabMock,
openDevTools: openDevToolsMock, openDevTools: openDevToolsMock,
getDownloadPrompt: getDownloadPromptMock, getDownloadPrompt: getDownloadPromptMock,
acceptDownload: acceptDownloadMock, acceptDownload: acceptDownloadMock,
@ -48,7 +54,7 @@ vi.mock('../browser/browser-manager', () => ({
} }
})) }))
import { registerBrowserHandlers } from './browser' import { registerBrowserHandlers, setAgentBrowserBridgeRef } from './browser'
describe('registerBrowserHandlers', () => { describe('registerBrowserHandlers', () => {
beforeEach(() => { beforeEach(() => {
@ -56,6 +62,8 @@ describe('registerBrowserHandlers', () => {
handleMock.mockReset() handleMock.mockReset()
registerGuestMock.mockReset() registerGuestMock.mockReset()
unregisterGuestMock.mockReset() unregisterGuestMock.mockReset()
getGuestWebContentsIdMock.mockReset()
getWorktreeIdForTabMock.mockReset()
openDevToolsMock.mockReset() openDevToolsMock.mockReset()
getDownloadPromptMock.mockReset() getDownloadPromptMock.mockReset()
acceptDownloadMock.mockReset() acceptDownloadMock.mockReset()
@ -63,6 +71,7 @@ describe('registerBrowserHandlers', () => {
showSaveDialogMock.mockReset() showSaveDialogMock.mockReset()
browserWindowFromWebContentsMock.mockReset() browserWindowFromWebContentsMock.mockReset()
openDevToolsMock.mockResolvedValue(true) openDevToolsMock.mockResolvedValue(true)
setAgentBrowserBridgeRef(null)
}) })
it('rejects non-window callers', async () => { it('rejects non-window callers', async () => {
@ -118,4 +127,31 @@ describe('registerBrowserHandlers', () => {
}) })
expect(result).toEqual({ ok: true }) expect(result).toEqual({ ok: true })
}) })
// Verifies that the 'browser:activeTabChanged' handler forwards the guest
// webContents id and the tab's worktree id (both supplied by the mocked
// browserManager) to the bridge's onTabChanged, and returns true.
it('updates the bridge active tab for the owning worktree', async () => {
const onTabChangedMock = vi.fn()
getGuestWebContentsIdMock.mockReturnValue(4242)
getWorktreeIdForTabMock.mockReturnValue('wt-browser')
// Only onTabChanged is exercised, so a minimal stub stands in for the bridge.
setAgentBrowserBridgeRef({ onTabChanged: onTabChangedMock } as never)
registerBrowserHandlers()
// Pull the registered handler out of the ipcMain.handle mock by channel name.
const activeTabChangedHandler = handleMock.mock.calls.find(
([channel]) => channel === 'browser:activeTabChanged'
)?.[1] as (event: { sender: Electron.WebContents }, args: { browserPageId: string }) => boolean
const result = activeTabChangedHandler(
{
// Fake sender shaped like a live 'window' webContents loaded from a file URL.
sender: {
isDestroyed: () => false,
getType: () => 'window',
getURL: () => 'file:///renderer/index.html'
} as Electron.WebContents
},
{ browserPageId: 'page-1' }
)
expect(result).toBe(true)
expect(onTabChangedMock).toHaveBeenCalledWith(4242, 'wt-browser')
})
}) })

View file

@ -2,6 +2,7 @@
trust boundary (isTrustedBrowserRenderer) and handler teardown stay consistent. */ trust boundary (isTrustedBrowserRenderer) and handler teardown stay consistent. */
import { BrowserWindow, dialog, ipcMain } from 'electron' import { BrowserWindow, dialog, ipcMain } from 'electron'
import { browserManager } from '../browser/browser-manager' import { browserManager } from '../browser/browser-manager'
import type { AgentBrowserBridge } from '../browser/agent-browser-bridge'
import { browserSessionRegistry } from '../browser/browser-session-registry' import { browserSessionRegistry } from '../browser/browser-session-registry'
import { import {
pickCookieFile, pickCookieFile,
@ -28,11 +29,37 @@ import type {
} from '../../shared/types' } from '../../shared/types'
let trustedBrowserRendererWebContentsId: number | null = null let trustedBrowserRendererWebContentsId: number | null = null
let agentBrowserBridgeRef: AgentBrowserBridge | null = null
// Why: CLI-driven tab creation must wait until the renderer mounts the webview
// and calls registerGuest, so the tab has a webContentsId and is operable by
// subsequent commands. This map holds one-shot resolvers keyed by browserPageId.
const pendingTabRegistrations = new Map<string, () => void>()
/**
 * Resolves once the guest for `browserPageId` has a webContents id.
 *
 * Resolves immediately if `browserManager` already knows the guest. Otherwise
 * a one-shot resolver is stored in `pendingTabRegistrations` under the page
 * id, and the promise rejects with "Tab registration timed out" after
 * `timeoutMs` (default 8000 ms), removing that map entry.
 *
 * Only one resolver is kept per page id: a second call for the same id while
 * the first is still pending replaces the first call's resolver.
 */
export function waitForTabRegistration(browserPageId: string, timeoutMs = 8_000): Promise<void> {
  const alreadyRegistered = browserManager.getGuestWebContentsId(browserPageId) !== null
  if (alreadyRegistered) {
    return Promise.resolve()
  }

  return new Promise<void>((onRegistered, onTimedOut) => {
    const expire = (): void => {
      pendingTabRegistrations.delete(browserPageId)
      onTimedOut(new Error('Tab registration timed out'))
    }
    const deadline = setTimeout(expire, timeoutMs)

    // Invoked by whoever removes this entry from the map once the guest registers.
    const settle = (): void => {
      clearTimeout(deadline)
      onRegistered()
    }
    pendingTabRegistrations.set(browserPageId, settle)
  })
}
export function setTrustedBrowserRendererWebContentsId(webContentsId: number | null): void { export function setTrustedBrowserRendererWebContentsId(webContentsId: number | null): void {
trustedBrowserRendererWebContentsId = webContentsId trustedBrowserRendererWebContentsId = webContentsId
} }
/**
 * Stores the bridge instance in the module-level `agentBrowserBridgeRef`.
 * Pass `null` to clear the reference.
 */
export function setAgentBrowserBridgeRef(bridge: AgentBrowserBridge | null): void {
agentBrowserBridgeRef = bridge
}
function isTrustedBrowserRenderer(sender: Electron.WebContents): boolean { function isTrustedBrowserRenderer(sender: Electron.WebContents): boolean {
if (sender.isDestroyed() || sender.getType() !== 'window') { if (sender.isDestroyed() || sender.getType() !== 'window') {
return false return false
@ -64,17 +91,39 @@ export function registerBrowserHandlers(): void {
ipcMain.removeHandler('browser:cancelGrab') ipcMain.removeHandler('browser:cancelGrab')
ipcMain.removeHandler('browser:captureSelectionScreenshot') ipcMain.removeHandler('browser:captureSelectionScreenshot')
ipcMain.removeHandler('browser:extractHoverPayload') ipcMain.removeHandler('browser:extractHoverPayload')
ipcMain.removeHandler('browser:activeTabChanged')
ipcMain.handle( ipcMain.handle(
'browser:registerGuest', 'browser:registerGuest',
(event, args: { browserPageId: string; workspaceId: string; webContentsId: number }) => { (
event,
args: {
browserPageId: string
workspaceId: string
worktreeId: string
webContentsId: number
}
) => {
if (!isTrustedBrowserRenderer(event.sender)) { if (!isTrustedBrowserRenderer(event.sender)) {
return false return false
} }
// Why: when Chromium swaps a guest's renderer process (navigation,
// crash recovery), the renderer re-registers the same browserPageId
// with a new webContentsId. The bridge must destroy the old session's
// proxy (its webContents is gone) and let the next command recreate it.
const previousWcId = browserManager.getGuestWebContentsId(args.browserPageId)
browserManager.registerGuest({ browserManager.registerGuest({
...args, ...args,
rendererWebContentsId: event.sender.id rendererWebContentsId: event.sender.id
}) })
if (agentBrowserBridgeRef && previousWcId !== null && previousWcId !== args.webContentsId) {
agentBrowserBridgeRef.onProcessSwap(args.browserPageId, args.webContentsId, previousWcId)
}
const pendingResolve = pendingTabRegistrations.get(args.browserPageId)
if (pendingResolve) {
pendingTabRegistrations.delete(args.browserPageId)
pendingResolve()
}
return true return true
} }
) )
@ -83,10 +132,39 @@ export function registerBrowserHandlers(): void {
if (!isTrustedBrowserRenderer(event.sender)) { if (!isTrustedBrowserRenderer(event.sender)) {
return false return false
} }
// Why: notify bridge before unregistering so it can destroy the session
// process and proxy. Must happen before unregisterGuest clears the mapping.
const wcId = browserManager.getGuestWebContentsId(args.browserPageId)
if (wcId !== null && agentBrowserBridgeRef) {
agentBrowserBridgeRef.onTabClosed(wcId)
}
browserManager.unregisterGuest(args.browserPageId) browserManager.unregisterGuest(args.browserPageId)
return true return true
}) })
// Why: keeps the bridge's active tab in sync with the renderer's UI state.
// Without this, a user switching tabs in the UI would leave the agent operating
// on the previous tab, which is confusing.
ipcMain.handle('browser:activeTabChanged', (event, args: { browserPageId: string }) => {
  // Refuse untrusted senders first; with no bridge attached there is nothing to sync.
  if (!isTrustedBrowserRenderer(event.sender) || !agentBrowserBridgeRef) {
    return false
  }

  const guestWebContentsId = browserManager.getGuestWebContentsId(args.browserPageId)
  if (guestWebContentsId === null) {
    // Unknown page: accepted, but there is no guest to report to the bridge.
    return true
  }

  // Why: renderer tab changes are scoped to a worktree. If we only update
  // the global active guest, later worktree-scoped commands can still
  // resolve to the previously active page inside that worktree.
  const owningWorktreeId = browserManager.getWorktreeIdForTab(args.browserPageId)
  agentBrowserBridgeRef.onTabChanged(guestWebContentsId, owningWorktreeId)
  return true
})
ipcMain.handle('browser:openDevTools', (event, args: { browserPageId: string }) => { ipcMain.handle('browser:openDevTools', (event, args: { browserPageId: string }) => {
if (!isTrustedBrowserRenderer(event.sender)) { if (!isTrustedBrowserRenderer(event.sender)) {
return false return false

View file

@ -20,6 +20,7 @@ const {
registerUpdaterHandlersMock, registerUpdaterHandlersMock,
registerRateLimitHandlersMock, registerRateLimitHandlersMock,
registerBrowserHandlersMock, registerBrowserHandlersMock,
setAgentBrowserBridgeRefMock,
setTrustedBrowserRendererWebContentsIdMock, setTrustedBrowserRendererWebContentsIdMock,
registerFilesystemWatcherHandlersMock, registerFilesystemWatcherHandlersMock,
registerAppHandlersMock, registerAppHandlersMock,
@ -44,6 +45,7 @@ const {
registerUpdaterHandlersMock: vi.fn(), registerUpdaterHandlersMock: vi.fn(),
registerRateLimitHandlersMock: vi.fn(), registerRateLimitHandlersMock: vi.fn(),
registerBrowserHandlersMock: vi.fn(), registerBrowserHandlersMock: vi.fn(),
setAgentBrowserBridgeRefMock: vi.fn(),
setTrustedBrowserRendererWebContentsIdMock: vi.fn(), setTrustedBrowserRendererWebContentsIdMock: vi.fn(),
registerFilesystemWatcherHandlersMock: vi.fn(), registerFilesystemWatcherHandlersMock: vi.fn(),
registerAppHandlersMock: vi.fn(), registerAppHandlersMock: vi.fn(),
@ -129,7 +131,8 @@ vi.mock('../window/attach-main-window-services', () => ({
vi.mock('./browser', () => ({ vi.mock('./browser', () => ({
registerBrowserHandlers: registerBrowserHandlersMock, registerBrowserHandlers: registerBrowserHandlersMock,
setTrustedBrowserRendererWebContentsId: setTrustedBrowserRendererWebContentsIdMock setTrustedBrowserRendererWebContentsId: setTrustedBrowserRendererWebContentsIdMock,
setAgentBrowserBridgeRef: setAgentBrowserBridgeRefMock
})) }))
vi.mock('./app', () => ({ vi.mock('./app', () => ({
@ -159,6 +162,7 @@ describe('registerCoreHandlers', () => {
registerUpdaterHandlersMock.mockReset() registerUpdaterHandlersMock.mockReset()
registerRateLimitHandlersMock.mockReset() registerRateLimitHandlersMock.mockReset()
registerBrowserHandlersMock.mockReset() registerBrowserHandlersMock.mockReset()
setAgentBrowserBridgeRefMock.mockReset()
setTrustedBrowserRendererWebContentsIdMock.mockReset() setTrustedBrowserRendererWebContentsIdMock.mockReset()
registerFilesystemWatcherHandlersMock.mockReset() registerFilesystemWatcherHandlersMock.mockReset()
registerAppHandlersMock.mockReset() registerAppHandlersMock.mockReset()
@ -167,7 +171,7 @@ describe('registerCoreHandlers', () => {
it('passes the store through to handler registrars that need it', () => { it('passes the store through to handler registrars that need it', () => {
const store = { marker: 'store' } const store = { marker: 'store' }
const runtime = { marker: 'runtime' } const runtime = { marker: 'runtime', getAgentBrowserBridge: () => null }
const stats = { marker: 'stats' } const stats = { marker: 'stats' }
const claudeUsage = { marker: 'claudeUsage' } const claudeUsage = { marker: 'claudeUsage' }
const codexUsage = { marker: 'codexUsage' } const codexUsage = { marker: 'codexUsage' }
@ -211,7 +215,7 @@ describe('registerCoreHandlers', () => {
// The first test already called registerCoreHandlers, so the module-level // The first test already called registerCoreHandlers, so the module-level
// guard is now set. beforeEach reset all mocks, so call counts are 0. // guard is now set. beforeEach reset all mocks, so call counts are 0.
const store2 = { marker: 'store2' } const store2 = { marker: 'store2' }
const runtime2 = { marker: 'runtime2' } const runtime2 = { marker: 'runtime2', getAgentBrowserBridge: () => null }
const stats2 = { marker: 'stats2' } const stats2 = { marker: 'stats2' }
const claudeUsage2 = { marker: 'claudeUsage2' } const claudeUsage2 = { marker: 'claudeUsage2' }
const codexUsage2 = { marker: 'codexUsage2' } const codexUsage2 = { marker: 'codexUsage2' }

View file

@ -15,7 +15,7 @@ import { registerStatsHandlers } from './stats'
import { registerRateLimitHandlers } from './rate-limits' import { registerRateLimitHandlers } from './rate-limits'
import { registerRuntimeHandlers } from './runtime' import { registerRuntimeHandlers } from './runtime'
import { registerNotificationHandlers } from './notifications' import { registerNotificationHandlers } from './notifications'
import { setTrustedBrowserRendererWebContentsId } from './browser' import { setTrustedBrowserRendererWebContentsId, setAgentBrowserBridgeRef } from './browser'
import { registerSessionHandlers } from './session' import { registerSessionHandlers } from './session'
import { registerSettingsHandlers } from './settings' import { registerSettingsHandlers } from './settings'
import { registerBrowserHandlers } from './browser' import { registerBrowserHandlers } from './browser'
@ -50,6 +50,7 @@ export function registerCoreHandlers(
// if a channel is registered twice, so we guard to register only once and // if a channel is registered twice, so we guard to register only once and
// just update the per-window web-contents ID on subsequent calls. // just update the per-window web-contents ID on subsequent calls.
setTrustedBrowserRendererWebContentsId(mainWindowWebContentsId) setTrustedBrowserRendererWebContentsId(mainWindowWebContentsId)
setAgentBrowserBridgeRef(runtime.getAgentBrowserBridge())
if (registered) { if (registered) {
return return
} }

View file

@ -764,4 +764,158 @@ describe('OrcaRuntimeService', () => {
} }
]) ])
}) })
// Covers how the runtime's browser* methods combine the optional `page` and
// `worktree` selectors before delegating to the agent-browser bridge. Each test
// installs a stub bridge containing only the methods it exercises (cast with
// `as never`), then asserts on the arguments the stub receives or the error thrown.
describe('browser page targeting', () => {
// With only a page id, the bridge receives `undefined` as its first argument
// and listWorktrees is never consulted.
it('passes explicit page ids through without resolving the current worktree', async () => {
vi.mocked(listWorktrees).mockClear()
const runtime = createRuntime()
const snapshotMock = vi.fn().mockResolvedValue({
browserPageId: 'page-1',
snapshot: 'tree',
refs: [],
url: 'https://example.com',
title: 'Example'
})
runtime.setAgentBrowserBridge({
snapshot: snapshotMock
} as never)
const result = await runtime.browserSnapshot({ page: 'page-1' })
expect(result.browserPageId).toBe('page-1')
expect(snapshotMock).toHaveBeenCalledWith(undefined, 'page-1')
expect(listWorktrees).not.toHaveBeenCalled()
})
// With both selectors, the worktree selector is resolved (to TEST_WORKTREE_ID)
// and passed alongside the page id.
it('resolves explicit worktree selectors when page ids are also provided', async () => {
vi.mocked(listWorktrees).mockClear()
const runtime = createRuntime()
const snapshotMock = vi.fn().mockResolvedValue({
browserPageId: 'page-1',
snapshot: 'tree',
refs: [],
url: 'https://example.com',
title: 'Example'
})
runtime.setAgentBrowserBridge({
snapshot: snapshotMock,
getRegisteredTabs: vi.fn(() => new Map([['page-1', 1]]))
} as never)
await runtime.browserSnapshot({
worktree: 'branch:feature/foo',
page: 'page-1'
})
expect(snapshotMock).toHaveBeenCalledWith(TEST_WORKTREE_ID, 'page-1')
})
// tabSwitch and captureStart both receive the page id as their last argument,
// with the preceding arguments left undefined.
it('routes tab switch and capture start by explicit page id', async () => {
const runtime = createRuntime()
const tabSwitchMock = vi.fn().mockResolvedValue({
switched: 2,
browserPageId: 'page-2'
})
const captureStartMock = vi.fn().mockResolvedValue({
capturing: true
})
runtime.setAgentBrowserBridge({
tabSwitch: tabSwitchMock,
captureStart: captureStartMock
} as never)
await expect(runtime.browserTabSwitch({ page: 'page-2' })).resolves.toEqual({
switched: 2,
browserPageId: 'page-2'
})
await expect(runtime.browserCaptureStart({ page: 'page-2' })).resolves.toEqual({
capturing: true
})
expect(tabSwitchMock).toHaveBeenCalledWith(undefined, undefined, 'page-2')
expect(captureStartMock).toHaveBeenCalledWith(undefined, 'page-2')
})
// A worktree selector that matches nothing must surface 'selector_not_found'
// and must not reach the bridge, even when a valid page id is supplied.
it('does not silently drop invalid explicit worktree selectors for page-targeted commands', async () => {
vi.mocked(listWorktrees).mockResolvedValue(MOCK_GIT_WORKTREES)
const runtime = createRuntime()
const snapshotMock = vi.fn()
runtime.setAgentBrowserBridge({
snapshot: snapshotMock,
getRegisteredTabs: vi.fn(() => new Map([['page-1', 1]]))
} as never)
await expect(
runtime.browserSnapshot({
worktree: 'path:/tmp/missing-worktree',
page: 'page-1'
})
).rejects.toThrow('selector_not_found')
expect(snapshotMock).not.toHaveBeenCalled()
})
// Same expectation for a command that takes no page id.
it('does not silently drop invalid explicit worktree selectors for non-page browser commands', async () => {
vi.mocked(listWorktrees).mockResolvedValue(MOCK_GIT_WORKTREES)
const runtime = createRuntime()
const tabListMock = vi.fn()
runtime.setAgentBrowserBridge({
tabList: tabListMock
} as never)
await expect(
runtime.browserTabList({
worktree: 'path:/tmp/missing-worktree'
})
).rejects.toThrow('selector_not_found')
expect(tabListMock).not.toHaveBeenCalled()
})
// Closing a page id absent from the registered tabs must reject.
it('rejects closing an unknown page id instead of treating it as success', async () => {
vi.mocked(listWorktrees).mockResolvedValue(MOCK_GIT_WORKTREES)
const runtime = createRuntime()
runtime.setAgentBrowserBridge({
getRegisteredTabs: vi.fn(() => new Map([['page-1', 1]]))
} as never)
await expect(
runtime.browserTabClose({
page: 'missing-page'
})
).rejects.toThrow('Browser page missing-page was not found')
})
// The page exists, but not among the tabs registered for the requested worktree:
// the stub returns an empty map for worktree-b and a map containing page-1 otherwise.
it('rejects closing a page outside the explicitly scoped worktree', async () => {
vi.mocked(listWorktrees).mockResolvedValue([
...MOCK_GIT_WORKTREES,
{
path: '/tmp/worktree-b',
head: 'def',
branch: 'feature/bar',
isBare: false,
isMainWorktree: false
}
])
const runtime = createRuntime()
const getRegisteredTabsMock = vi.fn((worktreeId?: string) =>
worktreeId === `${TEST_REPO_ID}::/tmp/worktree-b` ? new Map() : new Map([['page-1', 1]])
)
runtime.setAgentBrowserBridge({
getRegisteredTabs: getRegisteredTabsMock
} as never)
await expect(
runtime.browserTabClose({
page: 'page-1',
worktree: 'path:/tmp/worktree-b'
})
).rejects.toThrow('Browser page page-1 was not found in this worktree')
expect(getRegisteredTabsMock).toHaveBeenCalledWith(`${TEST_REPO_ID}::/tmp/worktree-b`)
})
})
}) })

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -89,6 +89,7 @@ export type BrowserApi = {
registerGuest: (args: { registerGuest: (args: {
browserPageId: string browserPageId: string
workspaceId: string workspaceId: string
worktreeId: string
webContentsId: number webContentsId: number
}) => Promise<void> }) => Promise<void>
unregisterGuest: (args: { browserPageId: string }) => Promise<void> unregisterGuest: (args: { browserPageId: string }) => Promise<void>
@ -107,6 +108,10 @@ export type BrowserApi = {
onContextMenuDismissed: ( onContextMenuDismissed: (
callback: (event: BrowserContextMenuDismissedEvent) => void callback: (event: BrowserContextMenuDismissedEvent) => void
) => () => void ) => () => void
onNavigationUpdate: (
callback: (event: { browserPageId: string; url: string; title: string }) => void
) => () => void
onActivateView: (callback: (data: { worktreeId: string }) => void) => () => void
onOpenLinkInOrcaTab: ( onOpenLinkInOrcaTab: (
callback: (event: { browserPageId: string; url: string }) => void callback: (event: { browserPageId: string; url: string }) => void
) => () => void ) => () => void
@ -140,6 +145,7 @@ export type BrowserApi = {
browserProfile?: string browserProfile?: string
}) => Promise<BrowserCookieImportResult> }) => Promise<BrowserCookieImportResult>
sessionClearDefaultCookies: () => Promise<boolean> sessionClearDefaultCookies: () => Promise<boolean>
notifyActiveTabChanged: (args: { browserPageId: string }) => Promise<boolean>
} }
export type DetectedBrowserProfileInfo = { export type DetectedBrowserProfileInfo = {
@ -604,6 +610,14 @@ export type PreloadApi = {
onOpenQuickOpen: (callback: () => void) => () => void onOpenQuickOpen: (callback: () => void) => () => void
onJumpToWorktreeIndex: (callback: (index: number) => void) => () => void onJumpToWorktreeIndex: (callback: (index: number) => void) => () => void
onNewBrowserTab: (callback: () => void) => () => void onNewBrowserTab: (callback: () => void) => () => void
onRequestTabCreate: (
callback: (data: { requestId: string; url: string; worktreeId?: string }) => void
) => () => void
replyTabCreate: (reply: { requestId: string; browserPageId?: string; error?: string }) => void
onRequestTabClose: (
callback: (data: { requestId: string; tabId: string | null; worktreeId?: string }) => void
) => () => void
replyTabClose: (reply: { requestId: string; error?: string }) => void
onNewTerminalTab: (callback: () => void) => () => void onNewTerminalTab: (callback: () => void) => () => void
onFocusBrowserAddressBar: (callback: () => void) => () => void onFocusBrowserAddressBar: (callback: () => void) => () => void
onFindInBrowserPage: (callback: () => void) => () => void onFindInBrowserPage: (callback: () => void) => () => void

View file

@ -509,6 +509,7 @@ const api = {
registerGuest: (args: { registerGuest: (args: {
browserPageId: string browserPageId: string
workspaceId: string workspaceId: string
worktreeId: string
webContentsId: number webContentsId: number
}): Promise<void> => ipcRenderer.invoke('browser:registerGuest', args), }): Promise<void> => ipcRenderer.invoke('browser:registerGuest', args),
@ -666,6 +667,24 @@ const api = {
return () => ipcRenderer.removeListener('browser:context-menu-dismissed', listener) return () => ipcRenderer.removeListener('browser:context-menu-dismissed', listener)
}, },
// Subscribes `callback` to 'browser:navigation-update' messages; the returned
// function removes the subscription again.
onNavigationUpdate: (
  callback: (event: { browserPageId: string; url: string; title: string }) => void
): (() => void) => {
  const channel = 'browser:navigation-update'
  const forward = (
    _ipcEvent: Electron.IpcRendererEvent,
    payload: { browserPageId: string; url: string; title: string }
  ) => callback(payload)
  ipcRenderer.on(channel, forward)
  return () => ipcRenderer.removeListener(channel, forward)
},
// Subscribes `callback` to 'browser:activateView' messages; the returned
// function removes the subscription again.
onActivateView: (callback: (data: { worktreeId: string }) => void): (() => void) => {
  const channel = 'browser:activateView'
  const forward = (_ipcEvent: Electron.IpcRendererEvent, payload: { worktreeId: string }) =>
    callback(payload)
  ipcRenderer.on(channel, forward)
  return () => ipcRenderer.removeListener(channel, forward)
},
onOpenLinkInOrcaTab: ( onOpenLinkInOrcaTab: (
callback: (event: { browserPageId: string; url: string }) => void callback: (event: { browserPageId: string; url: string }) => void
): (() => void) => { ): (() => void) => {
@ -757,7 +776,10 @@ const api = {
> => ipcRenderer.invoke('browser:session:importFromBrowser', args), > => ipcRenderer.invoke('browser:session:importFromBrowser', args),
sessionClearDefaultCookies: (): Promise<boolean> => sessionClearDefaultCookies: (): Promise<boolean> =>
ipcRenderer.invoke('browser:session:clearDefaultCookies') ipcRenderer.invoke('browser:session:clearDefaultCookies'),
// Invokes the 'browser:activeTabChanged' channel with the given page id and
// resolves with that handler's boolean result.
notifyActiveTabChanged: (args: { browserPageId: string }): Promise<boolean> =>
ipcRenderer.invoke('browser:activeTabChanged', args)
}, },
hooks: { hooks: {
@ -1059,6 +1081,36 @@ const api = {
ipcRenderer.on('ui:newBrowserTab', listener) ipcRenderer.on('ui:newBrowserTab', listener)
return () => ipcRenderer.removeListener('ui:newBrowserTab', listener) return () => ipcRenderer.removeListener('ui:newBrowserTab', listener)
}, },
// Subscribes `callback` to 'browser:requestTabCreate' messages; the returned
// function removes the subscription again.
onRequestTabCreate: (
  callback: (data: { requestId: string; url: string; worktreeId?: string }) => void
): (() => void) => {
  const channel = 'browser:requestTabCreate'
  const forward = (
    _ipcEvent: Electron.IpcRendererEvent,
    request: { requestId: string; url: string; worktreeId?: string }
  ) => callback(request)
  ipcRenderer.on(channel, forward)
  return () => ipcRenderer.removeListener(channel, forward)
},
// Sends the outcome of a tab-create request on 'browser:tabCreateReply'.
replyTabCreate: (reply: {
  requestId: string
  browserPageId?: string
  error?: string
}): void => {
  ipcRenderer.send('browser:tabCreateReply', reply)
},
// Subscribes `callback` to 'browser:requestTabClose' messages; the returned
// function removes the subscription again.
onRequestTabClose: (
  callback: (data: { requestId: string; tabId: string | null; worktreeId?: string }) => void
): (() => void) => {
  const channel = 'browser:requestTabClose'
  const forward = (
    _ipcEvent: Electron.IpcRendererEvent,
    request: { requestId: string; tabId: string | null; worktreeId?: string }
  ) => callback(request)
  ipcRenderer.on(channel, forward)
  return () => ipcRenderer.removeListener(channel, forward)
},
// Sends the outcome of a tab-close request on 'browser:tabCloseReply'.
replyTabClose: (reply: { requestId: string; error?: string }): void => {
  ipcRenderer.send('browser:tabCloseReply', reply)
},
onNewTerminalTab: (callback: () => void): (() => void) => { onNewTerminalTab: (callback: () => void): (() => void) => {
const listener = (_event: Electron.IpcRendererEvent) => callback() const listener = (_event: Electron.IpcRendererEvent) => callback()
ipcRenderer.on('ui:newTerminalTab', listener) ipcRenderer.on('ui:newTerminalTab', listener)

View file

@ -999,6 +999,7 @@ function BrowserPagePane({
void window.api.browser.registerGuest({ void window.api.browser.registerGuest({
browserPageId: browserTab.id, browserPageId: browserTab.id,
workspaceId, workspaceId,
worktreeId,
webContentsId webContentsId
}) })
} }

View file

@ -152,6 +152,10 @@ describe('useIpcEvents updater integration', () => {
onJumpToWorktreeIndex: () => () => {}, onJumpToWorktreeIndex: () => () => {},
onActivateWorktree: () => () => {}, onActivateWorktree: () => () => {},
onNewBrowserTab: () => () => {}, onNewBrowserTab: () => () => {},
onRequestTabCreate: () => () => {},
replyTabCreate: () => {},
onRequestTabClose: () => () => {},
replyTabClose: () => {},
onNewTerminalTab: () => () => {}, onNewTerminalTab: () => () => {},
onCloseActiveTab: () => () => {}, onCloseActiveTab: () => () => {},
onSwitchTab: () => () => {}, onSwitchTab: () => () => {},
@ -171,7 +175,9 @@ describe('useIpcEvents updater integration', () => {
}, },
browser: { browser: {
onGuestLoadFailed: () => () => {}, onGuestLoadFailed: () => () => {},
onOpenLinkInOrcaTab: () => () => {} onOpenLinkInOrcaTab: () => () => {},
onNavigationUpdate: () => () => {},
onActivateView: () => () => {}
}, },
rateLimits: { rateLimits: {
get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }), get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),
@ -314,6 +320,10 @@ describe('useIpcEvents updater integration', () => {
onJumpToWorktreeIndex: () => () => {}, onJumpToWorktreeIndex: () => () => {},
onActivateWorktree: () => () => {}, onActivateWorktree: () => () => {},
onNewBrowserTab: () => () => {}, onNewBrowserTab: () => () => {},
onRequestTabCreate: () => () => {},
replyTabCreate: () => {},
onRequestTabClose: () => () => {},
replyTabClose: () => {},
onNewTerminalTab: () => () => {}, onNewTerminalTab: () => () => {},
onCloseActiveTab: () => () => {}, onCloseActiveTab: () => () => {},
onSwitchTab: () => () => {}, onSwitchTab: () => () => {},
@ -330,7 +340,9 @@ describe('useIpcEvents updater integration', () => {
}, },
browser: { browser: {
onGuestLoadFailed: () => () => {}, onGuestLoadFailed: () => () => {},
onOpenLinkInOrcaTab: () => () => {} onOpenLinkInOrcaTab: () => () => {},
onNavigationUpdate: () => () => {},
onActivateView: () => () => {}
}, },
rateLimits: { rateLimits: {
get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }), get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),
@ -372,6 +384,500 @@ describe('useIpcEvents updater integration', () => {
}) })
}) })
describe('useIpcEvents browser tab close routing', () => {
  type TabCloseRequest = { requestId: string; tabId: string | null; worktreeId?: string }

  // The browser-related store fields that vary between scenarios. Every other
  // field of the mocked store is the same in all tests of this suite.
  type BrowserStoreFixture = {
    activeBrowserTabId: string
    activeBrowserTabIdByWorktree: Record<string, string>
    browserTabsByWorktree: Record<string, { id: string }[]>
    browserPagesByWorkspace: Record<string, { id: string; workspaceId: string }[]>
  }

  beforeEach(() => {
    vi.resetModules()
    vi.unstubAllGlobals()
  })

  // Why: every scenario needs the same module mocks and the same stubbed
  // `window.api`; only the browser slice of the store differs. Building the
  // harness in one place keeps each test focused on its request and its
  // expectations, and guarantees that all scenarios run against identical
  // surroundings.
  //
  // Mounts `useIpcEvents` with `browserState` merged into the mocked store and
  // returns the spies plus a function that delivers a tab-close request to the
  // listener the hook registered.
  async function mountTabCloseHarness(browserState: BrowserStoreFixture) {
    const closeBrowserTab = vi.fn()
    const closeBrowserPage = vi.fn()
    const replyTabClose = vi.fn()
    const tabCloseListenerRef: { current: ((data: TabCloseRequest) => void) | null } = {
      current: null
    }

    // Why: run effects synchronously so calling the hook outside a React
    // render still registers the IPC listeners.
    vi.doMock('react', async () => {
      const actual = await vi.importActual<typeof ReactModule>('react')
      return {
        ...actual,
        useEffect: (effect: () => void | (() => void)) => {
          effect()
        }
      }
    })

    vi.doMock('../store', () => ({
      useAppStore: {
        getState: () => ({
          setUpdateStatus: vi.fn(),
          fetchRepos: vi.fn(),
          fetchWorktrees: vi.fn(),
          setActiveView: vi.fn(),
          activeModal: null,
          closeModal: vi.fn(),
          openModal: vi.fn(),
          activeWorktreeId: 'wt-1',
          activeView: 'terminal',
          setActiveRepo: vi.fn(),
          setActiveWorktree: vi.fn(),
          revealWorktreeInSidebar: vi.fn(),
          setIsFullScreen: vi.fn(),
          updateBrowserTabPageState: vi.fn(),
          activeTabType: 'browser',
          editorFontZoomLevel: 0,
          setEditorFontZoomLevel: vi.fn(),
          setRateLimitsFromPush: vi.fn(),
          setSshConnectionState: vi.fn(),
          setSshTargetLabels: vi.fn(),
          enqueueSshCredentialRequest: vi.fn(),
          removeSshCredentialRequest: vi.fn(),
          settings: { terminalFontSize: 13 },
          ...browserState,
          closeBrowserTab,
          closeBrowserPage
        })
      }
    }))
    vi.doMock('@/lib/ui-zoom', () => ({
      applyUIZoom: vi.fn()
    }))
    vi.doMock('@/lib/worktree-activation', () => ({
      activateAndRevealWorktree: vi.fn(),
      ensureWorktreeHasInitialTerminal: vi.fn()
    }))
    vi.doMock('@/components/sidebar/visible-worktrees', () => ({
      getVisibleWorktreeIds: () => []
    }))
    vi.doMock('@/lib/editor-font-zoom', () => ({
      nextEditorFontZoomLevel: vi.fn(() => 0),
      computeEditorFontSize: vi.fn(() => 13)
    }))
    vi.doMock('@/components/settings/SettingsConstants', () => ({
      zoomLevelToPercent: vi.fn(() => 100),
      ZOOM_MIN: -3,
      ZOOM_MAX: 3
    }))
    vi.doMock('@/lib/zoom-events', () => ({
      dispatchZoomLevelChanged: vi.fn()
    }))

    vi.stubGlobal('window', {
      dispatchEvent: vi.fn(),
      api: {
        repos: { onChanged: () => () => {} },
        worktrees: { onChanged: () => () => {} },
        ui: {
          onOpenSettings: () => () => {},
          onToggleLeftSidebar: () => () => {},
          onToggleRightSidebar: () => () => {},
          onToggleWorktreePalette: () => () => {},
          onOpenQuickOpen: () => () => {},
          onJumpToWorktreeIndex: () => () => {},
          onActivateWorktree: () => () => {},
          onNewBrowserTab: () => () => {},
          onRequestTabCreate: () => () => {},
          replyTabCreate: () => {},
          // Capture the handler the hook registers so the test can invoke it
          // as if main had sent a close request.
          onRequestTabClose: (listener: (data: TabCloseRequest) => void) => {
            tabCloseListenerRef.current = listener
            return () => {}
          },
          replyTabClose,
          onNewTerminalTab: () => () => {},
          onCloseActiveTab: () => () => {},
          onSwitchTab: () => () => {},
          onToggleStatusBar: () => () => {},
          onFullscreenChanged: () => () => {},
          onTerminalZoom: () => () => {},
          getZoomLevel: () => 0,
          set: vi.fn()
        },
        updater: {
          getStatus: () => Promise.resolve({ state: 'idle' }),
          onStatus: () => () => {},
          onClearDismissal: () => () => {}
        },
        browser: {
          onGuestLoadFailed: () => () => {},
          onOpenLinkInOrcaTab: () => () => {},
          onNavigationUpdate: () => () => {},
          onActivateView: () => () => {}
        },
        rateLimits: {
          get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),
          onUpdate: () => () => {}
        },
        ssh: {
          listTargets: () => Promise.resolve([]),
          getState: () => Promise.resolve(null),
          onStateChanged: () => () => {},
          onCredentialRequest: () => () => {},
          onCredentialResolved: () => () => {}
        }
      }
    })

    const { useIpcEvents } = await import('./useIpcEvents')
    useIpcEvents()

    // Why: without this check a missing registration would turn the optional
    // call below into a silent no-op and surface only as a confusing
    // "spy not called" failure.
    expect(tabCloseListenerRef.current).toBeTypeOf('function')

    return {
      closeBrowserTab,
      closeBrowserPage,
      replyTabClose,
      requestTabClose: (data: TabCloseRequest): void => {
        tabCloseListenerRef.current?.(data)
      }
    }
  }

  it('closes the active browser tab for the requested worktree when main does not provide a tab id', async () => {
    const { closeBrowserTab, closeBrowserPage, replyTabClose, requestTabClose } =
      await mountTabCloseHarness({
        activeBrowserTabId: 'workspace-global',
        activeBrowserTabIdByWorktree: {
          'wt-1': 'workspace-global',
          'wt-2': 'workspace-target'
        },
        browserTabsByWorktree: {
          'wt-1': [{ id: 'workspace-global' }],
          'wt-2': [{ id: 'workspace-target' }]
        },
        browserPagesByWorkspace: {}
      })

    requestTabClose({
      requestId: 'req-1',
      tabId: null,
      worktreeId: 'wt-2'
    })

    expect(closeBrowserTab).toHaveBeenCalledWith('workspace-target')
    expect(closeBrowserPage).not.toHaveBeenCalled()
    expect(replyTabClose).toHaveBeenCalledWith({ requestId: 'req-1' })
  })

  it('closes only the requested browser page when a workspace has multiple pages', async () => {
    const { closeBrowserTab, closeBrowserPage, replyTabClose, requestTabClose } =
      await mountTabCloseHarness({
        activeBrowserTabId: 'workspace-1',
        activeBrowserTabIdByWorktree: { 'wt-1': 'workspace-1' },
        browserTabsByWorktree: {
          'wt-1': [{ id: 'workspace-1' }]
        },
        browserPagesByWorkspace: {
          'workspace-1': [
            { id: 'page-1', workspaceId: 'workspace-1' },
            { id: 'page-2', workspaceId: 'workspace-1' }
          ]
        }
      })

    requestTabClose({
      requestId: 'req-2',
      tabId: 'page-2'
    })

    expect(closeBrowserPage).toHaveBeenCalledWith('page-2')
    expect(closeBrowserTab).not.toHaveBeenCalled()
    expect(replyTabClose).toHaveBeenCalledWith({ requestId: 'req-2' })
  })

  it('rejects explicit unknown browser page ids instead of reporting success', async () => {
    const { closeBrowserTab, closeBrowserPage, replyTabClose, requestTabClose } =
      await mountTabCloseHarness({
        activeBrowserTabId: 'workspace-1',
        activeBrowserTabIdByWorktree: { 'wt-1': 'workspace-1' },
        browserTabsByWorktree: {
          'wt-1': [{ id: 'workspace-1' }]
        },
        browserPagesByWorkspace: {
          'workspace-1': [{ id: 'page-1', workspaceId: 'workspace-1' }]
        }
      })

    requestTabClose({
      requestId: 'req-3',
      tabId: 'missing-page'
    })

    expect(closeBrowserPage).not.toHaveBeenCalled()
    expect(closeBrowserTab).not.toHaveBeenCalled()
    expect(replyTabClose).toHaveBeenCalledWith({
      requestId: 'req-3',
      error: 'Browser tab missing-page not found'
    })
  })
})
describe('useIpcEvents shortcut hint clearing', () => { describe('useIpcEvents shortcut hint clearing', () => {
beforeEach(() => { beforeEach(() => {
vi.resetModules() vi.resetModules()
@ -485,6 +991,10 @@ describe('useIpcEvents shortcut hint clearing', () => {
}, },
onActivateWorktree: () => () => {}, onActivateWorktree: () => () => {},
onNewBrowserTab: () => () => {}, onNewBrowserTab: () => () => {},
onRequestTabCreate: () => () => {},
replyTabCreate: () => {},
onRequestTabClose: () => () => {},
replyTabClose: () => {},
onNewTerminalTab: () => () => {}, onNewTerminalTab: () => () => {},
onCloseActiveTab: () => () => {}, onCloseActiveTab: () => () => {},
onSwitchTab: () => () => {}, onSwitchTab: () => () => {},
@ -501,7 +1011,9 @@ describe('useIpcEvents shortcut hint clearing', () => {
}, },
browser: { browser: {
onGuestLoadFailed: () => () => {}, onGuestLoadFailed: () => () => {},
onOpenLinkInOrcaTab: () => () => {} onOpenLinkInOrcaTab: () => () => {},
onNavigationUpdate: () => () => {},
onActivateView: () => () => {}
}, },
rateLimits: { rateLimits: {
get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }), get: () => Promise.resolve({ limits: {}, lastUpdatedAt: Date.now() }),

View file

@ -156,6 +156,28 @@ export function useIpcEvents(): void {
}) })
) )
// Why: navigation driven by agent-browser goes through CDP and therefore
// bypasses Electron's webview events, so the renderer's did-navigate listener
// never runs for it and the Zustand store (address bar, tab title) would stay
// stale. Main pushes the live URL/title over this IPC after
// goto/click/back/reload and the store is updated from that push.
unsubs.push(
  window.api.browser.onNavigationUpdate((update) => {
    const { browserPageId, url, title } = update
    const appState = useAppStore.getState()
    appState.setBrowserPageUrl(browserPageId, url)
    appState.updateBrowserPageState(browserPageId, { title, loading: false })
  })
)
// Why: a browser webview's guest process only starts once its container has
// display != none. activeTabType defaults to 'terminal' after an app restart,
// which leaves persisted browser tabs unmounted. Main sends this IPC ahead of
// browser commands so the webview can start and registerGuest can fire.
unsubs.push(
  window.api.browser.onActivateView(() => {
    const appState = useAppStore.getState()
    appState.setActiveTabType('browser')
  })
)
unsubs.push( unsubs.push(
window.api.browser.onOpenLinkInOrcaTab(({ browserPageId, url }) => { window.api.browser.onOpenLinkInOrcaTab(({ browserPageId, url }) => {
const store = useAppStore.getState() const store = useAppStore.getState()
@ -187,6 +209,92 @@ export function useIpcEvents(): void {
}) })
) )
// Why: CLI-driven tab creation arrives as a request carrying a url and an
// optional worktreeId. The renderer opens the tab and answers with an ID so
// the main process can wait for registerGuest before returning to the CLI.
unsubs.push(
  window.api.ui.onRequestTabCreate((data) => {
    try {
      const appState = useAppStore.getState()
      // Fall back to the currently active worktree when the request names none.
      const targetWorktreeId = data.worktreeId ?? appState.activeWorktreeId
      if (targetWorktreeId) {
        const createdWorkspace = appState.createBrowserTab(targetWorktreeId, data.url, {
          title: data.url
        })
        // Why: registerGuest reports the page ID (not the workspace ID) as
        // browserPageId, so the reply carries the page ID to let
        // waitForTabRegistration correlate correctly. The store is re-read
        // because the pages were added by createBrowserTab above.
        const [firstPage] =
          useAppStore.getState().browserPagesByWorkspace[createdWorkspace.id] ?? []
        window.api.ui.replyTabCreate({
          requestId: data.requestId,
          browserPageId: firstPage?.id ?? createdWorkspace.id
        })
      } else {
        window.api.ui.replyTabCreate({ requestId: data.requestId, error: 'No active worktree' })
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Tab creation failed'
      window.api.ui.replyTabCreate({ requestId: data.requestId, error: message })
    }
  })
)
// Handles tab-close requests delivered over IPC. Every path answers through
// replyTabClose exactly once: without `error` on success, with `error` when
// nothing could be closed or an exception was thrown.
unsubs.push(
  window.api.ui.onRequestTabClose((data) => {
    try {
      const store = useAppStore.getState()
      const explicitTargetId = data.tabId ?? null
      // Target resolution order: the explicit tab id, else the active browser
      // tab of the requested worktree, else the globally active browser tab.
      const tabToClose =
        explicitTargetId ??
        (data.worktreeId
          ? (store.activeBrowserTabIdByWorktree?.[data.worktreeId] ?? null)
          : store.activeBrowserTabId)
      if (!tabToClose) {
        window.api.ui.replyTabClose({
          requestId: data.requestId,
          error: 'No active browser tab to close'
        })
        return
      }
      // Why: the bridge stores tabs keyed by browserPageId (which is the page
      // ID from registerGuest), but closeBrowserTab expects a workspace ID. If
      // tabToClose is a page ID, close only that page unless it is the
      // last page in its workspace. The CLI's `tab close --page` contract
      // targets one browser page, not the entire workspace tab.
      const isWorkspaceId = Object.values(store.browserTabsByWorktree)
        .flat()
        .some((ws) => ws.id === tabToClose)
      if (!isWorkspaceId) {
        const owningWorkspace = Object.entries(store.browserPagesByWorkspace).find(
          ([, pages]) => pages.some((p) => p.id === tabToClose)
        )
        if (owningWorkspace) {
          const [workspaceId, pages] = owningWorkspace
          if (pages.length <= 1) {
            // Closing the only page closes the whole workspace tab.
            store.closeBrowserTab(workspaceId)
          } else {
            store.closeBrowserPage(tabToClose)
          }
          window.api.ui.replyTabClose({ requestId: data.requestId })
          return
        }
      }
      // Why: an explicit id that did not resolve to a page above must be
      // reported as an error instead of being passed to closeBrowserTab and
      // answered as a success.
      // NOTE(review): an explicit id that matches a workspace ID also lands
      // here and is reported as "not found" — confirm that is intended.
      if (explicitTargetId) {
        window.api.ui.replyTabClose({
          requestId: data.requestId,
          error: `Browser tab ${explicitTargetId} not found`
        })
        return
      }
      // Only implicit targets (active tab lookups) reach this point.
      store.closeBrowserTab(tabToClose)
      window.api.ui.replyTabClose({ requestId: data.requestId })
    } catch (err) {
      window.api.ui.replyTabClose({
        requestId: data.requestId,
        error: err instanceof Error ? err.message : 'Tab close failed'
      })
    }
  })
)
unsubs.push( unsubs.push(
window.api.ui.onNewTerminalTab(() => { window.api.ui.onNewTerminalTab(() => {
const store = useAppStore.getState() const store = useAppStore.getState()

View file

@ -595,6 +595,17 @@ export const createBrowserSlice: StateCreator<AppState, [], [], BrowserSlice> =
} }
}) })
// Why: notify the CDP bridge which guest webContents is now active so
// subsequent agent commands (snapshot, click, etc.) target the correct tab.
// registerGuest uses page IDs (not workspace IDs), so we resolve the active
// page within the workspace to find the correct browserPageId.
const workspace = findWorkspace(get().browserTabsByWorktree, tabId)
// The notification is skipped when the workspace has no active page, when no
// `window` global exists, or when `window.api.browser` is not exposed.
if (workspace?.activePageId && typeof window !== 'undefined' && window.api?.browser) {
window.api.browser
.notifyActiveTabChanged({ browserPageId: workspace.activePageId })
// A rejected notification is ignored; nothing here reacts to the failure.
.catch(() => {})
}
const item = Object.values(get().unifiedTabsByWorktree) const item = Object.values(get().unifiedTabsByWorktree)
.flat() .flat()
.find((entry) => entry.contentType === 'browser' && entry.entityId === tabId) .find((entry) => entry.contentType === 'browser' && entry.entityId === tabId)
@ -796,6 +807,12 @@ export const createBrowserSlice: StateCreator<AppState, [], [], BrowserSlice> =
} }
}) })
// Why: switching the active page within a workspace changes which guest
// webContents the CDP bridge should target for agent commands.
// The notification is skipped when no `window` global exists or when
// `window.api.browser` is not exposed; a rejected notification is ignored.
if (typeof window !== 'undefined' && window.api?.browser) {
window.api.browser.notifyActiveTabChanged({ browserPageId: pageId }).catch(() => {})
}
const workspace = findWorkspace(get().browserTabsByWorktree, workspaceId) const workspace = findWorkspace(get().browserTabsByWorktree, workspaceId)
if (!workspace) { if (!workspace) {
return return

View file

@ -1,3 +1,4 @@
/* eslint-disable max-lines -- Why: shared type definitions for all runtime RPC methods live in one file for discoverability and import simplicity. */
import type { TerminalPaneLayoutNode } from './types' import type { TerminalPaneLayoutNode } from './types'
import type { GitWorktreeInfo, Repo } from './types' import type { GitWorktreeInfo, Repo } from './types'
@ -152,3 +153,245 @@ export type RuntimeWorktreeListResult = {
totalCount: number totalCount: number
truncated: boolean truncated: boolean
} }
// ── Browser automation types ──
/**
 * One element reference listed alongside a snapshot.
 * NOTE(review): `role`/`name` look like accessibility role and name — confirm
 * against the code that produces snapshots.
 */
export type BrowserSnapshotRef = {
ref: string
role: string
name: string
}
/**
 * Result of a browser snapshot: the snapshot text, the element refs that go
 * with it, and the URL/title of the page identified by `browserPageId`.
 */
export type BrowserSnapshotResult = {
browserPageId: string
snapshot: string
refs: BrowserSnapshotRef[]
url: string
title: string
}
/** Result of a click; `clicked` is a string naming the target (presumably the element ref — confirm). */
export type BrowserClickResult = {
clicked: string
}
/** Result of a goto navigation: the page's URL and title as reported afterwards. */
export type BrowserGotoResult = {
url: string
title: string
}
/** Result of a fill; `filled` is a string naming the target (presumably the element ref — confirm). */
export type BrowserFillResult = {
filled: string
}
/** Result of a type command; `typed` is a boolean flag. */
export type BrowserTypeResult = {
typed: boolean
}
/** Result of a select; `selected` is a string (whether it is the ref or the chosen value is not visible here). */
export type BrowserSelectResult = {
selected: string
}
/** Result of a scroll; `scrolled` is restricted to the two directions 'up' and 'down'. */
export type BrowserScrollResult = {
scrolled: 'up' | 'down'
}
/** Result of a back navigation; same shape as BrowserGotoResult. */
export type BrowserBackResult = {
url: string
title: string
}
/** Result of a reload; same shape as BrowserGotoResult. */
export type BrowserReloadResult = {
url: string
title: string
}
/**
 * Result of a screenshot in one of the two supported formats.
 * NOTE(review): `data` is presumably the encoded image (e.g. base64) — confirm.
 */
export type BrowserScreenshotResult = {
data: string
format: 'png' | 'jpeg'
}
/** Result of an eval; the evaluated value is carried as a string together with an `origin` string. */
export type BrowserEvalResult = {
result: string
origin: string
}
/** Description of one browser tab: its page ID, position, URL, title, and whether it is the active one. */
export type BrowserTabInfo = {
browserPageId: string
index: number
url: string
title: string
active: boolean
}
/** Result of listing tabs. */
export type BrowserTabListResult = {
tabs: BrowserTabInfo[]
}
/**
 * Result of switching tabs.
 * NOTE(review): `switched` is presumably the index of the tab switched to — confirm.
 */
export type BrowserTabSwitchResult = {
switched: number
browserPageId: string
}
/** Result of a hover; `hovered` is a string naming the target (presumably the element ref — confirm). */
export type BrowserHoverResult = {
hovered: string
}
/** Result of a drag, naming the source and destination as strings. */
export type BrowserDragResult = {
dragged: { from: string; to: string }
}
/** Result of an upload; `uploaded` is a count (presumably of files — confirm). */
export type BrowserUploadResult = {
uploaded: number
}
/** Result of a wait; `waited` is a boolean flag. */
export type BrowserWaitResult = {
waited: boolean
}
/** Result of a check; `checked` is a boolean (presumably the resulting checked state — confirm). */
export type BrowserCheckResult = {
checked: boolean
}
/** Result of a focus; `focused` is a string naming the target. */
export type BrowserFocusResult = {
focused: string
}
/** Result of a clear; `cleared` is a string naming the target. */
export type BrowserClearResult = {
cleared: string
}
/** Result of a select-all; same shape as BrowserSelectResult. */
export type BrowserSelectAllResult = {
selected: string
}
/** Result of a key press; `pressed` is a string (presumably the key name — confirm). */
export type BrowserKeypressResult = {
pressed: string
}
/**
 * Result of a PDF export.
 * NOTE(review): `data` is presumably the encoded document (e.g. base64) — confirm.
 */
export type BrowserPdfResult = {
data: string
}
// ── Cookie management types ──
/**
 * A single cookie as exchanged over the runtime RPC.
 * NOTE(review): the unit of `expires` and the allowed `sameSite` values are
 * not visible here (`sameSite` is typed as a plain string) — confirm.
 */
export type BrowserCookie = {
name: string
value: string
domain: string
path: string
expires: number
httpOnly: boolean
secure: boolean
sameSite: string
}
/** Result of reading cookies. */
export type BrowserCookieGetResult = {
cookies: BrowserCookie[]
}
/** Result of setting a cookie; `success` is a boolean flag. */
export type BrowserCookieSetResult = {
success: boolean
}
/** Result of deleting a cookie; `deleted` is a boolean flag. */
export type BrowserCookieDeleteResult = {
deleted: boolean
}
// ── Viewport emulation types ──
/** Viewport emulation settings reported back: dimensions, device scale factor, and the mobile flag. */
export type BrowserViewportResult = {
width: number
height: number
deviceScaleFactor: number
mobile: boolean
}
// ── Geolocation types ──
/**
 * Geolocation values reported back.
 * NOTE(review): units are not visible here (presumably degrees, and metres for `accuracy`) — confirm.
 */
export type BrowserGeolocationResult = {
latitude: number
longitude: number
accuracy: number
}
// ── Request interception types ──
/** Description of one intercepted request: its id, URL, method, headers (string to string) and resource type. */
export type BrowserInterceptedRequest = {
id: string
url: string
method: string
headers: Record<string, string>
resourceType: string
}
/** Result of enabling interception, including the list of pattern strings (pattern syntax is not visible here). */
export type BrowserInterceptEnableResult = {
enabled: boolean
patterns: string[]
}
/** Result of disabling interception; `disabled` is a boolean flag. */
export type BrowserInterceptDisableResult = {
disabled: boolean
}
// ── Console/network capture types ──
/**
 * One captured console message; `url` and `line` are optional.
 * NOTE(review): the unit of `timestamp` is not visible here — confirm.
 */
export type BrowserConsoleEntry = {
level: string
text: string
timestamp: number
url?: string
line?: number
}
/** Captured console messages plus a `truncated` flag (presumably set when entries were dropped — confirm). */
export type BrowserConsoleResult = {
entries: BrowserConsoleEntry[]
truncated: boolean
}
/**
 * One captured network request/response summary.
 * NOTE(review): the units of `size` and `timestamp` are not visible here — confirm.
 */
export type BrowserNetworkEntry = {
url: string
method: string
status: number
mimeType: string
size: number
timestamp: number
}
/** Captured network entries plus a `truncated` flag (presumably set when entries were dropped — confirm). */
export type BrowserNetworkLogResult = {
entries: BrowserNetworkEntry[]
truncated: boolean
}
/** Result of starting capture; `capturing` is a boolean flag. */
export type BrowserCaptureStartResult = {
capturing: boolean
}
/** Result of stopping capture; `stopped` is a boolean flag. */
export type BrowserCaptureStopResult = {
stopped: boolean
}
/** Result of an exec command; `output` is typed `unknown`, so consumers must narrow it before use. */
export type BrowserExecResult = {
output: unknown
}
/** Result of creating a tab: the page ID of the new tab. */
export type BrowserTabCreateResult = {
browserPageId: string
}
/** Result of closing a tab; `closed` is a boolean flag. */
export type BrowserTabCloseResult = {
closed: boolean
}
export type BrowserErrorCode =
| 'browser_no_tab'
| 'browser_tab_not_found'
| 'browser_tab_closed'
| 'browser_stale_ref'
| 'browser_ref_not_found'
| 'browser_navigation_failed'
| 'browser_element_not_interactable'
| 'browser_eval_error'
| 'browser_cdp_error'
| 'browser_debugger_detached'
| 'browser_timeout'
| 'browser_error'