diff --git a/config/electron-builder.config.cjs b/config/electron-builder.config.cjs index 685c8ee6..af383fd6 100644 --- a/config/electron-builder.config.cjs +++ b/config/electron-builder.config.cjs @@ -1,3 +1,6 @@ +const { chmodSync, existsSync, readdirSync } = require('node:fs') +const { join } = require('node:path') + const isMacRelease = process.env.ORCA_MAC_RELEASE === '1' /** @type {import('electron-builder').Configuration} */ @@ -23,12 +26,34 @@ module.exports = { // Why: daemon-entry.js is forked as a separate Node.js process and must be // accessible on disk (not inside the asar archive) for child_process.fork(). asarUnpack: ['out/cli/**', 'out/shared/**', 'out/main/daemon-entry.js', 'out/main/chunks/**', 'resources/**'], + afterPack: async (context) => { + const resourcesDir = + context.electronPlatformName === 'darwin' + ? join(context.appOutDir, `${context.packager.appInfo.productFilename}.app`, 'Contents', 'Resources') + : join(context.appOutDir, 'resources') + if (!existsSync(resourcesDir)) { + return + } + for (const filename of readdirSync(resourcesDir)) { + if (!filename.startsWith('agent-browser-')) { + continue + } + // Why: the upstream package has inconsistent executable bits across + // platform binaries (notably darwin-x64). child_process.execFile needs + // the copied binary to be executable in packaged apps. 
+ chmodSync(join(resourcesDir, filename), 0o755) + } + }, win: { executableName: 'Orca', extraResources: [ { from: 'resources/win32/bin/orca.cmd', to: 'bin/orca.cmd' + }, + { + from: 'node_modules/agent-browser/bin/agent-browser-win32-x64.exe', + to: 'agent-browser-win32-x64.exe' } ] }, @@ -60,6 +85,10 @@ module.exports = { { from: 'resources/darwin/bin/orca', to: 'bin/orca' + }, + { + from: 'node_modules/agent-browser/bin/agent-browser-darwin-${arch}', + to: 'agent-browser-darwin-${arch}' } ], target: [ @@ -84,6 +113,10 @@ module.exports = { { from: 'resources/linux/bin/orca', to: 'bin/orca' + }, + { + from: 'node_modules/agent-browser/bin/agent-browser-linux-${arch}', + to: 'agent-browser-linux-${arch}' } ], target: ['AppImage', 'deb'], diff --git a/package.json b/package.json index 51d8e185..cfa71687 100644 --- a/package.json +++ b/package.json @@ -73,6 +73,7 @@ "@xterm/addon-webgl": "^0.19.0", "@xterm/headless": "^6.0.0", "@xterm/xterm": "^6.0.0", + "agent-browser": "~0.24.1", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "cmdk": "^1.1.1", @@ -99,6 +100,7 @@ "ssh2": "^1.17.0", "tailwind-merge": "^3.5.0", "tw-animate-css": "^1.4.0", + "ws": "^8.20.0", "zod": "^4.3.6", "zustand": "^5.0.12" }, @@ -111,6 +113,7 @@ "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", "@types/ssh2": "^1.15.5", + "@types/ws": "^8.18.1", "@typescript/native-preview": "7.0.0-dev.20260406.1", "@vitejs/plugin-react": "^5.2.0", "electron": "^41.1.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a7bfe959..b33c102b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -106,6 +106,9 @@ importers: '@xterm/xterm': specifier: ^6.0.0 version: 6.0.0 + agent-browser: + specifier: ~0.24.1 + version: 0.24.1 class-variance-authority: specifier: ^0.7.1 version: 0.7.1 @@ -184,6 +187,9 @@ importers: tw-animate-css: specifier: ^1.4.0 version: 1.4.0 + ws: + specifier: ^8.20.0 + version: 8.20.0 zod: specifier: ^4.3.6 version: 4.3.6 @@ -199,7 +205,7 @@ importers: version: 1.59.1 
'@stablyai/playwright-test': specifier: ^2.1.13 - version: 2.1.13(@playwright/test@1.59.1)(zod@4.3.6) + version: 2.1.14(@playwright/test@1.59.1)(zod@4.3.6) '@tailwindcss/vite': specifier: ^4.2.2 version: 4.2.2(vite@7.3.1(@types/node@25.5.0)(jiti@2.6.1)(lightningcss@1.32.0)(yaml@2.8.3)) @@ -215,6 +221,9 @@ importers: '@types/ssh2': specifier: ^1.15.5 version: 1.15.5 + '@types/ws': + specifier: ^8.18.1 + version: 8.18.1 '@typescript/native-preview': specifier: 7.0.0-dev.20260406.1 version: 7.0.0-dev.20260406.1 @@ -2205,8 +2214,8 @@ packages: resolution: {integrity: sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ==} engines: {node: '>=18'} - '@stablyai/playwright-base@2.1.13': - resolution: {integrity: sha512-F8lc2qSfNZQ53WeWWDLLZSpu6f2ZCuiVgGP0P0+PGdO9swCKEwV0f+ti7a4MlmgMlHoCsf5tvddXIVpikhPRlQ==} + '@stablyai/playwright-base@2.1.14': + resolution: {integrity: sha512-/iAgMW5tC0ETDo3mFyTzszRrD7rGFIT4fgDgtZxqa9vPhiTLix/1+GeOOBNY0uS+XRLFY0Uc/irsC3XProL47g==} engines: {node: '>=18'} peerDependencies: '@playwright/test': ^1.52.0 @@ -2215,13 +2224,13 @@ packages: zod: optional: true - '@stablyai/playwright-test@2.1.13': - resolution: {integrity: sha512-VXy65GukMkIsHtTuYuLhSP3l3YMl21ePTXKI2xLRBCkgzhTLdzat0vHM5TEh7vh58lsxmHlruMFESjcaIeb25g==} + '@stablyai/playwright-test@2.1.14': + resolution: {integrity: sha512-CAyVVnRdsyJg9pbK3Yq5L9lcvEabilFLb2RWeTQybKv7sDkEEqE2t1boXqBt3X6wQO6lsyhUHB9pc10wSwuc4Q==} peerDependencies: '@playwright/test': ^1.52.0 - '@stablyai/playwright@2.1.13': - resolution: {integrity: sha512-PGE6hR5WTknfbEBz+KvhG9i2gukSYdie0at6SI0CnJPu13NvGBno1N0Fm/AePhtO5Kjn1mMWW5cRiknVP4bOwA==} + '@stablyai/playwright@2.1.14': + resolution: {integrity: sha512-+SkphioOf+o2VWiM3KPm/fFTTjwNHUV5b2ZRPrLMTsW6bwmEvjo2FbVOUobNBqbopQBnntNLd8ZCG2gvw7rwtg==} peerDependencies: '@playwright/test': ^1.52.0 @@ -2751,6 +2760,9 @@ packages: '@types/verror@1.10.11': resolution: {integrity: 
sha512-RlDm9K7+o5stv0Co8i8ZRGxDbrTxhJtgjqjFyVh/tXQyl/rYtTKlnTvZ88oSTeYREWurwx20Js4kTuKCsFkUtg==} + '@types/ws@8.18.1': + resolution: {integrity: sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==} + '@types/yauzl@2.10.3': resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==} @@ -2880,6 +2892,10 @@ packages: resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} + agent-browser@0.24.1: + resolution: {integrity: sha512-csWJtYEQow52b+p93zVZfNrcNBwbxGCZDXDMNWl2ij2i0MFKubIzN+icUeX2/NrkZe5iIau8px+HQlxata2oPw==} + hasBin: true + ajv-formats@3.0.1: resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} peerDependencies: @@ -6047,6 +6063,18 @@ packages: wrappy@1.0.2: resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} + ws@8.20.0: + resolution: {integrity: sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==} + engines: {node: '>=10.0.0'} + peerDependencies: + bufferutil: ^4.0.1 + utf-8-validate: '>=5.0.2' + peerDependenciesMeta: + bufferutil: + optional: true + utf-8-validate: + optional: true + wsl-utils@0.3.1: resolution: {integrity: sha512-g/eziiSUNBSsdDJtCLB8bdYEUMj4jR7AGeUo96p/3dTafgjHhpF4RiCFPiRILwjQoDXx5MqkBr4fwWtR3Ky4Wg==} engines: {node: '>=20'} @@ -7897,7 +7925,7 @@ snapshots: '@sindresorhus/merge-streams@4.0.0': {} - '@stablyai/playwright-base@2.1.13(@playwright/test@1.59.1)(zod@4.3.6)': + '@stablyai/playwright-base@2.1.14(@playwright/test@1.59.1)(zod@4.3.6)': dependencies: '@playwright/test': 1.59.1 jpeg-js: 0.4.4 @@ -7906,18 +7934,18 @@ snapshots: optionalDependencies: zod: 4.3.6 - '@stablyai/playwright-test@2.1.13(@playwright/test@1.59.1)(zod@4.3.6)': + 
'@stablyai/playwright-test@2.1.14(@playwright/test@1.59.1)(zod@4.3.6)': dependencies: '@playwright/test': 1.59.1 - '@stablyai/playwright': 2.1.13(@playwright/test@1.59.1)(zod@4.3.6) - '@stablyai/playwright-base': 2.1.13(@playwright/test@1.59.1)(zod@4.3.6) + '@stablyai/playwright': 2.1.14(@playwright/test@1.59.1)(zod@4.3.6) + '@stablyai/playwright-base': 2.1.14(@playwright/test@1.59.1)(zod@4.3.6) transitivePeerDependencies: - zod - '@stablyai/playwright@2.1.13(@playwright/test@1.59.1)(zod@4.3.6)': + '@stablyai/playwright@2.1.14(@playwright/test@1.59.1)(zod@4.3.6)': dependencies: '@playwright/test': 1.59.1 - '@stablyai/playwright-base': 2.1.13(@playwright/test@1.59.1)(zod@4.3.6) + '@stablyai/playwright-base': 2.1.14(@playwright/test@1.59.1)(zod@4.3.6) transitivePeerDependencies: - zod @@ -8482,6 +8510,10 @@ snapshots: '@types/verror@1.10.11': optional: true + '@types/ws@8.18.1': + dependencies: + '@types/node': 25.5.0 + '@types/yauzl@2.10.3': dependencies: '@types/node': 25.5.0 @@ -8608,6 +8640,8 @@ snapshots: agent-base@7.1.4: {} + agent-browser@0.24.1: {} + ajv-formats@3.0.1(ajv@8.18.0): optionalDependencies: ajv: 8.18.0 @@ -12374,6 +12408,8 @@ snapshots: wrappy@1.0.2: {} + ws@8.20.0: {} + wsl-utils@0.3.1: dependencies: is-wsl: 3.1.1 diff --git a/skills/orca-cli/SKILL.md b/skills/orca-cli/SKILL.md index 31c1de2d..623f5fc2 100644 --- a/skills/orca-cli/SKILL.md +++ b/skills/orca-cli/SKILL.md @@ -1,6 +1,6 @@ --- name: orca-cli -description: Use the Orca CLI to orchestrate worktrees and live terminals through a running Orca editor. Use when an agent needs to create, inspect, update, or remove Orca worktrees; inspect repo state known to Orca; or read, send to, wait on, or stop Orca-managed terminals. Coding agents should also keep the current worktree comment updated with the latest meaningful work-in-progress checkpoint whenever useful; this is an expected default behavior, not a special trigger-only action. 
Triggers include "use orca cli", "manage Orca worktrees", "read Orca terminal", "reply to Claude Code in Orca", "create a worktree in Orca", "update Orca worktree comment", or any task where the agent should operate through Orca instead of talking to git worktrees and terminal processes directly. +description: Use the Orca CLI to orchestrate worktrees, live terminals, and browser automation through a running Orca editor. Use when an agent needs to create, inspect, update, or remove Orca worktrees; inspect repo state known to Orca; read, send to, wait on, or stop Orca-managed terminals; or automate the built-in browser (navigate, snapshot, click, fill, screenshot). Coding agents should also keep the current worktree comment updated with the latest meaningful work-in-progress checkpoint whenever useful. Triggers include "use orca cli", "manage Orca worktrees", "read Orca terminal", "reply to Claude Code in Orca", "create a worktree in Orca", "update Orca worktree comment", "click on", "fill the form", "take a screenshot", "navigate to", "interact with the page", "snapshot the page", or any task where the agent should operate through Orca. --- # Orca CLI @@ -167,6 +167,374 @@ Why: terminal handles are runtime-scoped and may go stale after reloads. If Orca - If the user asks for CLI UX feedback, test the public `orca` command first. Only inspect `src/cli` or use `node out/cli/index.js` if the public command is missing or the task is explicitly about implementation internals. - If a command fails, prefer retrying with the public `orca` command before concluding the CLI is broken, unless the failure already came from `orca` itself. +## Browser Automation + +The `orca` CLI also drives the built-in Orca browser. The core workflow is a **snapshot-interact-re-snapshot** loop: + +1. **Snapshot** the page to see interactive elements and their refs. +2. **Interact** using refs (`@e1`, `@e3`, etc.) to click, fill, or select. +3. 
**Re-snapshot** after interactions to see the updated page state. + +```bash +orca goto --url https://example.com --json +orca snapshot --json +# Read the refs from the snapshot output +orca click --element @e3 --json +orca snapshot --json +``` + +### Element Refs + +Refs like `@e1`, `@e5` are short identifiers assigned to interactive page elements during a snapshot. They are: + +- **Assigned by snapshot**: Run `orca snapshot` to get current refs. +- **Scoped to one tab**: Refs from one tab are not valid in another. +- **Invalidated by navigation**: If the page navigates after a snapshot, refs become stale. Re-snapshot to get fresh refs. +- **Invalidated by tab switch**: Switching tabs with `orca tab switch` invalidates refs. Re-snapshot after switching. + +If a ref is stale, the command returns `browser_stale_ref` — re-snapshot and retry. + +### Worktree Scoping + +Browser commands default to the **current worktree** — only tabs belonging to the agent's worktree are visible and targetable. Tab indices are relative to the filtered tab list. + +```bash +# Default: operates on tabs in the current worktree +orca snapshot --json + +# Explicitly target all worktrees (cross-worktree access) +orca snapshot --worktree all --json + +# Tab indices are relative to the worktree-filtered list +orca tab list --json # Shows tabs [0], [1], [2] for this worktree +orca tab switch --index 1 --json # Switches to tab [1] within this worktree +``` + +If no tabs are open in the current worktree, commands return `browser_no_tab`. + +### Stable Page Targeting + +For single-agent flows, bare browser commands are fine: Orca will target the active browser tab in the current worktree. + +For concurrent or multi-process browser automation, prefer a stable page id instead of ambient active-tab state: + +1. Run `orca tab list --json`. +2. Read `tabs[].browserPageId` from the result. +3. 
Pass `--page <browserPageId>` to follow-up commands like `snapshot`, `click`, `goto`, `screenshot`, `tab switch`, or `tab close`. + +Why: active-tab state and tab indices can change while another Orca CLI process is working. `browserPageId` pins the command to one concrete tab. + +```bash +orca tab list --json +orca snapshot --page page-123 --json +orca click --page page-123 --element @e3 --json +orca screenshot --page page-123 --json +orca tab switch --page page-123 --json +orca tab close --page page-123 --json +``` + +If you also pass `--worktree`, Orca treats it as extra scoping/validation for that page id. Without `--page`, commands still fall back to the current worktree's active tab. + +### Navigation + +```bash +orca goto --url <url> [--json] # Navigate to URL, waits for page load +orca back [--json] # Go back in browser history +orca forward [--json] # Go forward in browser history +orca reload [--json] # Reload the current page +``` + +### Observation + +```bash +orca snapshot [--page <browserPageId>] [--json] # Accessibility tree snapshot with element refs +orca screenshot [--page <browserPageId>] [--format <format>] [--json] # Viewport screenshot (base64) +orca full-screenshot [--page <browserPageId>] [--format <format>] [--json] # Full-page screenshot (base64) +orca pdf [--page <browserPageId>] [--json] # Export page as PDF (base64) +``` + +### Interaction + +```bash +orca click --element <ref> [--page <browserPageId>] [--json] # Click an element by ref +orca dblclick --element <ref> [--page <browserPageId>] [--json] # Double-click an element +orca fill --element <ref> --value <value> [--page <browserPageId>] [--json] # Clear and fill an input +orca type --input <text> [--page <browserPageId>] [--json] # Type at current focus (no element targeting) +orca select --element <ref> --value <value> [--page <browserPageId>] [--json] # Select dropdown option +orca check --element <ref> [--page <browserPageId>] [--json] # Check a checkbox +orca uncheck --element <ref> [--page <browserPageId>] [--json] # Uncheck a checkbox +orca scroll --direction <direction> [--amount <amount>] [--page <browserPageId>] [--json] # Scroll viewport +orca scrollintoview --element <ref> [--page <browserPageId>] [--json] # Scroll element into view +orca hover --element <ref> [--page <browserPageId>] [--json] # Hover
over an element +orca focus --element <ref> [--page <browserPageId>] [--json] # Focus an element +orca drag --from <ref> --to <ref> [--page <browserPageId>] [--json] # Drag from one element to another +orca clear --element <ref> [--page <browserPageId>] [--json] # Clear an input field +orca select-all --element <ref> [--page <browserPageId>] [--json] # Select all text in an element +orca keypress --key <key> [--page <browserPageId>] [--json] # Press a key (Enter, Tab, Escape, etc.) +orca upload --element <ref> --files <paths> [--page <browserPageId>] [--json] # Upload files to a file input +``` + +### Tab Management + +```bash +orca tab list [--json] # List open browser tabs +orca tab switch (--index <index> | --page <browserPageId>) [--json] # Switch active tab (invalidates refs) +orca tab create [--url <url>] [--json] # Open a new browser tab +orca tab close [--index <index> | --page <browserPageId>] [--json] # Close a browser tab +``` + +### Wait / Synchronization + +```bash +orca wait [--timeout <ms>] [--json] # Wait for timeout (default 1000ms) +orca wait --selector <selector> [--state <state>] [--timeout <ms>] [--json] # Wait for element +orca wait --text <text> [--timeout <ms>] [--json] # Wait for text to appear on page +orca wait --url <substring> [--timeout <ms>] [--json] # Wait for URL to contain substring +orca wait --load <state> [--timeout <ms>] [--json] # Wait for load state +orca wait --fn <expression> [--timeout <ms>] [--json] # Wait for JS condition to be truthy +``` + +After any page-changing action, pick one: + +- Wait for specific content: `orca wait --text "Dashboard" --json` +- Wait for URL change: `orca wait --url "/dashboard" --json` +- Wait for network idle (catch-all for SPA navigation): `orca wait --load networkidle --json` +- Wait for an element: `orca wait --selector ".results" --json` + +Avoid bare `orca wait --timeout 2000` except when debugging — it makes scripts slow and flaky.
+ +### Data Extraction + +```bash +orca exec --command "get text @e1" [--json] # Get visible text of an element +orca exec --command "get html @e1" [--json] # Get innerHTML +orca exec --command "get value @e1" [--json] # Get input value +orca exec --command "get attr @e1 href" [--json] # Get element attribute +orca exec --command "get title" [--json] # Get page title +orca exec --command "get url" [--json] # Get current URL +orca exec --command "get count .item" [--json] # Count matching elements +``` + +### State Checks + +```bash +orca exec --command "is visible @e1" [--json] # Check if element is visible +orca exec --command "is enabled @e1" [--json] # Check if element is enabled +orca exec --command "is checked @e1" [--json] # Check if checkbox is checked +``` + +### Page Inspection + +```bash +orca eval --expression <expression> [--json] # Evaluate JS in page context +``` + +### Cookie Management + +```bash +orca cookie get [--url <url>] [--json] # List cookies +orca cookie set --name <name> --value <value> [--domain <domain>] [--json] # Set a cookie +orca cookie delete --name <name> [--domain <domain>] [--json] # Delete a cookie +``` + +### Emulation + +```bash +orca viewport --width <width> --height <height> [--scale <scale>] [--mobile] [--json] +orca geolocation --latitude <latitude> --longitude <longitude> [--accuracy <accuracy>] [--json] +``` + +### Request Interception + +```bash +orca intercept enable [--patterns <patterns>] [--json] # Start intercepting requests +orca intercept disable [--json] # Stop intercepting +orca intercept list [--json] # List paused requests +``` + +> **Note:** Per-request `intercept continue` and `intercept block` are not yet supported. +> They will be added once agent-browser supports per-request interception decisions.
+ +### Console / Network Capture + +```bash +orca capture start [--json] # Start capturing console + network +orca capture stop [--json] # Stop capturing +orca console [--limit <limit>] [--json] # Read captured console entries +orca network [--limit <limit>] [--json] # Read captured network entries +``` + +### Mouse Control + +```bash +orca exec --command "mouse move 100 200" [--json] # Move mouse to coordinates +orca exec --command "mouse down left" [--json] # Press mouse button +orca exec --command "mouse up left" [--json] # Release mouse button +orca exec --command "mouse wheel 100" [--json] # Scroll wheel +``` + +### Keyboard + +```bash +orca exec --command "keyboard inserttext \"text\"" [--json] # Insert text bypassing key events +orca exec --command "keyboard type \"text\"" [--json] # Raw keystrokes +orca exec --command "keydown Shift" [--json] # Hold key down +orca exec --command "keyup Shift" [--json] # Release key +``` + +### Frames (Iframes) + +Iframes are auto-inlined in snapshots — refs inside iframes work transparently. For scoped interaction: + +```bash +orca exec --command "frame @e3" [--json] # Switch to iframe by ref +orca exec --command "frame \"#iframe\"" [--json] # Switch to iframe by CSS selector +orca exec --command "frame main" [--json] # Return to main frame +``` + +### Semantic Locators (alternative to refs) + +When refs aren't available or you want to skip a snapshot: + +```bash +orca exec --command "find role button click --name \"Submit\"" [--json] +orca exec --command "find text \"Sign In\" click" [--json] +orca exec --command "find label \"Email\" fill \"user@test.com\"" [--json] +orca exec --command "find placeholder \"Search\" type \"query\"" [--json] +orca exec --command "find testid \"submit-btn\" click" [--json] +``` + +### Dialogs + +`alert` and `beforeunload` are auto-accepted.
For `confirm` and `prompt`: + +```bash +orca exec --command "dialog status" [--json] # Check for pending dialog +orca exec --command "dialog accept" [--json] # Accept +orca exec --command "dialog accept \"text\"" [--json] # Accept with prompt input +orca exec --command "dialog dismiss" [--json] # Dismiss/cancel +``` + +### Extended Commands (Passthrough) + +```bash +orca exec --command "<command>" [--json] +``` + +The `exec` command provides access to agent-browser's full command surface. Useful for commands without typed Orca handlers: + +```bash +orca exec --command "set device \"iPhone 14\"" --json # Emulate device +orca exec --command "set offline on" --json # Toggle offline mode +orca exec --command "set media dark" --json # Emulate color scheme +orca exec --command "network requests" --json # View tracked network requests +orca exec --command "help" --json # See all available commands +``` + +**Important:** Do not use `orca exec --command "tab ..."` for tab management. Use `orca tab list/create/close/switch` instead — those operate at the Orca level and keep the UI synchronized. + +### `fill` vs `type` + +- **`fill`** targets a specific element by ref, clears its value first, then enters text. Use for form fields. +- **`type`** types at whatever currently has focus. Use for search boxes or after clicking into an input.
+ +If neither works on a custom input component, try: + +```bash +orca focus --element @e1 --json +orca exec --command "keyboard inserttext \"text\"" --json # bypasses key events +``` + +### Browser Error Codes + +| Error Code | Meaning | Recovery | +|-----------|---------|----------| +| `browser_no_tab` | No browser tab is open in this worktree | Open a tab, or use `--worktree all` to check other worktrees | +| `browser_stale_ref` | Ref is invalid (page changed since snapshot) | Run `orca snapshot` to get fresh refs | +| `browser_tab_not_found` | Tab index does not exist | Run `orca tab list` to see available tabs | +| `browser_error` | Error from the browser automation engine | Read the message for details; common causes: element not found, navigation timeout, JS error | + +### Browser Worked Example + +Agent fills a login form and verifies the dashboard loads: + +```bash +# Navigate to the login page +orca goto --url https://app.example.com/login --json + +# See what's on the page +orca snapshot --json +# Output includes: +# [@e1] text input "Email" +# [@e2] text input "Password" +# [@e3] button "Sign In" + +# Fill the form +orca fill --element @e1 --value "user@example.com" --json +orca fill --element @e2 --value "s3cret" --json + +# Submit +orca click --element @e3 --json + +# Verify the dashboard loaded +orca snapshot --json +# Output should show dashboard content, not the login form +``` + +### Browser Troubleshooting + +**"Ref not found" / `browser_stale_ref`** +Page changed since the snapshot. Run `orca snapshot --json` again, then use the new refs. + +**Element exists but not in snapshot** +It may be off-screen or not yet rendered. Try: + +```bash +orca scroll --direction down --amount 1000 --json +orca snapshot --json +# or wait for it: +orca wait --text "..." --json +orca snapshot --json +``` + +**Click does nothing / overlay swallows the click** +Modals or cookie banners may be blocking. Snapshot, find the dismiss button, click it, then re-snapshot. 
+ +**Fill/type doesn't work on a custom input** +Some components intercept key events. Use `keyboard inserttext`: + +```bash +orca focus --element @e1 --json +orca exec --command "keyboard inserttext \"text\"" --json +``` + +**`browser_no_tab` error** +No browser tab is open in the current worktree. Open one with `orca tab create --url <url> --json`. + +### Auto-Switch Worktree + +Browser commands automatically activate the target worktree in the Orca UI when needed. If the agent issues a browser command targeting a worktree that isn't currently active, Orca will switch to that worktree before executing the command. + +### Tab Create Auto-Activation + +When `orca tab create` opens a new tab, it is automatically set as the active tab for the worktree. Subsequent commands (`snapshot`, `click`, etc.) will target the newly created tab without needing an explicit `tab switch`. + +### Browser Agent Guidance + +- Always snapshot before interacting with elements. +- After navigation (`goto`, `back`, `reload`, clicking a link), re-snapshot to get fresh refs. +- After switching tabs, re-snapshot. +- If you get `browser_stale_ref`, re-snapshot and retry with the new refs. +- Use `orca tab list` before `orca tab switch` to know which tabs exist. +- For concurrent browser workflows, prefer `orca tab list --json` and reuse `tabs[].browserPageId` with `--page` on later commands. +- Use `orca wait` to synchronize after actions that trigger async updates (form submits, SPA navigation, modals) instead of arbitrary sleeps. +- Use `orca eval` as an escape hatch for interactions not covered by other commands. +- Use `orca exec --command "help"` to discover extended commands. +- Worktree scoping is automatic — you'll only see tabs from your worktree by default. +- Bare browser commands without `--page` still target the current worktree's active tab, which is convenient but less robust for multi-process automation.
+- Tab creation auto-activates the new tab — no need for `tab switch` after `tab create`. +- Browser commands auto-switch the active worktree if needed — no manual worktree activation required. + ## Important Constraints - Orca CLI only talks to a running Orca editor. diff --git a/src/cli/index.test.ts b/src/cli/index.test.ts index 5961c03f..557c422b 100644 --- a/src/cli/index.test.ts +++ b/src/cli/index.test.ts @@ -1,3 +1,5 @@ +/* oxlint-disable max-lines -- Why: CLI parsing behavior is exercised end-to-end +in one file so command and flag interactions stay visible in a single suite. */ import path from 'path' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' @@ -35,7 +37,24 @@ vi.mock('./runtime-client', () => { } }) -import { buildCurrentWorktreeSelector, main, normalizeWorktreeSelector } from './index' +import { + buildCurrentWorktreeSelector, + COMMAND_SPECS, + main, + normalizeWorktreeSelector +} from './index' +import { RuntimeClientError } from './runtime-client' + +describe('COMMAND_SPECS collision check', () => { + it('has no duplicate command paths', () => { + const seen = new Set() + for (const spec of COMMAND_SPECS) { + const key = spec.path.join(' ') + expect(seen.has(key), `Duplicate COMMAND_SPECS path: "${key}"`).toBe(false) + seen.add(key) + } + }) +}) describe('orca cli worktree awareness', () => { beforeEach(() => { @@ -303,3 +322,312 @@ describe('orca cli worktree awareness', () => { }) }) }) + +describe('orca cli browser page targeting', () => { + beforeEach(() => { + callMock.mockReset() + }) + + afterEach(() => { + vi.restoreAllMocks() + }) + + it('passes explicit page ids to snapshot without resolving the current worktree', async () => { + callMock.mockResolvedValueOnce({ + id: 'req_snapshot', + ok: true, + result: { + browserPageId: 'page-1', + snapshot: 'tree', + refs: [], + url: 'https://example.com', + title: 'Example' + }, + _meta: { + runtimeId: 'runtime-1' + } + }) + vi.spyOn(console, 
'log').mockImplementation(() => {}) + + await main(['snapshot', '--page', 'page-1', '--json'], '/tmp/not-an-orca-worktree') + + expect(callMock).toHaveBeenCalledTimes(1) + expect(callMock).toHaveBeenCalledWith('browser.snapshot', { + page: 'page-1' + }) + }) + + it('resolves current worktree only when --page is combined with --worktree current', async () => { + callMock + .mockResolvedValueOnce({ + id: 'req_list', + ok: true, + result: { + worktrees: [ + { + id: 'repo::/tmp/repo/feature', + repoId: 'repo', + path: '/tmp/repo/feature', + branch: 'feature/foo', + linkedIssue: null, + git: { + path: '/tmp/repo/feature', + head: 'abc', + branch: 'feature/foo', + isBare: false, + isMainWorktree: false + }, + displayName: '', + comment: '' + } + ], + totalCount: 1, + truncated: false + }, + _meta: { + runtimeId: 'runtime-1' + } + }) + .mockResolvedValueOnce({ + id: 'req_snapshot', + ok: true, + result: { + browserPageId: 'page-1', + snapshot: 'tree', + refs: [], + url: 'https://example.com', + title: 'Example' + }, + _meta: { + runtimeId: 'runtime-1' + } + }) + vi.spyOn(console, 'log').mockImplementation(() => {}) + + await main( + ['snapshot', '--page', 'page-1', '--worktree', 'current', '--json'], + '/tmp/repo/feature/src' + ) + + expect(callMock).toHaveBeenNthCalledWith(1, 'worktree.list', { + limit: 10_000 + }) + expect(callMock).toHaveBeenNthCalledWith(2, 'browser.snapshot', { + page: 'page-1', + worktree: `path:${path.resolve('/tmp/repo/feature')}` + }) + }) + + it('passes page-targeted tab switches through without auto-scoping to the current worktree', async () => { + callMock.mockResolvedValueOnce({ + id: 'req_switch', + ok: true, + result: { + switched: 2, + browserPageId: 'page-2' + }, + _meta: { + runtimeId: 'runtime-1' + } + }) + vi.spyOn(console, 'log').mockImplementation(() => {}) + + await main(['tab', 'switch', '--page', 'page-2', '--json'], '/tmp/repo/feature/src') + + expect(callMock).toHaveBeenCalledTimes(1) + 
expect(callMock).toHaveBeenCalledWith('browser.tabSwitch', { + index: undefined, + page: 'page-2' + }) + }) + + it('still resolves the current worktree when tab switch --page is combined with --worktree current', async () => { + callMock + .mockResolvedValueOnce({ + id: 'req_list', + ok: true, + result: { + worktrees: [ + { + id: 'repo::/tmp/repo/feature', + repoId: 'repo', + path: '/tmp/repo/feature', + branch: 'feature/foo', + linkedIssue: null, + git: { + path: '/tmp/repo/feature', + head: 'abc', + branch: 'feature/foo', + isBare: false, + isMainWorktree: false + }, + displayName: '', + comment: '' + } + ], + totalCount: 1, + truncated: false + }, + _meta: { + runtimeId: 'runtime-1' + } + }) + .mockResolvedValueOnce({ + id: 'req_switch', + ok: true, + result: { + switched: 2, + browserPageId: 'page-2' + }, + _meta: { + runtimeId: 'runtime-1' + } + }) + vi.spyOn(console, 'log').mockImplementation(() => {}) + + await main( + ['tab', 'switch', '--page', 'page-2', '--worktree', 'current', '--json'], + '/tmp/repo/feature/src' + ) + + expect(callMock).toHaveBeenNthCalledWith(1, 'worktree.list', { + limit: 10_000 + }) + expect(callMock).toHaveBeenNthCalledWith(2, 'browser.tabSwitch', { + index: undefined, + page: 'page-2', + worktree: `path:${path.resolve('/tmp/repo/feature')}` + }) + }) +}) + +describe('orca cli browser waits and viewport flags', () => { + beforeEach(() => { + callMock.mockReset() + process.exitCode = undefined + }) + + afterEach(() => { + vi.restoreAllMocks() + }) + + it('gives selector waits an explicit RPC timeout budget', async () => { + callMock.mockResolvedValueOnce({ + id: 'req_wait', + ok: true, + result: { ok: true }, + _meta: { + runtimeId: 'runtime-1' + } + }) + vi.spyOn(console, 'log').mockImplementation(() => {}) + + await main( + ['wait', '--selector', '#ready', '--worktree', 'all', '--json'], + '/tmp/not-an-orca-worktree' + ) + + expect(callMock).toHaveBeenCalledWith( + 'browser.wait', + { + selector: '#ready', + timeout: undefined, + 
text: undefined, + url: undefined, + load: undefined, + fn: undefined, + state: undefined, + worktree: undefined + }, + { timeoutMs: 60_000 } + ) + }) + + it('extends selector wait RPC timeout when the user passes --timeout', async () => { + callMock.mockResolvedValueOnce({ + id: 'req_wait', + ok: true, + result: { ok: true }, + _meta: { + runtimeId: 'runtime-1' + } + }) + vi.spyOn(console, 'log').mockImplementation(() => {}) + + await main( + ['wait', '--selector', '#ready', '--timeout', '12000', '--worktree', 'all', '--json'], + '/tmp/not-an-orca-worktree' + ) + + expect(callMock).toHaveBeenCalledWith( + 'browser.wait', + { + selector: '#ready', + timeout: 12000, + text: undefined, + url: undefined, + load: undefined, + fn: undefined, + state: undefined, + worktree: undefined + }, + { timeoutMs: 17000 } + ) + }) + + it('does not tell users Orca is down for a generic runtime timeout', async () => { + callMock.mockRejectedValueOnce( + new RuntimeClientError( + 'runtime_timeout', + 'Timed out waiting for the Orca runtime to respond.' 
+ ) + ) + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}) + + await main(['wait', '--selector', '#ready', '--worktree', 'all'], '/tmp/not-an-orca-worktree') + + expect(errorSpy).toHaveBeenCalledWith('Timed out waiting for the Orca runtime to respond.') + }) + + it('passes the mobile viewport flag through to browser.viewport', async () => { + callMock.mockResolvedValueOnce({ + id: 'req_viewport', + ok: true, + result: { + width: 375, + height: 812, + deviceScaleFactor: 2, + mobile: true + }, + _meta: { + runtimeId: 'runtime-1' + } + }) + vi.spyOn(console, 'log').mockImplementation(() => {}) + + await main( + [ + 'viewport', + '--width', + '375', + '--height', + '812', + '--scale', + '2', + '--mobile', + '--worktree', + 'all', + '--json' + ], + '/tmp/not-an-orca-worktree' + ) + + expect(callMock).toHaveBeenCalledWith('browser.viewport', { + width: 375, + height: 812, + deviceScaleFactor: 2, + mobile: true, + worktree: undefined + }) + }) +}) diff --git a/src/cli/index.ts b/src/cli/index.ts index 442e4462..f41fc823 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -13,7 +13,42 @@ import type { RuntimeTerminalListResult, RuntimeTerminalShow, RuntimeTerminalSend, - RuntimeTerminalWait + RuntimeTerminalWait, + BrowserSnapshotResult, + BrowserClickResult, + BrowserGotoResult, + BrowserFillResult, + BrowserTypeResult, + BrowserSelectResult, + BrowserScrollResult, + BrowserBackResult, + BrowserReloadResult, + BrowserScreenshotResult, + BrowserEvalResult, + BrowserTabListResult, + BrowserTabSwitchResult, + BrowserHoverResult, + BrowserDragResult, + BrowserUploadResult, + BrowserWaitResult, + BrowserCheckResult, + BrowserFocusResult, + BrowserClearResult, + BrowserSelectAllResult, + BrowserKeypressResult, + BrowserPdfResult, + BrowserCookieGetResult, + BrowserCookieSetResult, + BrowserCookieDeleteResult, + BrowserViewportResult, + BrowserGeolocationResult, + BrowserInterceptEnableResult, + BrowserInterceptDisableResult, + 
BrowserInterceptedRequest, + BrowserCaptureStartResult, + BrowserCaptureStopResult, + BrowserConsoleResult, + BrowserNetworkLogResult } from '../shared/runtime-types' import { RuntimeClient, @@ -37,9 +72,15 @@ type CommandSpec = { notes?: string[] } +type BrowserCliTarget = { + worktree?: string + page?: string +} + const DEFAULT_TERMINAL_WAIT_RPC_TIMEOUT_MS = 5 * 60 * 1000 +const DEFAULT_BROWSER_WAIT_RPC_TIMEOUT_MS = 60_000 const GLOBAL_FLAGS = ['help', 'json'] -const COMMAND_SPECS: CommandSpec[] = [ +export const COMMAND_SPECS: CommandSpec[] = [ { path: ['open'], summary: 'Launch Orca and wait for the runtime to be reachable', @@ -169,6 +210,460 @@ const COMMAND_SPECS: CommandSpec[] = [ summary: 'Stop terminals for a worktree', usage: 'orca terminal stop --worktree [--json]', allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + // ── Browser automation ── + { + path: ['snapshot'], + summary: 'Capture an accessibility snapshot of the active browser tab', + usage: 'orca snapshot [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['screenshot'], + summary: 'Capture a viewport screenshot of the active browser tab', + usage: 'orca screenshot [--format ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'format', 'worktree'] + }, + { + path: ['click'], + summary: 'Click a browser element by ref', + usage: 'orca click --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'worktree'] + }, + { + path: ['fill'], + summary: 'Clear and fill a browser input by ref', + usage: 'orca fill --element --value [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'value', 'worktree'] + }, + { + path: ['type'], + summary: 'Type text at the current browser focus', + usage: 'orca type --input [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'input', 'worktree'] + }, + { + path: ['select'], + summary: 'Select a dropdown option by ref', + usage: 'orca select --element --value [--worktree ] 
[--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'value', 'worktree'] + }, + { + path: ['scroll'], + summary: 'Scroll the browser viewport', + usage: 'orca scroll --direction [--amount ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'direction', 'amount', 'worktree'] + }, + { + path: ['goto'], + summary: 'Navigate the active browser tab to a URL', + usage: 'orca goto --url [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'url', 'worktree'] + }, + { + path: ['back'], + summary: 'Navigate back in browser history', + usage: 'orca back [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['reload'], + summary: 'Reload the active browser tab', + usage: 'orca reload [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['eval'], + summary: 'Evaluate JavaScript in the browser page context', + usage: 'orca eval --expression [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'expression', 'worktree'] + }, + { + path: ['wait'], + summary: 'Wait for element, text, URL, load state, JS condition, or timeout', + usage: + 'orca wait [--selector ] [--timeout ] [--text ] [--url ] [--load ] [--fn ] [--state ] [--worktree ] [--json]', + allowedFlags: [ + ...GLOBAL_FLAGS, + 'selector', + 'timeout', + 'text', + 'url', + 'load', + 'fn', + 'state', + 'worktree' + ] + }, + { + path: ['check'], + summary: 'Check a checkbox/radio by ref', + usage: 'orca check --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'worktree'] + }, + { + path: ['uncheck'], + summary: 'Uncheck a checkbox/radio by ref', + usage: 'orca uncheck --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'worktree'] + }, + { + path: ['focus'], + summary: 'Focus a browser element by ref', + usage: 'orca focus --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'worktree'] + }, + { + path: ['clear'], + summary: 'Clear an input element by 
ref', + usage: 'orca clear --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'worktree'] + }, + { + path: ['select-all'], + summary: 'Select all text in an input by ref', + usage: 'orca select-all --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'worktree'] + }, + { + path: ['keypress'], + summary: 'Press a key (Enter, Tab, Escape, ArrowDown, etc.)', + usage: 'orca keypress --key [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'key', 'worktree'] + }, + { + path: ['pdf'], + summary: 'Export the active browser tab as PDF', + usage: 'orca pdf [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['full-screenshot'], + summary: 'Capture a full-page screenshot (beyond viewport)', + usage: 'orca full-screenshot [--format ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'format', 'worktree'] + }, + { + path: ['hover'], + summary: 'Hover over a browser element by ref', + usage: 'orca hover --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'worktree'] + }, + { + path: ['drag'], + summary: 'Drag from one element to another', + usage: 'orca drag --from --to [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'from', 'to', 'worktree'] + }, + { + path: ['upload'], + summary: 'Upload files to a file input element', + usage: 'orca upload --element --files [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'files', 'worktree'] + }, + { + path: ['tab', 'list'], + summary: 'List open browser tabs', + usage: 'orca tab list [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['tab', 'switch'], + summary: 'Switch the active browser tab', + usage: 'orca tab switch (--index | --page ) [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'index', 'worktree'] + }, + { + path: ['tab', 'create'], + summary: 'Create a new browser tab in the current worktree', + usage: 'orca tab 
create [--url ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'url', 'worktree'] + }, + { + path: ['tab', 'close'], + summary: 'Close a browser tab', + usage: 'orca tab close [--index ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'index', 'worktree'] + }, + { + path: ['exec'], + summary: 'Run any agent-browser command against the active browser tab', + usage: 'orca exec --command "" [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'command', 'worktree'] + }, + // ── Cookie management ── + { + path: ['cookie', 'get'], + summary: 'Get cookies for the active tab (optionally filter by URL)', + usage: 'orca cookie get [--url ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'url', 'worktree'] + }, + { + path: ['cookie', 'set'], + summary: 'Set a cookie', + usage: + 'orca cookie set --name --value [--domain ] [--path

] [--secure] [--httpOnly] [--sameSite ] [--expires ] [--worktree ] [--json]', + allowedFlags: [ + ...GLOBAL_FLAGS, + 'name', + 'value', + 'domain', + 'path', + 'secure', + 'httpOnly', + 'sameSite', + 'expires', + 'worktree' + ] + }, + { + path: ['cookie', 'delete'], + summary: 'Delete a cookie by name', + usage: + 'orca cookie delete --name [--domain ] [--url ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'name', 'domain', 'url', 'worktree'] + }, + // ── Viewport ── + { + path: ['viewport'], + summary: 'Set browser viewport size', + usage: + 'orca viewport --width --height [--scale ] [--mobile] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'width', 'height', 'scale', 'mobile', 'worktree'] + }, + // ── Geolocation ── + { + path: ['geolocation'], + summary: 'Override browser geolocation', + usage: + 'orca geolocation --latitude --longitude [--accuracy ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'latitude', 'longitude', 'accuracy', 'worktree'] + }, + // ── Request interception ── + { + path: ['intercept', 'enable'], + summary: 'Enable request interception (pause matching requests)', + usage: 'orca intercept enable [--patterns ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'patterns', 'worktree'] + }, + { + path: ['intercept', 'disable'], + summary: 'Disable request interception', + usage: 'orca intercept disable [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['intercept', 'list'], + summary: 'List paused (intercepted) requests', + usage: 'orca intercept list [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + // TODO: add intercept continue/block once agent-browser supports per-request + // interception decisions (currently only supports URL-pattern-based route/unroute). 
+ // ── Console/network capture ── + { + path: ['capture', 'start'], + summary: 'Start capturing console and network events', + usage: 'orca capture start [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['capture', 'stop'], + summary: 'Stop capturing console and network events', + usage: 'orca capture stop [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['console'], + summary: 'Show captured console log entries', + usage: 'orca console [--limit ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'limit', 'worktree'] + }, + { + path: ['network'], + summary: 'Show captured network requests', + usage: 'orca network [--limit ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'limit', 'worktree'] + }, + // ── Additional core commands ── + { + path: ['dblclick'], + summary: 'Double-click element by ref', + usage: 'orca dblclick --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'worktree'] + }, + { + path: ['forward'], + summary: 'Navigate forward in browser history', + usage: 'orca forward [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['scrollintoview'], + summary: 'Scroll element into view', + usage: 'orca scrollintoview --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'element', 'worktree'] + }, + { + path: ['get'], + summary: 'Get element property (text, html, value, url, title, count, box)', + usage: 'orca get --what [--element ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'what', 'element', 'worktree'] + }, + { + path: ['is'], + summary: 'Check element state (visible, enabled, checked)', + usage: 'orca is --what --element [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'what', 'element', 'worktree'] + }, + // ── Keyboard insert text ── + { + path: ['inserttext'], + summary: 'Insert text without key events', + usage: 'orca inserttext --text [--worktree ] 
[--json]', + allowedFlags: [...GLOBAL_FLAGS, 'text', 'worktree'] + }, + // ── Mouse commands ── + { + path: ['mouse', 'move'], + summary: 'Move mouse to x,y coordinates', + usage: 'orca mouse move --x --y [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'x', 'y', 'worktree'] + }, + { + path: ['mouse', 'down'], + summary: 'Press mouse button', + usage: 'orca mouse down [--button ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'button', 'worktree'] + }, + { + path: ['mouse', 'up'], + summary: 'Release mouse button', + usage: 'orca mouse up [--button ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'button', 'worktree'] + }, + { + path: ['mouse', 'wheel'], + summary: 'Scroll wheel', + usage: 'orca mouse wheel --dy [--dx ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'dy', 'dx', 'worktree'] + }, + // ── Find (semantic locators) ── + { + path: ['find'], + summary: 'Find element by semantic locator and perform action', + usage: + 'orca find --locator --value --action [--text ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'locator', 'value', 'action', 'text', 'worktree'] + }, + // ── Set commands ── + { + path: ['set', 'device'], + summary: 'Emulate a device', + usage: 'orca set device --name [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'name', 'worktree'] + }, + { + path: ['set', 'offline'], + summary: 'Toggle offline mode', + usage: 'orca set offline [--state ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'state', 'worktree'] + }, + { + path: ['set', 'headers'], + summary: 'Set extra HTTP headers', + usage: 'orca set headers --headers [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'headers', 'worktree'] + }, + { + path: ['set', 'credentials'], + summary: 'Set HTTP auth credentials', + usage: 'orca set credentials --user --pass [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'user', 'pass', 'worktree'] + }, + { + path: ['set', 'media'], + summary: 'Set color 
scheme and reduced motion preferences', + usage: + 'orca set media [--color-scheme ] [--reduced-motion ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'color-scheme', 'reduced-motion', 'worktree'] + }, + // ── Clipboard commands ── + { + path: ['clipboard', 'read'], + summary: 'Read clipboard contents', + usage: 'orca clipboard read [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['clipboard', 'write'], + summary: 'Write text to clipboard', + usage: 'orca clipboard write --text [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'text', 'worktree'] + }, + // ── Dialog commands ── + { + path: ['dialog', 'accept'], + summary: 'Accept a browser dialog', + usage: 'orca dialog accept [--text ] [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'text', 'worktree'] + }, + { + path: ['dialog', 'dismiss'], + summary: 'Dismiss a browser dialog', + usage: 'orca dialog dismiss [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + // ── Storage commands ── + { + path: ['storage', 'local', 'get'], + summary: 'Get a localStorage value by key', + usage: 'orca storage local get --key [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'key', 'worktree'] + }, + { + path: ['storage', 'local', 'set'], + summary: 'Set a localStorage value', + usage: 'orca storage local set --key --value [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'key', 'value', 'worktree'] + }, + { + path: ['storage', 'local', 'clear'], + summary: 'Clear all localStorage', + usage: 'orca storage local clear [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + { + path: ['storage', 'session', 'get'], + summary: 'Get a sessionStorage value by key', + usage: 'orca storage session get --key [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'key', 'worktree'] + }, + { + path: ['storage', 'session', 'set'], + summary: 'Set a sessionStorage value', + usage: 'orca storage session 
set --key --value [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'key', 'value', 'worktree'] + }, + { + path: ['storage', 'session', 'clear'], + summary: 'Clear all sessionStorage', + usage: 'orca storage session clear [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'worktree'] + }, + // ── Download command ── + { + path: ['download'], + summary: 'Download a file by clicking a selector', + usage: 'orca download --selector --path [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'selector', 'path', 'worktree'] + }, + // ── Highlight command ── + { + path: ['highlight'], + summary: 'Highlight an element by selector', + usage: 'orca highlight --selector [--worktree ] [--json]', + allowedFlags: [...GLOBAL_FLAGS, 'selector', 'worktree'] } ] @@ -362,6 +857,750 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P return printResult(result, json, (value) => `removed: ${value.removed}`) } + // ── Browser automation dispatch ── + + if (matches(commandPath, ['snapshot'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.snapshot', target) + return printResult(result, json, formatSnapshot) + } + + if (matches(commandPath, ['screenshot'])) { + const format = getOptionalStringFlag(parsed.flags, 'format') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.screenshot', { + format: format === 'jpeg' ? 
'jpeg' : undefined, + ...target + }) + return printResult(result, json, formatScreenshot) + } + + if (matches(commandPath, ['click'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.click', { element, ...target }) + return printResult(result, json, (v) => `Clicked ${v.clicked}`) + } + + if (matches(commandPath, ['fill'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const value = getRequiredStringFlag(parsed.flags, 'value') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.fill', { + element, + value, + ...target + }) + return printResult(result, json, (v) => `Filled ${v.filled}`) + } + + if (matches(commandPath, ['type'])) { + const input = getRequiredStringFlag(parsed.flags, 'input') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.type', { input, ...target }) + return printResult(result, json, () => 'Typed input') + } + + if (matches(commandPath, ['select'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const value = getRequiredStringFlag(parsed.flags, 'value') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.select', { + element, + value, + ...target + }) + return printResult(result, json, (v) => `Selected ${v.selected}`) + } + + if (matches(commandPath, ['scroll'])) { + const direction = getRequiredStringFlag(parsed.flags, 'direction') + if (direction !== 'up' && direction !== 'down') { + throw new RuntimeClientError('invalid_argument', '--direction must be "up" or "down"') + } + const amount = getOptionalPositiveIntegerFlag(parsed.flags, 'amount') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.scroll', { 
+ direction, + amount, + ...target + }) + return printResult(result, json, (v) => `Scrolled ${v.scrolled}`) + } + + if (matches(commandPath, ['goto'])) { + const url = getRequiredStringFlag(parsed.flags, 'url') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + // Why: navigation waits for network idle which can exceed the default 15s RPC timeout + const result = await client.call( + 'browser.goto', + { url, ...target }, + { timeoutMs: 60_000 } + ) + return printResult(result, json, (v) => `Navigated to ${v.url} — ${v.title}`) + } + + if (matches(commandPath, ['back'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.back', target) + return printResult(result, json, (v) => `Back to ${v.url} — ${v.title}`) + } + + if (matches(commandPath, ['reload'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.reload', target, { + timeoutMs: 60_000 + }) + return printResult(result, json, (v) => `Reloaded ${v.url} — ${v.title}`) + } + + if (matches(commandPath, ['eval'])) { + const expression = getRequiredStringFlag(parsed.flags, 'expression') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.eval', { expression, ...target }) + return printResult(result, json, (v) => v.result) + } + + if (matches(commandPath, ['tab', 'list'])) { + const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client) + const result = await client.call('browser.tabList', { worktree }) + return printResult(result, json, formatTabList) + } + + if (matches(commandPath, ['tab', 'switch'])) { + const index = getOptionalNonNegativeIntegerFlag(parsed.flags, 'index') + const page = getOptionalStringFlag(parsed.flags, 'page') + if (index === undefined && !page) { + throw new RuntimeClientError('invalid_argument', 'Missing required --index or --page') + } + // Why: a 
stable browser page id is globally unique across Orca, so page- + // targeted tab switches should match the rest of the --page command model: + // global by default, with --worktree only acting as explicit validation. + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.tabSwitch', { + index, + page, + ...target + }) + return printResult(result, json, (v) => `Switched to tab ${v.switched} (${v.browserPageId})`) + } + + if (matches(commandPath, ['tab', 'create'])) { + const url = getOptionalStringFlag(parsed.flags, 'url') + const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client) + const result = await client.call<{ browserPageId: string }>( + 'browser.tabCreate', + { url, worktree }, + { timeoutMs: 60_000 } + ) + return printResult(result, json, (v) => `Created tab ${v.browserPageId}`) + } + + if (matches(commandPath, ['tab', 'close'])) { + const index = getOptionalNonNegativeIntegerFlag(parsed.flags, 'index') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call<{ closed: boolean }>('browser.tabClose', { + index, + ...target + }) + return printResult(result, json, () => 'Tab closed') + } + + if (matches(commandPath, ['exec'])) { + const command = getRequiredStringFlag(parsed.flags, 'command') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.exec', { command, ...target }) + return printResult(result, json, (v) => JSON.stringify(v, null, 2)) + } + + if (matches(commandPath, ['wait'])) { + const selector = getOptionalStringFlag(parsed.flags, 'selector') + const timeout = getOptionalPositiveIntegerFlag(parsed.flags, 'timeout') + const text = getOptionalStringFlag(parsed.flags, 'text') + const url = getOptionalStringFlag(parsed.flags, 'url') + const load = getOptionalStringFlag(parsed.flags, 'load') + const fn = getOptionalStringFlag(parsed.flags, 'fn') + const 
state = getOptionalStringFlag(parsed.flags, 'state') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call( + 'browser.wait', + { + selector, + timeout, + text, + url, + load, + fn, + state, + ...target + }, + { + // Why: selector/text/url waits can legitimately take longer than a + // normal RPC round-trip, even when Orca is healthy. Give browser.wait + // an explicit timeout budget so slow waits do not get mislabeled as + // "Orca is not running" by the generic client timeout path. + timeoutMs: timeout ? timeout + 5000 : DEFAULT_BROWSER_WAIT_RPC_TIMEOUT_MS + } + ) + return printResult(result, json, (v) => JSON.stringify(v, null, 2)) + } + + if (matches(commandPath, ['check']) || matches(commandPath, ['uncheck'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const checked = matches(commandPath, ['check']) + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.check', { + element, + checked, + ...target + }) + return printResult(result, json, (v) => + v.checked ? 
`Checked ${element}` : `Unchecked ${element}` + ) + } + + if (matches(commandPath, ['focus'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.focus', { element, ...target }) + return printResult(result, json, (v) => `Focused ${v.focused}`) + } + + if (matches(commandPath, ['clear'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.clear', { element, ...target }) + return printResult(result, json, (v) => `Cleared ${v.cleared}`) + } + + if (matches(commandPath, ['select-all'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.selectAll', { + element, + ...target + }) + return printResult(result, json, (v) => `Selected all in ${v.selected}`) + } + + if (matches(commandPath, ['keypress'])) { + const key = getRequiredStringFlag(parsed.flags, 'key') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.keypress', { + key, + ...target + }) + return printResult(result, json, (v) => `Pressed ${v.pressed}`) + } + + if (matches(commandPath, ['pdf'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.pdf', target) + return printResult(result, json, (v) => `PDF exported (${v.data.length} bytes base64)`) + } + + if (matches(commandPath, ['full-screenshot'])) { + const format = getOptionalStringFlag(parsed.flags, 'format') === 'jpeg' ? 
'jpeg' : 'png' + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.fullScreenshot', { + format, + ...target + }) + return printResult(result, json, (v) => `Full-page screenshot captured (${v.format})`) + } + + if (matches(commandPath, ['hover'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.hover', { element, ...target }) + return printResult(result, json, (v) => `Hovered ${v.hovered}`) + } + + if (matches(commandPath, ['drag'])) { + const from = getRequiredStringFlag(parsed.flags, 'from') + const to = getRequiredStringFlag(parsed.flags, 'to') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.drag', { from, to, ...target }) + return printResult(result, json, (v) => `Dragged ${v.dragged.from} → ${v.dragged.to}`) + } + + if (matches(commandPath, ['upload'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const filesStr = getRequiredStringFlag(parsed.flags, 'files') + const files = filesStr.split(',').map((f) => f.trim()) + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.upload', { + element, + files, + ...target + }) + return printResult(result, json, (v) => `Uploaded ${v.uploaded} file(s)`) + } + + // ── Cookie management ── + + if (matches(commandPath, ['cookie', 'get'])) { + const url = getOptionalStringFlag(parsed.flags, 'url') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.cookie.get', { + url, + ...target + }) + return printResult(result, json, (v) => { + if (v.cookies.length === 0) { + return 'No cookies' + } + return v.cookies.map((c) => `${c.name}=${c.value} (${c.domain})`).join('\n') + }) + } + + if (matches(commandPath, 
['cookie', 'set'])) { + const name = getRequiredStringFlag(parsed.flags, 'name') + const value = getRequiredStringFlag(parsed.flags, 'value') + const params: Record = { name, value } + const domain = getOptionalStringFlag(parsed.flags, 'domain') + const path = getOptionalStringFlag(parsed.flags, 'path') + const sameSite = getOptionalStringFlag(parsed.flags, 'sameSite') + const expires = getOptionalStringFlag(parsed.flags, 'expires') + if (domain) { + params.domain = domain + } + if (path) { + params.path = path + } + if (parsed.flags.has('secure')) { + params.secure = true + } + if (parsed.flags.has('httpOnly')) { + params.httpOnly = true + } + if (sameSite) { + params.sameSite = sameSite + } + if (expires) { + params.expires = Number(expires) + } + Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client)) + const result = await client.call('browser.cookie.set', params) + return printResult(result, json, (v) => + v.success ? `Cookie "${name}" set` : `Failed to set cookie "${name}"` + ) + } + + if (matches(commandPath, ['cookie', 'delete'])) { + const name = getRequiredStringFlag(parsed.flags, 'name') + const params: Record = { name } + const domain = getOptionalStringFlag(parsed.flags, 'domain') + const url = getOptionalStringFlag(parsed.flags, 'url') + if (domain) { + params.domain = domain + } + if (url) { + params.url = url + } + Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client)) + const result = await client.call('browser.cookie.delete', params) + return printResult(result, json, () => `Cookie "${name}" deleted`) + } + + // ── Viewport ── + + if (matches(commandPath, ['viewport'])) { + const width = getRequiredPositiveNumber(parsed.flags, 'width') + const height = getRequiredPositiveNumber(parsed.flags, 'height') + const params: Record = { width, height } + const scale = getOptionalStringFlag(parsed.flags, 'scale') + if (scale) { + const n = Number(scale) + if (!Number.isFinite(n) || n <= 0) { + throw new 
RuntimeClientError('invalid_argument', '--scale must be a positive number') + } + params.deviceScaleFactor = n + } + if (parsed.flags.has('mobile')) { + params.mobile = true + } + Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client)) + const result = await client.call('browser.viewport', params) + return printResult( + result, + json, + (v) => `Viewport set to ${v.width}×${v.height}${v.mobile ? ' (mobile)' : ''}` + ) + } + + // ── Geolocation ── + + if (matches(commandPath, ['geolocation'])) { + const latitude = getRequiredFiniteNumber(parsed.flags, 'latitude') + const longitude = getRequiredFiniteNumber(parsed.flags, 'longitude') + const params: Record = { latitude, longitude } + const accuracy = getOptionalStringFlag(parsed.flags, 'accuracy') + if (accuracy) { + const n = Number(accuracy) + if (!Number.isFinite(n) || n <= 0) { + throw new RuntimeClientError('invalid_argument', '--accuracy must be a positive number') + } + params.accuracy = n + } + Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client)) + const result = await client.call('browser.geolocation', params) + return printResult(result, json, (v) => `Geolocation set to ${v.latitude}, ${v.longitude}`) + } + + // ── Request interception ── + + if (matches(commandPath, ['intercept', 'enable'])) { + const params: Record = {} + const patternsStr = getOptionalStringFlag(parsed.flags, 'patterns') + if (patternsStr) { + params.patterns = patternsStr.split(',').map((p) => p.trim()) + } + Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client)) + const result = await client.call( + 'browser.intercept.enable', + params + ) + return printResult( + result, + json, + (v) => `Interception enabled for: ${(v.patterns ?? 
[]).join(', ') || '*'}` + ) + } + + if (matches(commandPath, ['intercept', 'disable'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call( + 'browser.intercept.disable', + target + ) + return printResult(result, json, () => 'Interception disabled') + } + + if (matches(commandPath, ['intercept', 'list'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call<{ requests: BrowserInterceptedRequest[] }>( + 'browser.intercept.list', + target + ) + return printResult(result, json, (v) => { + if (v.requests.length === 0) { + return 'No paused requests' + } + return v.requests + .map((r) => `[${r.id}] ${r.method} ${r.url} (${r.resourceType})`) + .join('\n') + }) + } + + // ── Console/network capture ── + + if (matches(commandPath, ['capture', 'start'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.capture.start', target) + return printResult(result, json, () => 'Capture started (console + network)') + } + + if (matches(commandPath, ['capture', 'stop'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.capture.stop', target) + return printResult(result, json, () => 'Capture stopped') + } + + if (matches(commandPath, ['console'])) { + const params: Record = {} + const limit = getOptionalPositiveIntegerFlag(parsed.flags, 'limit') + if (limit !== undefined) { + params.limit = limit + } + Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client)) + const result = await client.call('browser.console', params) + return printResult(result, json, (v) => { + if (v.entries.length === 0) { + return 'No console entries' + } + return v.entries.map((e) => `[${e.level}] ${e.text}`).join('\n') + }) + } + + if (matches(commandPath, ['network'])) { + const params: Record = {} + const limit = 
getOptionalPositiveIntegerFlag(parsed.flags, 'limit') + if (limit !== undefined) { + params.limit = limit + } + Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client)) + const result = await client.call('browser.network', params) + return printResult(result, json, (v) => { + if (v.entries.length === 0) { + return 'No network entries' + } + return v.entries.map((e) => `${e.status} ${e.url} (${e.mimeType}, ${e.size}B)`).join('\n') + }) + } + + // ── Additional core commands ── + + if (matches(commandPath, ['dblclick'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.dblclick', { element, ...target }) + return printResult(result, json, () => `Double-clicked ${element}`) + } + + if (matches(commandPath, ['forward'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.forward', target) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return printResult(result, json, (v: any) => + v?.url ? `Navigated forward to ${v.url}` : 'Navigated forward' + ) + } + + if (matches(commandPath, ['scrollintoview'])) { + const element = getRequiredStringFlag(parsed.flags, 'element') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.scrollIntoView', { element, ...target }) + return printResult(result, json, () => `Scrolled ${element} into view`) + } + + if (matches(commandPath, ['get'])) { + const what = getRequiredStringFlag(parsed.flags, 'what') + const element = getOptionalStringFlag(parsed.flags, 'element') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.get', { + what, + selector: element, + ...target + }) + return printResult(result, json, (v) => + typeof v === 'string' ? 
v : JSON.stringify(v, null, 2) + ) + } + + if (matches(commandPath, ['is'])) { + const what = getRequiredStringFlag(parsed.flags, 'what') + const element = getRequiredStringFlag(parsed.flags, 'element') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.is', { + what, + selector: element, + ...target + }) + return printResult(result, json, (v) => String(v)) + } + + // ── Keyboard insert text ── + + if (matches(commandPath, ['inserttext'])) { + const text = getRequiredStringFlag(parsed.flags, 'text') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.keyboardInsertText', { text, ...target }) + return printResult(result, json, () => 'Text inserted') + } + + // ── Mouse commands ── + + if (matches(commandPath, ['mouse', 'move'])) { + const x = getRequiredFiniteNumber(parsed.flags, 'x') + const y = getRequiredFiniteNumber(parsed.flags, 'y') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.mouseMove', { x, y, ...target }) + return printResult(result, json, () => `Mouse moved to ${x},${y}`) + } + + if (matches(commandPath, ['mouse', 'down'])) { + const button = getOptionalStringFlag(parsed.flags, 'button') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.mouseDown', { button, ...target }) + return printResult(result, json, () => `Mouse button ${button ?? 'left'} pressed`) + } + + if (matches(commandPath, ['mouse', 'up'])) { + const button = getOptionalStringFlag(parsed.flags, 'button') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.mouseUp', { button, ...target }) + return printResult(result, json, () => `Mouse button ${button ?? 
'left'} released`) + } + + if (matches(commandPath, ['mouse', 'wheel'])) { + const dy = getRequiredFiniteNumber(parsed.flags, 'dy') + const dx = getOptionalNumberFlag(parsed.flags, 'dx') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.mouseWheel', { dy, dx, ...target }) + return printResult( + result, + json, + () => `Mouse wheel scrolled dy=${dy}${dx != null ? ` dx=${dx}` : ''}` + ) + } + + // ── Find (semantic locators) ── + + if (matches(commandPath, ['find'])) { + const locator = getRequiredStringFlag(parsed.flags, 'locator') + const value = getRequiredStringFlag(parsed.flags, 'value') + const action = getRequiredStringFlag(parsed.flags, 'action') + const text = getOptionalStringFlag(parsed.flags, 'text') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.find', { + locator, + value, + action, + text, + ...target + }) + return printResult(result, json, (v) => JSON.stringify(v, null, 2)) + } + + // ── Set commands ── + + if (matches(commandPath, ['set', 'device'])) { + const name = getRequiredStringFlag(parsed.flags, 'name') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.setDevice', { name, ...target }) + return printResult(result, json, () => `Device emulation set to ${name}`) + } + + if (matches(commandPath, ['set', 'offline'])) { + const state = getOptionalStringFlag(parsed.flags, 'state') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.setOffline', { state, ...target }) + return printResult(result, json, () => `Offline mode ${state ?? 
'toggled'}`) + } + + if (matches(commandPath, ['set', 'headers'])) { + const headers = getRequiredStringFlag(parsed.flags, 'headers') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.setHeaders', { headers, ...target }) + return printResult(result, json, () => 'Extra HTTP headers set') + } + + if (matches(commandPath, ['set', 'credentials'])) { + const user = getRequiredStringFlag(parsed.flags, 'user') + const pass = getRequiredStringFlag(parsed.flags, 'pass') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.setCredentials', { + user, + pass, + ...target + }) + return printResult(result, json, () => `HTTP auth credentials set for ${user}`) + } + + if (matches(commandPath, ['set', 'media'])) { + const colorScheme = getOptionalStringFlag(parsed.flags, 'color-scheme') + const reducedMotion = getOptionalStringFlag(parsed.flags, 'reduced-motion') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.setMedia', { + colorScheme, + reducedMotion, + ...target + }) + return printResult(result, json, () => 'Media preferences set') + } + + // ── Clipboard commands ── + + if (matches(commandPath, ['clipboard', 'read'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.clipboardRead', target) + return printResult(result, json, (v) => JSON.stringify(v, null, 2)) + } + + if (matches(commandPath, ['clipboard', 'write'])) { + const text = getRequiredStringFlag(parsed.flags, 'text') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.clipboardWrite', { text, ...target }) + return printResult(result, json, () => 'Clipboard updated') + } + + // ── Dialog commands ── + + if (matches(commandPath, ['dialog', 'accept'])) { + const text = 
getOptionalStringFlag(parsed.flags, 'text') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.dialogAccept', { text, ...target }) + return printResult(result, json, () => 'Dialog accepted') + } + + if (matches(commandPath, ['dialog', 'dismiss'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.dialogDismiss', target) + return printResult(result, json, () => 'Dialog dismissed') + } + + // ── Storage commands ── + + if (matches(commandPath, ['storage', 'local', 'get'])) { + const key = getRequiredStringFlag(parsed.flags, 'key') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.storage.local.get', { key, ...target }) + return printResult(result, json, (v) => JSON.stringify(v, null, 2)) + } + + if (matches(commandPath, ['storage', 'local', 'set'])) { + const key = getRequiredStringFlag(parsed.flags, 'key') + const value = getRequiredStringFlag(parsed.flags, 'value') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.storage.local.set', { + key, + value, + ...target + }) + return printResult(result, json, () => `localStorage["${key}"] set`) + } + + if (matches(commandPath, ['storage', 'local', 'clear'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.storage.local.clear', target) + return printResult(result, json, () => 'localStorage cleared') + } + + if (matches(commandPath, ['storage', 'session', 'get'])) { + const key = getRequiredStringFlag(parsed.flags, 'key') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.storage.session.get', { key, ...target }) + return printResult(result, json, (v) => JSON.stringify(v, null, 2)) + } + + if (matches(commandPath, 
['storage', 'session', 'set'])) { + const key = getRequiredStringFlag(parsed.flags, 'key') + const value = getRequiredStringFlag(parsed.flags, 'value') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.storage.session.set', { + key, + value, + ...target + }) + return printResult(result, json, () => `sessionStorage["${key}"] set`) + } + + if (matches(commandPath, ['storage', 'session', 'clear'])) { + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.storage.session.clear', target) + return printResult(result, json, () => 'sessionStorage cleared') + } + + // ── Download command ── + + if (matches(commandPath, ['download'])) { + const selector = getRequiredStringFlag(parsed.flags, 'selector') + const path = getRequiredStringFlag(parsed.flags, 'path') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.download', { selector, path, ...target }) + return printResult(result, json, () => `Downloaded to ${path}`) + } + + // ── Highlight command ── + + if (matches(commandPath, ['highlight'])) { + const selector = getRequiredStringFlag(parsed.flags, 'selector') + const target = await getBrowserCommandTarget(parsed.flags, cwd, client) + const result = await client.call('browser.highlight', { selector, ...target }) + return printResult(result, json, () => `Highlighted ${selector}`) + } + throw new RuntimeClientError('invalid_argument', `Unknown command: ${commandPath.join(' ')}`) } catch (error) { if (json) { @@ -432,7 +1671,10 @@ export function validateCommandAndFlags(parsed: ParsedArgs): void { } for (const flag of parsed.flags.keys()) { - if (!spec.allowedFlags.includes(flag)) { + if ( + !spec.allowedFlags.includes(flag) && + !(flag === 'page' && supportsBrowserPageFlag(spec.path)) + ) { throw new RuntimeClientError( 'invalid_argument', `Unknown flag --${flag} for command: 
${spec.path.join(' ')}` @@ -445,8 +1687,38 @@ export function findCommandSpec(commandPath: string[]): CommandSpec | undefined return COMMAND_SPECS.find((spec) => matches(spec.path, commandPath)) } +function supportsBrowserPageFlag(commandPath: string[]): boolean { + const joined = commandPath.join(' ') + if (['open', 'status'].includes(commandPath[0])) { + return false + } + if (['repo', 'worktree', 'terminal'].includes(commandPath[0])) { + return false + } + return !['tab list', 'tab create'].includes(joined) +} + function isCommandGroup(commandPath: string[]): boolean { - return commandPath.length === 1 && ['repo', 'worktree', 'terminal'].includes(commandPath[0]) + return ( + (commandPath.length === 1 && + [ + 'repo', + 'worktree', + 'terminal', + 'tab', + 'cookie', + 'intercept', + 'capture', + 'mouse', + 'set', + 'clipboard', + 'dialog', + 'storage' + ].includes(commandPath[0])) || + (commandPath.length === 2 && + commandPath[0] === 'storage' && + ['local', 'session'].includes(commandPath[1])) + ) } function getRequiredStringFlag(flags: Map, name: string): string { @@ -533,6 +1805,60 @@ async function getRequiredWorktreeSelector( return normalizeWorktreeSelector(value, cwd) } +// Why: browser commands default to the current worktree (auto-resolve from cwd). +// --worktree all bypasses filtering. Omitting --worktree auto-resolves. 
+async function getBrowserWorktreeSelector( + flags: Map, + cwd: string, + client: RuntimeClient +): Promise { + const value = getOptionalStringFlag(flags, 'worktree') + if (value === 'all') { + return undefined + } + if (value) { + if (value === 'active' || value === 'current') { + return await resolveCurrentWorktreeSelector(cwd, client) + } + return normalizeWorktreeSelector(value, cwd) + } + // Default: auto-resolve from cwd + try { + return await resolveCurrentWorktreeSelector(cwd, client) + } catch { + // Not inside a managed worktree — no filter + return undefined + } +} + +async function getBrowserCommandTarget( + flags: Map, + cwd: string, + client: RuntimeClient +): Promise { + const page = getOptionalStringFlag(flags, 'page') + if (!page) { + return { + worktree: await getBrowserWorktreeSelector(flags, cwd, client) + } + } + + const explicitWorktree = getOptionalStringFlag(flags, 'worktree') + if (!explicitWorktree || explicitWorktree === 'all') { + return { page } + } + if (explicitWorktree === 'active' || explicitWorktree === 'current') { + return { + page, + worktree: await resolveCurrentWorktreeSelector(cwd, client) + } + } + return { + page, + worktree: normalizeWorktreeSelector(explicitWorktree, cwd) + } +} + function getOptionalNumberFlag( flags: Map, name: string @@ -562,6 +1888,38 @@ function getOptionalPositiveIntegerFlag( return value } +function getOptionalNonNegativeIntegerFlag( + flags: Map, + name: string +): number | undefined { + const value = getOptionalNumberFlag(flags, name) + if (value === undefined) { + return undefined + } + if (!Number.isInteger(value) || value < 0) { + throw new RuntimeClientError('invalid_argument', `Invalid non-negative integer for --${name}`) + } + return value +} + +function getRequiredPositiveNumber(flags: Map, name: string): number { + const raw = getRequiredStringFlag(flags, name) + const value = Number(raw) + if (!Number.isFinite(value) || value <= 0) { + throw new RuntimeClientError('invalid_argument', 
`--${name} must be a positive number`) + } + return value +} + +function getRequiredFiniteNumber(flags: Map, name: string): number { + const raw = getRequiredStringFlag(flags, name) + const value = Number(raw) + if (!Number.isFinite(value)) { + throw new RuntimeClientError('invalid_argument', `--${name} must be a valid number`) + } + return value +} + function getOptionalNullableNumberFlag( flags: Map, name: string @@ -608,10 +1966,7 @@ function formatCliStatus(status: CliStatusResult): string { function formatCliError(error: unknown): string { const message = error instanceof Error ? error.message : String(error) - if ( - error instanceof RuntimeClientError && - (error.code === 'runtime_unavailable' || error.code === 'runtime_timeout') - ) { + if (error instanceof RuntimeClientError && error.code === 'runtime_unavailable') { return `${message}\nOrca is not running. Run 'orca open' first.` } if ( @@ -737,6 +2092,27 @@ function formatWorktreeShow(result: { worktree: RuntimeWorktreeRecord }): string .join('\n') } +function formatSnapshot(result: BrowserSnapshotResult): string { + const header = `page: ${result.browserPageId}\n${result.title} — ${result.url}\n` + return header + result.snapshot +} + +function formatScreenshot(result: BrowserScreenshotResult): string { + return `Screenshot captured (${result.format}, ${Math.round(result.data.length * 0.75)} bytes)` +} + +function formatTabList(result: BrowserTabListResult): string { + if (result.tabs.length === 0) { + return 'No browser tabs open.' + } + return result.tabs + .map((t) => { + const marker = t.active ? 
'* ' : ' ' + return `${marker}[${t.index}] ${t.browserPageId} ${t.title} — ${t.url}` + }) + .join('\n') +} + function printHelp(commandPath: string[] = []): void { const exactSpec = findCommandSpec(commandPath) if (exactSpec) { @@ -785,6 +2161,61 @@ Terminals: terminal wait Wait for a terminal condition terminal stop Stop terminals for a worktree +Browser Automation: + tab create Create a new browser tab (navigates to --url) + tab list List open browser tabs + tab switch Switch the active browser tab by --index or --page + tab close Close a browser tab by --index/--page or the current tab + snapshot Accessibility snapshot with element refs (e.g. @e1, @e2) + goto Navigate the active tab to --url + click Click element by --element ref + fill Clear and fill input by --element ref with --value + type Type --input text at the current focus (no element needed) + select Select dropdown option by --element ref and --value + hover Hover element by --element ref + keypress Press a key (e.g. --key Enter, --key Tab) + scroll Scroll --direction (up/down) by --amount pixels + back Navigate back in browser history + reload Reload the active browser tab + screenshot Capture viewport screenshot (--format png|jpeg) + eval Evaluate --expression JavaScript in the page context + wait Wait for page idle or --timeout ms + check Check a checkbox by --element ref + uncheck Uncheck a checkbox by --element ref + focus Focus an element by --element ref + clear Clear an input by --element ref + drag Drag --from ref to --to ref + upload Upload --files to a file input by --element ref + dblclick Double-click element by --element ref + forward Navigate forward in browser history + scrollintoview Scroll --element into view + get Get element property (--what: text, html, value, url, title) + is Check element state (--what: visible, enabled, checked) + inserttext Insert text without key events + mouse move Move mouse to --x --y coordinates + mouse down Press mouse button + mouse up Release mouse 
button + mouse wheel Scroll wheel --dy [--dx] + find Find element by locator (--locator role|text|label --value ) + set device Emulate device (--name "iPhone 12") + set offline Toggle offline mode (--state on|off) + set headers Set HTTP headers (--headers '{"key":"val"}') + set credentials Set HTTP auth (--user --pass

) + set media Set color scheme (--color-scheme dark|light) + clipboard read Read clipboard contents + clipboard write Write --text to clipboard + dialog accept Accept browser dialog (--text for prompt response) + dialog dismiss Dismiss browser dialog + storage local get Get localStorage value by --key + storage local set Set localStorage --key --value + storage local clear Clear localStorage + storage session get Get sessionStorage value by --key + storage session set Set sessionStorage --key --value + storage session clear Clear sessionStorage + download Download file via --selector to --path + highlight Highlight --selector on page + exec Run any agent-browser command (--command "...") + Common Commands: orca open [--json] orca status [--json] @@ -829,6 +2260,37 @@ Behavior: Most commands require a running Orca runtime. If Orca is not open yet, run \`orca open\` first. Use selectors for discovery and handles for repeated live terminal operations. +Browser Workflow: + 1. Create or navigate: orca tab create --url https://example.com + orca goto --url https://example.com + 2. Inspect the page: orca snapshot + (Returns an accessibility tree with element refs like e1, e2, e3) + For concurrent workflows, prefer: orca tab list --json + then reuse tabs[].browserPageId with --page on later commands. + 3. Interact: orca click --element e2 + orca fill --element e5 --value "search query" + orca keypress --key Enter + 4. Re-inspect: orca snapshot + (Element refs change after navigation — always re-snapshot before interacting) + +Browser Options: + --element Element ref from snapshot (e.g. @e3) + --url URL to navigate to + --value Value to fill or select + --input Text to type at current focus (no element needed) + --expression JavaScript expression to evaluate + --key Key to press (Enter, Tab, Escape, Control+a, etc.) + --direction

Scroll direction: up or down + --amount Scroll distance in pixels (default: viewport height) + --index Tab index (from \`tab list\`) + --page Stable browser page id (preferred for concurrent workflows) + --format Screenshot image format + --from Drag source element ref + --to Drag target element ref + --files Comma-separated file paths for upload + --timeout Wait timeout in milliseconds + --worktree Scope commands to a specific worktree's browser tabs + Examples: $ orca open $ orca status --json @@ -840,15 +2302,26 @@ Examples: $ orca worktree ps --limit 10 $ orca terminal list --worktree path:/Users/me/orca/workspaces/orca/cli-test-1 --json $ orca terminal send --terminal term_123 --text "hi" --enter - $ orca terminal wait --terminal term_123 --for exit --timeout-ms 60000 --json`) + $ orca terminal wait --terminal term_123 --for exit --timeout-ms 60000 --json + $ orca tab create --url https://example.com + $ orca snapshot + $ orca click --element e3 + $ orca fill --element e5 --value "hello" + $ orca goto --url https://example.com/login + $ orca keypress --key Enter + $ orca eval --expression "document.title" + $ orca tab list --json`) } function formatCommandHelp(spec: CommandSpec): string { const lines = [`orca ${spec.path.join(' ')}`, '', `Usage: ${spec.usage}`, '', spec.summary] + const displayedFlags = supportsBrowserPageFlag(spec.path) + ? 
[...spec.allowedFlags, 'page'] + : spec.allowedFlags - if (spec.allowedFlags.length > 0) { + if (displayedFlags.length > 0) { lines.push('', 'Options:') - for (const flag of spec.allowedFlags) { + for (const flag of displayedFlags) { lines.push(` ${formatFlagHelp(flag)}`) } } @@ -902,7 +2375,18 @@ function formatFlagHelp(flag: string): string { text: '--text Text to send to the terminal', 'timeout-ms': '--timeout-ms Maximum wait time before timing out', worktree: - '--worktree Worktree selector such as id:, branch:, issue:, path:, or active/current' + '--worktree Worktree selector such as id:, branch:, issue:, path:, or active/current', + // Browser automation flags + element: '--element Element ref from snapshot (e.g. e3)', + url: '--url URL to navigate to', + value: '--value Value to fill or select', + input: '--input Text to type at current focus', + expression: '--expression JavaScript expression to evaluate', + direction: '--direction Scroll direction', + amount: '--amount Scroll distance in pixels', + index: '--index Tab index to switch to', + page: '--page Stable browser page id from `orca tab list --json`', + format: '--format Screenshot image format' } return helpByFlag[flag] ?? `--${flag}` diff --git a/src/cli/runtime-client.ts b/src/cli/runtime-client.ts index f8c38d01..0af231a0 100644 --- a/src/cli/runtime-client.ts +++ b/src/cli/runtime-client.ts @@ -58,7 +58,10 @@ export class RuntimeClient { private readonly userDataPath: string private readonly requestTimeoutMs: number - constructor(userDataPath = getDefaultUserDataPath(), requestTimeoutMs = 15000) { + // Why: browser commands trigger first-time session init (agent-browser connect + + // CDP proxy setup) which can take 15-30s. 60s accommodates cold start without + // being so large that genuine hangs go unnoticed. 
+ constructor(userDataPath = getDefaultUserDataPath(), requestTimeoutMs = 60_000) { this.userDataPath = userDataPath this.requestTimeoutMs = requestTimeoutMs } @@ -383,6 +386,12 @@ export function getDefaultUserDataPath( platform: NodeJS.Platform = process.platform, homeDir = homedir() ): string { + // Why: in dev mode, the Electron app writes runtime metadata to `orca-dev` + // instead of `orca` to avoid clobbering the production app's metadata. The + // CLI needs to find the same metadata file, so respect this env var override. + if (process.env.ORCA_USER_DATA_PATH) { + return process.env.ORCA_USER_DATA_PATH + } if (platform === 'darwin') { return join(homeDir, 'Library', 'Application Support', 'orca') } diff --git a/src/main/browser/agent-browser-bridge.test.ts b/src/main/browser/agent-browser-bridge.test.ts new file mode 100644 index 00000000..6e6fd480 --- /dev/null +++ b/src/main/browser/agent-browser-bridge.test.ts @@ -0,0 +1,1061 @@ +/* eslint-disable max-lines */ +import { describe, it, expect, vi, beforeEach } from 'vitest' + +const { execFileMock, webContentsFromIdMock, existsSyncMock, readFileSyncMock } = vi.hoisted( + () => ({ + execFileMock: vi.fn(), + webContentsFromIdMock: vi.fn(), + existsSyncMock: vi.fn(() => false), + readFileSyncMock: vi.fn(() => Buffer.from('')) + }) +) + +vi.mock('child_process', () => ({ execFile: execFileMock })) +vi.mock('fs', () => ({ + existsSync: existsSyncMock, + readFileSync: readFileSyncMock, + accessSync: vi.fn(), + chmodSync: vi.fn(), + constants: { X_OK: 1 } +})) +vi.mock('os', () => ({ platform: () => 'darwin', arch: () => 'arm64' })) +vi.mock('electron', () => { + return { + app: { getPath: vi.fn(() => '/app'), getAppPath: vi.fn(() => '/project'), isPackaged: false }, + webContents: { fromId: webContentsFromIdMock } + } +}) +const { CdpWsProxyMock } = vi.hoisted(() => { + const instances: unknown[] = [] + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const MockClass = 
vi.fn().mockImplementation(function (this: any, _wc: unknown) { + this.start = vi.fn(async () => 'ws://127.0.0.1:9222') + this.stop = vi.fn(async () => {}) + this.getPort = vi.fn(() => 9222) + instances.push(this) + }) + return { CdpWsProxyMock: Object.assign(MockClass, { instances }) } +}) + +vi.mock('./cdp-ws-proxy', () => ({ + CdpWsProxy: CdpWsProxyMock +})) +vi.mock('./cdp-bridge', () => ({ + BrowserError: class BrowserError extends Error { + code: string + constructor(code: string, message: string) { + super(message) + this.code = code + } + } +})) + +import { AgentBrowserBridge } from './agent-browser-bridge' +import type { BrowserManager } from './browser-manager' + +// Why: the bridge resolves webContents via dynamic require('electron').webContents.fromId +// inside a try/catch. Override the private method to inject our mock. +// eslint-disable-next-line @typescript-eslint/no-explicit-any +;(AgentBrowserBridge.prototype as any).getWebContents = function (id: number) { + return webContentsFromIdMock(id) ?? 
null +} + +function mockBrowserManager( + tabs: Map = new Map([['tab-1', 100]]), + worktrees: Map = new Map(), + overrides: Partial = {} +): BrowserManager { + return { + getWebContentsIdByTabId: () => tabs, + getWorktreeIdForTab: (tabId: string) => worktrees.get(tabId), + getGuestWebContentsId: vi.fn(() => null), + ensureWebviewVisible: vi.fn(async () => () => {}), + ...overrides + } as unknown as BrowserManager +} + +function mockWebContents(id: number, url = 'https://example.com', title = 'Example') { + return { + id, + getURL: () => url, + getTitle: () => title, + isDestroyed: () => false, + invalidate: vi.fn(), + debugger: { + isAttached: vi.fn(() => true), + attach: vi.fn(), + detach: vi.fn(), + sendCommand: vi.fn(), + on: vi.fn(), + removeListener: vi.fn() + } + } +} + +function succeedWith(data: unknown): void { + execFileMock.mockImplementation((_bin: string, _args: string[], _opts: unknown, cb: Function) => { + cb(null, JSON.stringify({ success: true, data }), '') + }) +} + +function failWith(error: string): void { + execFileMock.mockImplementation((_bin: string, _args: string[], _opts: unknown, cb: Function) => { + cb(null, JSON.stringify({ success: false, error }), '') + }) +} + +const CDP_DISCOVERY_FAILURE = + 'Auto-launch failed: All CDP discovery methods failed: connect ECONNREFUSED 127.0.0.1:9222; WebSocket connect failed' + +describe('AgentBrowserBridge', () => { + let bridge: AgentBrowserBridge + + beforeEach(() => { + vi.clearAllMocks() + CdpWsProxyMock.instances.length = 0 + existsSyncMock.mockReturnValue(false) + readFileSyncMock.mockReturnValue(Buffer.from('')) + const wc = mockWebContents(100) + webContentsFromIdMock.mockReturnValue(wc) + bridge = new AgentBrowserBridge(mockBrowserManager()) + bridge.setActiveTab(100) + }) + + // ── Session naming ── + + it('uses browserPageId as session name', async () => { + succeedWith({ snapshot: '...' 
}) + await bridge.snapshot() + + const args = execFileMock.mock.calls[0][1] as string[] + expect(args).toContain('--session') + expect(args[args.indexOf('--session') + 1]).toBe('orca-tab-tab-1') + }) + + // ── --cdp first-use only ── + + it('passes --cdp only on first command for a session', async () => { + succeedWith({ snapshot: '...' }) + await bridge.snapshot() + + // Why: calls[0] is stale-session 'close'; find the snapshot call + const snapshotCall = execFileMock.mock.calls.find((c: unknown[]) => + (c[1] as string[]).includes('snapshot') + ) + expect(snapshotCall![1]).toContain('--cdp') + const cdpIdx = (snapshotCall![1] as string[]).indexOf('--cdp') + expect((snapshotCall![1] as string[])[cdpIdx + 1]).toBe('9222') + + succeedWith({ clicked: '@e1' }) + await bridge.click('@e1') + + const clickCall = execFileMock.mock.calls.find((c: unknown[]) => + (c[1] as string[]).includes('click') + ) + expect(clickCall![1]).not.toContain('--cdp') + }) + + // ── --json always appended ── + + it('always appends --json to commands', async () => { + succeedWith({ snapshot: '...' }) + await bridge.snapshot() + + const snapshotCall = execFileMock.mock.calls.find((c: unknown[]) => + (c[1] as string[]).includes('snapshot') + ) + expect((snapshotCall![1] as string[]).at(-1)).toBe('--json') + }) + + // ── Output translation ── + + it('translates success response to result', async () => { + succeedWith({ snapshot: 'tree output' }) + const result = await bridge.snapshot() + expect(result).toEqual({ browserPageId: 'tab-1', snapshot: 'tree output' }) + }) + + it('routes snapshot to an explicit browser page id without changing the active tab', async () => { + const tabs = new Map([ + ['tab-a', 1], + ['tab-b', 2] + ]) + const wc1 = mockWebContents(1, 'https://a.com', 'A') + const wc2 = mockWebContents(2, 'https://b.com', 'B') + webContentsFromIdMock.mockImplementation((id: number) => (id === 1 ? 
wc1 : wc2)) + + const b = new AgentBrowserBridge(mockBrowserManager(tabs)) + b.setActiveTab(1) + + succeedWith({ snapshot: 'tree output' }) + const result = await b.snapshot(undefined, 'tab-b') + + const snapshotCall = execFileMock.mock.calls.find((c: unknown[]) => + (c[1] as string[]).includes('snapshot') + ) + expect(snapshotCall).toBeTruthy() + expect(snapshotCall![1]).toContain('--session') + expect( + (snapshotCall![1] as string[])[(snapshotCall![1] as string[]).indexOf('--session') + 1] + ).toBe('orca-tab-tab-b') + expect(result).toEqual({ browserPageId: 'tab-b', snapshot: 'tree output' }) + expect(b.getActiveWebContentsId()).toBe(1) + }) + + it('translates error response to BrowserError', async () => { + failWith('Element not found') + await expect(bridge.click('@e1')).rejects.toThrow('Element not found') + }) + + it('keeps CDP discovery failures generic while the tab session is still live', async () => { + failWith(CDP_DISCOVERY_FAILURE) + await expect(bridge.snapshot()).rejects.toMatchObject({ + code: 'browser_error', + message: CDP_DISCOVERY_FAILURE + }) + }) + + it('maps in-flight CDP discovery failures to tab not found after the session disappears', async () => { + let releaseSnapshot: (() => void) | null = null + const activeChild = { kill: vi.fn() } + execFileMock.mockImplementation( + (_bin: string, args: string[], _opts: unknown, cb: Function) => { + if (args.includes('snapshot')) { + releaseSnapshot = () => { + cb(null, JSON.stringify({ success: false, error: CDP_DISCOVERY_FAILURE }), '') + } + return activeChild + } + cb(null, JSON.stringify({ success: true, data: null }), '') + return { kill: vi.fn() } + } + ) + + const snapshotPromise = bridge.snapshot() + + await vi.waitFor(() => { + expect(releaseSnapshot).not.toBeNull() + }) + // Why: this reproduces the teardown race where the tab close path has + // already removed the bridge session before agent-browser reports that + // its CDP proxy disappeared. 
+ ;(bridge as unknown as { sessions: Map }).sessions.delete('orca-tab-tab-1') + releaseSnapshot!() + + await expect(snapshotPromise).rejects.toMatchObject({ + code: 'browser_tab_not_found', + message: 'Browser page tab-1 is no longer available' + }) + }) + + it('maps target disappearance during session creation to tab not found', async () => { + const wc = mockWebContents(100) + webContentsFromIdMock.mockImplementationOnce(() => wc).mockImplementationOnce(() => null) + + await expect(bridge.snapshot(undefined, 'tab-1')).rejects.toMatchObject({ + code: 'browser_tab_not_found', + message: 'Browser page tab-1 is no longer available' + }) + }) + + it('handles malformed JSON from agent-browser', async () => { + execFileMock.mockImplementation( + (_bin: string, _args: string[], _opts: unknown, cb: Function) => { + cb(null, 'not json at all', '') + } + ) + await expect(bridge.snapshot()).rejects.toThrow() + }) + + // ── exec passthrough ── + + it('strips --cdp and --session from exec commands', async () => { + succeedWith({ output: 'ok' }) + await bridge.exec('dblclick @e3 --cdp ws://evil --session hijack') + + // Why: find the actual exec call (contains 'dblclick'), not the stale-session close + const execCall = execFileMock.mock.calls.find((c: unknown[]) => + (c[1] as string[]).includes('dblclick') + ) + const args = execCall![1] as string[] + // The bridge's own --session and --cdp (for session init) are expected. 
+ // Verify the user-injected ones were stripped: no 'ws://evil' or 'hijack' + expect(args.join(' ')).not.toContain('ws://evil') + expect(args.join(' ')).not.toContain('hijack') + expect(args).toContain('dblclick') + expect(args).toContain('@e3') + }) + + // ── Worktree filtering ── + + describe('worktree filtering', () => { + it('returns all tabs when no worktreeId', () => { + const tabs = new Map([ + ['tab-a', 1], + ['tab-b', 2] + ]) + const b = new AgentBrowserBridge(mockBrowserManager(tabs)) + const result = b.tabList() + expect(result.tabs).toHaveLength(2) + }) + + it('returns only matching worktree tabs', () => { + const tabs = new Map([ + ['tab-a', 1], + ['tab-b', 2] + ]) + const worktrees = new Map([ + ['tab-a', 'wt-1'], + ['tab-b', 'wt-2'] + ]) + const wc1 = mockWebContents(1, 'https://a.com', 'A') + const wc2 = mockWebContents(2, 'https://b.com', 'B') + webContentsFromIdMock.mockImplementation((id: number) => (id === 1 ? wc1 : wc2)) + + const b = new AgentBrowserBridge(mockBrowserManager(tabs, worktrees)) + const result = b.tabList('wt-1') + expect(result.tabs).toHaveLength(1) + expect(result.tabs[0].browserPageId).toBe('tab-a') + expect(result.tabs[0].url).toBe('https://a.com') + }) + + it('does not mutate active-tab routing when tab-list infers the first live tab', () => { + const tabs = new Map([ + ['tab-a', 1], + ['tab-b', 2] + ]) + const wc1 = mockWebContents(1, 'https://a.com', 'A') + const wc2 = mockWebContents(2, 'https://b.com', 'B') + webContentsFromIdMock.mockImplementation((id: number) => (id === 1 ? 
wc1 : wc2)) + + const b = new AgentBrowserBridge(mockBrowserManager(tabs)) + + const result = b.tabList() + expect(result.tabs).toMatchObject([ + { browserPageId: 'tab-a', active: true }, + { browserPageId: 'tab-b', active: false } + ]) + expect(b.getActiveWebContentsId()).toBeNull() + }) + }) + + // ── Tab switch ── + + it('throws on out-of-range tab index', async () => { + await expect(bridge.tabSwitch(99)).rejects.toThrow('Tab index 99 out of range') + }) + + // ── No tab error ── + + it('throws browser_no_tab when no tabs registered', async () => { + const b = new AgentBrowserBridge(mockBrowserManager(new Map())) + await expect(b.snapshot()).rejects.toThrow('No browser tab open') + }) + + // ── Command queue serialization ── + + it('serializes concurrent commands per session', async () => { + const commandCalls: string[][] = [] + + execFileMock.mockImplementation( + (_bin: string, args: string[], _opts: unknown, cb: Function) => { + commandCalls.push(args) + cb(null, JSON.stringify({ success: true, data: { ok: true } }), '') + } + ) + + const [r1, r2] = await Promise.all([bridge.snapshot(), bridge.click('@e1')]) + expect(r1).toEqual({ browserPageId: 'tab-1', ok: true }) + expect(r2).toEqual({ ok: true }) + // Why: close runs first (stale session cleanup), then commands execute sequentially + const snapshotIdx = commandCalls.findIndex((a) => a.includes('snapshot')) + const clickIdx = commandCalls.findIndex((a) => a.includes('click')) + expect(snapshotIdx).toBeLessThan(clickIdx) + }) + + it('serializes screenshot visibility prep across sessions', async () => { + vi.useFakeTimers() + try { + const tabs = new Map([ + ['tab-1', 1], + ['tab-2', 2] + ]) + const worktrees = new Map([ + ['tab-1', 'wt-1'], + ['tab-2', 'wt-2'] + ]) + const lifecycleEvents: string[] = [] + const ensureWebviewVisibleMock = vi.fn(async (webContentsId: number) => { + lifecycleEvents.push(`ensure-${webContentsId}`) + return () => { + lifecycleEvents.push(`restore-${webContentsId}`) + } + }) + 
const wc1 = mockWebContents(1) + const wc2 = mockWebContents(2) + webContentsFromIdMock.mockImplementation((id: number) => + id === 1 ? wc1 : id === 2 ? wc2 : null + ) + existsSyncMock.mockReturnValue(true) + const screenshotBytes = Buffer.from('serialized-screenshot') + readFileSyncMock.mockReturnValue(screenshotBytes) + + const b = new AgentBrowserBridge( + mockBrowserManager(tabs, worktrees, { + ensureWebviewVisible: ensureWebviewVisibleMock + }) + ) + b.setActiveTab(1, 'wt-1') + b.setActiveTab(2, 'wt-2') + + let releaseFirstScreenshot: (() => void) | null = null + execFileMock.mockImplementation( + (_bin: string, args: string[], _opts: unknown, cb: Function) => { + if (args.includes('close')) { + cb(null, JSON.stringify({ success: true, data: null }), '') + return + } + if (args.includes('screenshot')) { + const sessionName = args[args.indexOf('--session') + 1] + lifecycleEvents.push(`command-${sessionName}`) + if (sessionName === 'orca-tab-tab-1' && !releaseFirstScreenshot) { + releaseFirstScreenshot = () => { + cb(null, JSON.stringify({ success: true, data: { path: '/tmp/tab-1.png' } }), '') + } + return + } + cb( + null, + JSON.stringify({ success: true, data: { path: `/tmp/${sessionName}.png` } }), + '' + ) + return + } + cb(null, JSON.stringify({ success: true, data: { ok: true } }), '') + } + ) + + const first = b.screenshot('png', 'wt-1') + const second = b.screenshot('png', 'wt-2') + + await Promise.resolve() + await Promise.resolve() + await vi.advanceTimersByTimeAsync(300) + + expect(lifecycleEvents).toContain('ensure-1') + expect(lifecycleEvents).toContain('command-orca-tab-tab-1') + expect(lifecycleEvents).not.toContain('ensure-2') + + expect(releaseFirstScreenshot).not.toBeNull() + releaseFirstScreenshot!() + await expect(first).resolves.toEqual({ + data: screenshotBytes.toString('base64'), + format: 'png' + }) + + await Promise.resolve() + await Promise.resolve() + + 
expect(lifecycleEvents.indexOf('restore-1')).toBeLessThan(lifecycleEvents.indexOf('ensure-2')) + + await vi.advanceTimersByTimeAsync(300) + await expect(second).resolves.toEqual({ + data: screenshotBytes.toString('base64'), + format: 'png' + }) + } finally { + vi.useRealTimers() + } + }) + + it('captures full-page screenshots directly through CDP using CSS layout bounds', async () => { + vi.useFakeTimers() + try { + const wc = mockWebContents(100) + wc.debugger.sendCommand.mockImplementation((method: string) => { + if (method === 'Page.getLayoutMetrics') { + return Promise.resolve({ + cssContentSize: { width: 600.2, height: 900.4 }, + contentSize: { width: 1200.4, height: 1800.8 } + }) + } + if (method === 'Page.captureScreenshot') { + return Promise.resolve({ data: 'full-cdp-shot' }) + } + return Promise.resolve({}) + }) + webContentsFromIdMock.mockReturnValue(wc) + + execFileMock.mockImplementation( + (_bin: string, _args: string[], _opts: unknown, cb: Function) => { + cb(null, JSON.stringify({ success: true, data: null }), '') + } + ) + + const screenshotPromise = bridge.fullPageScreenshot('png') + await vi.advanceTimersByTimeAsync(500) + + await expect(screenshotPromise).resolves.toEqual({ + data: 'full-cdp-shot', + format: 'png' + }) + + expect(wc.debugger.sendCommand).toHaveBeenNthCalledWith(1, 'Page.getLayoutMetrics', {}) + expect(wc.debugger.sendCommand).toHaveBeenNthCalledWith(2, 'Page.captureScreenshot', { + format: 'png', + captureBeyondViewport: true, + clip: { x: 0, y: 0, width: 601, height: 901, scale: 1 } + }) + const screenshotCall = execFileMock.mock.calls.find((call: unknown[]) => + (call[1] as string[]).includes('screenshot') + ) + expect(screenshotCall).toBeUndefined() + } finally { + vi.useRealTimers() + } + }) + + // ── Timeout escalation ── + + it('destroys session after 3 consecutive timeouts', async () => { + const killedError = Object.assign(new Error('timeout'), { killed: true }) + + execFileMock.mockImplementation( + (_bin: string, 
_args: string[], _opts: unknown, cb: Function) => { + cb(killedError, '', '') + } + ) + + for (let i = 0; i < 3; i++) { + await expect(bridge.snapshot()).rejects.toThrow('timed out') + } + + // Session is destroyed — next command should re-create it (new --cdp flag) + succeedWith({ snapshot: 'fresh' }) + await bridge.snapshot() + + const lastArgs = execFileMock.mock.calls.at(-1)![1] as string[] + expect(lastArgs).toContain('--cdp') + }) + + it('waits for pending session destruction before recreating the same session', async () => { + succeedWith({ snapshot: 'initial' }) + await bridge.snapshot() + + execFileMock.mockClear() + + const commandCalls: string[][] = [] + let releaseDestroyClose: (() => void) | null = null + execFileMock.mockImplementation( + (_bin: string, args: string[], _opts: unknown, cb: Function) => { + commandCalls.push(args) + if (args.includes('close')) { + if (!releaseDestroyClose) { + releaseDestroyClose = () => { + cb(null, JSON.stringify({ success: true, data: null }), '') + } + return + } + cb(null, JSON.stringify({ success: true, data: null }), '') + return + } + if (args.includes('snapshot')) { + cb(null, JSON.stringify({ success: true, data: { snapshot: 'after-destroy' } }), '') + return + } + cb(null, JSON.stringify({ success: true, data: { ok: true } }), '') + } + ) + + const destroyPromise = ( + bridge as unknown as { destroySession: (name: string) => Promise } + ).destroySession('orca-tab-tab-1') + const nextSnapshot = bridge.snapshot() + + await Promise.resolve() + await Promise.resolve() + + expect(commandCalls.filter((args) => args.includes('close'))).toHaveLength(1) + expect(commandCalls.some((args) => args.includes('snapshot'))).toBe(false) + expect(releaseDestroyClose).not.toBeNull() + + releaseDestroyClose!() + await destroyPromise + await expect(nextSnapshot).resolves.toEqual({ + browserPageId: 'tab-1', + snapshot: 'after-destroy' + }) + expect(commandCalls.filter((args) => args.includes('close'))).toHaveLength(2) + }) + + 
it('cancels the command already running when a session is destroyed', async () => { + succeedWith({ snapshot: 'initial' }) + await bridge.snapshot() + + execFileMock.mockClear() + + const killedError = Object.assign(new Error('killed'), { killed: true }) + let resolveRunningCommand: (() => void) | null = null + const activeChild = { + kill: vi.fn(() => { + resolveRunningCommand?.() + }) + } + + execFileMock.mockImplementation( + (_bin: string, args: string[], _opts: unknown, cb: Function) => { + if (args.includes('snapshot')) { + resolveRunningCommand = () => cb(killedError, '', '') + return activeChild + } + if (args.includes('close')) { + cb(null, JSON.stringify({ success: true, data: null }), '') + return { kill: vi.fn() } + } + cb(null, JSON.stringify({ success: true, data: { ok: true } }), '') + return { kill: vi.fn() } + } + ) + + const runningSnapshot = bridge.snapshot() + await Promise.resolve() + + const destroyPromise = ( + bridge as unknown as { destroySession: (name: string) => Promise } + ).destroySession('orca-tab-tab-1') + + expect(activeChild.kill).toHaveBeenCalledTimes(1) + await expect(runningSnapshot).rejects.toMatchObject({ + code: 'browser_tab_closed', + message: 'Tab was closed while command was running' + }) + await destroyPromise + }) + + // ── Process swap ── + + it('destroys session on process swap and re-inits with --cdp', async () => { + const tabs = new Map([['tab-1', 100]]) + const mgr = mockBrowserManager(tabs) + const b = new AgentBrowserBridge(mgr) + b.setActiveTab(100) + + succeedWith({ snapshot: 'tree' }) + await b.snapshot() + + // Why: calls[0] is the stale-session 'close'; find the snapshot call with --cdp + const firstSnapshotCall = execFileMock.mock.calls.find((c: unknown[]) => + (c[1] as string[]).includes('snapshot') + ) + expect(firstSnapshotCall![1]).toContain('--cdp') + + // Simulate process swap: update tab mapping + notify bridge + tabs.set('tab-1', 200) + const newWc = mockWebContents(200) + 
webContentsFromIdMock.mockReturnValue(newWc) + succeedWith(null) // for the 'close' command in destroySession + await b.onProcessSwap('tab-1', 200) + + // Next command should re-init with --cdp since session was destroyed + succeedWith({ snapshot: 'new tree' }) + await b.snapshot() + + const snapshotCalls = execFileMock.mock.calls.filter((c: unknown[]) => + (c[1] as string[]).includes('snapshot') + ) + expect(snapshotCalls.length).toBeGreaterThanOrEqual(2) + const lastSnapshotArgs = snapshotCalls.at(-1)![1] as string[] + // After process swap + session destroy, the new session must re-init with --cdp + expect(lastSnapshotArgs).toContain('--cdp') + }) + + it('does not replay stale intercept routes after process swap when the first command disables routing', async () => { + const tabs = new Map([['tab-1', 100]]) + const mgr = mockBrowserManager(tabs) + const b = new AgentBrowserBridge(mgr) + b.setActiveTab(100) + + succeedWith({ ok: true }) + await b.interceptEnable(['https://old.example/**']) + + tabs.set('tab-1', 200) + webContentsFromIdMock.mockReturnValue(mockWebContents(200)) + succeedWith(null) + await b.onProcessSwap('tab-1', 200) + + const commandCalls: string[][] = [] + execFileMock.mockImplementation( + (_bin: string, args: string[], _opts: unknown, cb: Function) => { + commandCalls.push(args) + cb(null, JSON.stringify({ success: true, data: { ok: true } }), '') + } + ) + + await b.interceptDisable() + + const routeCalls = commandCalls.filter( + (args) => args.includes('network') && args.includes('route') + ) + expect(routeCalls).toHaveLength(0) + + const unrouteCall = commandCalls.find( + (args) => args.includes('network') && args.includes('unroute') + ) + expect(unrouteCall).toBeDefined() + expect(unrouteCall).toContain('--cdp') + }) + + it('does not replay stale intercept routes after process swap when the first command enables a new route', async () => { + const tabs = new Map([['tab-1', 100]]) + const mgr = mockBrowserManager(tabs) + const b = new 
AgentBrowserBridge(mgr) + b.setActiveTab(100) + + succeedWith({ ok: true }) + await b.interceptEnable(['https://old.example/**']) + + tabs.set('tab-1', 200) + webContentsFromIdMock.mockReturnValue(mockWebContents(200)) + succeedWith(null) + await b.onProcessSwap('tab-1', 200) + + const commandCalls: string[][] = [] + execFileMock.mockImplementation( + (_bin: string, args: string[], _opts: unknown, cb: Function) => { + commandCalls.push(args) + cb(null, JSON.stringify({ success: true, data: { ok: true } }), '') + } + ) + + await b.interceptEnable(['https://new.example/**']) + + const routeCalls = commandCalls.filter( + (args) => args.includes('network') && args.includes('route') + ) + expect(routeCalls).toHaveLength(1) + expect(routeCalls[0]).toContain('https://new.example/**') + expect(routeCalls[0]).not.toContain('https://old.example/**') + expect(routeCalls[0]).toContain('--cdp') + }) + + // ── Tab close clears active ── + + it('clears activeWebContentsId on tab close', async () => { + succeedWith({ snapshot: 'tree' }) + await bridge.snapshot() + + await bridge.onTabClosed(100) + expect(bridge.getActiveWebContentsId()).toBeNull() + }) + + it('repairs per-worktree active routing when the active tab closes', async () => { + const tabs = new Map([ + ['tab-a', 1], + ['tab-b', 2] + ]) + const worktrees = new Map([ + ['tab-a', 'wt-1'], + ['tab-b', 'wt-1'] + ]) + const wc2 = mockWebContents(2, 'https://b.com', 'B') + webContentsFromIdMock.mockImplementation((id: number) => (id === 2 ? 
wc2 : null)) + + const b = new AgentBrowserBridge(mockBrowserManager(tabs, worktrees)) + b.setActiveTab(1, 'wt-1') + + await b.onTabClosed(1) + + expect(b.getActiveWebContentsId()).toBe(2) + expect(b.tabList('wt-1').tabs).toMatchObject([{ browserPageId: 'tab-b', active: true }]) + }) + + it('repairs per-worktree active routing when an active tab swaps processes', async () => { + const tabs = new Map([['tab-a', 200]]) + const worktrees = new Map([['tab-a', 'wt-1']]) + const wc = mockWebContents(200, 'https://a.com', 'A') + webContentsFromIdMock.mockImplementation((id: number) => (id === 200 ? wc : null)) + + const b = new AgentBrowserBridge(mockBrowserManager(tabs, worktrees)) + b.setActiveTab(100, 'wt-1') + + await b.onProcessSwap('tab-a', 200, 100) + + expect(b.getActiveWebContentsId()).toBe(200) + expect(b.tabList('wt-1').tabs).toMatchObject([{ browserPageId: 'tab-a', active: true }]) + }) + + // ── tabSwitch success ── + + it('switches active tab and returns switched index', async () => { + const tabs = new Map([ + ['tab-a', 1], + ['tab-b', 2] + ]) + const wc1 = mockWebContents(1) + const wc2 = mockWebContents(2) + webContentsFromIdMock.mockImplementation((id: number) => (id === 1 ? wc1 : wc2)) + + const b = new AgentBrowserBridge(mockBrowserManager(tabs)) + b.setActiveTab(1) + + const result = await b.tabSwitch(1) + expect(result).toEqual({ switched: 1, browserPageId: 'tab-b' }) + expect(b.getActiveWebContentsId()).toBe(2) + }) + + it('switches tabs by explicit browser page id', async () => { + const tabs = new Map([ + ['tab-a', 1], + ['tab-b', 2] + ]) + const wc1 = mockWebContents(1) + const wc2 = mockWebContents(2) + webContentsFromIdMock.mockImplementation((id: number) => (id === 1 ? 
wc1 : wc2)) + + const b = new AgentBrowserBridge(mockBrowserManager(tabs)) + b.setActiveTab(1) + + const result = await b.tabSwitch(undefined, undefined, 'tab-b') + expect(result).toEqual({ switched: 1, browserPageId: 'tab-b' }) + expect(b.getActiveWebContentsId()).toBe(2) + }) + + it('updates the owning worktree active tab when switching by browser page id', async () => { + const tabs = new Map([ + ['tab-a', 1], + ['tab-b', 2] + ]) + const worktrees = new Map([ + ['tab-a', 'wt-1'], + ['tab-b', 'wt-1'] + ]) + const wc1 = mockWebContents(1, 'https://a.com', 'A') + const wc2 = mockWebContents(2, 'https://b.com', 'B') + webContentsFromIdMock.mockImplementation((id: number) => (id === 1 ? wc1 : wc2)) + + const b = new AgentBrowserBridge(mockBrowserManager(tabs, worktrees)) + b.setActiveTab(2, 'wt-1') + + await expect(b.tabSwitch(undefined, undefined, 'tab-a')).resolves.toEqual({ + switched: 0, + browserPageId: 'tab-a' + }) + expect(b.tabList('wt-1').tabs).toMatchObject([ + { browserPageId: 'tab-a', active: true }, + { browserPageId: 'tab-b', active: false } + ]) + }) + + it('queues tabSwitch behind in-flight commands on the current session', async () => { + const tabs = new Map([ + ['tab-a', 1], + ['tab-b', 2] + ]) + const worktrees = new Map([ + ['tab-a', 'wt-1'], + ['tab-b', 'wt-1'] + ]) + const wc1 = mockWebContents(1) + const wc2 = mockWebContents(2) + webContentsFromIdMock.mockImplementation((id: number) => + id === 1 ? wc1 : id === 2 ? 
wc2 : null + ) + + const b = new AgentBrowserBridge(mockBrowserManager(tabs, worktrees)) + b.setActiveTab(1, 'wt-1') + + let releaseSnapshot: (() => void) | null = null + execFileMock.mockImplementation( + (_bin: string, args: string[], _opts: unknown, cb: Function) => { + if (args.includes('close')) { + cb(null, JSON.stringify({ success: true, data: null }), '') + return + } + if (args.includes('snapshot')) { + releaseSnapshot = () => { + cb(null, JSON.stringify({ success: true, data: { snapshot: 'tree' } }), '') + } + return + } + cb(null, JSON.stringify({ success: true, data: { ok: true } }), '') + } + ) + + const snapshot = b.snapshot('wt-1') + const switched = b.tabSwitch(1, 'wt-1') + + await Promise.resolve() + await Promise.resolve() + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(b.getActiveWebContentsId()).toBe(1) + expect(releaseSnapshot).not.toBeNull() + + releaseSnapshot!() + await expect(snapshot).resolves.toEqual({ browserPageId: 'tab-a', snapshot: 'tree' }) + await expect(switched).resolves.toEqual({ switched: 1, browserPageId: 'tab-b' }) + expect(b.getActiveWebContentsId()).toBe(2) + }) + + // ── goto command ── + + it('passes url to goto command', async () => { + succeedWith({ url: 'https://example.com', title: 'Example' }) + await bridge.goto('https://example.com') + + const args = execFileMock.mock.calls.at(-1)![1] as string[] + expect(args).toContain('goto') + expect(args).toContain('https://example.com') + }) + + // ── Cookie command arg building ── + + it('builds cookie set args with all options', async () => { + succeedWith({ success: true }) + await bridge.cookieSet({ + name: 'sid', + value: 'abc', + domain: '.example.com', + path: '/', + secure: true, + httpOnly: true, + sameSite: 'Lax', + expires: 1700000000 + }) + + const args = execFileMock.mock.calls.at(-1)![1] as string[] + expect(args).toContain('cookies') + expect(args).toContain('set') + expect(args).toContain('sid') + expect(args).toContain('abc') + 
expect(args).toContain('--domain') + expect(args).toContain('.example.com') + expect(args).toContain('--path') + expect(args).toContain('/') + expect(args).toContain('--secure') + expect(args).toContain('--httpOnly') + expect(args).toContain('--sameSite') + expect(args).toContain('Lax') + expect(args).toContain('--expires') + expect(args).toContain('1700000000') + }) + + // ── Viewport command arg building ── + + it('applies viewport emulation through CDP so mobile mode is preserved', async () => { + const wc = mockWebContents(100) + webContentsFromIdMock.mockReturnValue(wc) + + await bridge.setViewport(375, 812, 2, true) + + expect(wc.debugger.sendCommand).toHaveBeenCalledWith('Emulation.setDeviceMetricsOverride', { + width: 375, + height: 812, + deviceScaleFactor: 2, + mobile: true + }) + const viewportCall = execFileMock.mock.calls.find((call: unknown[]) => + (call[1] as string[]).includes('viewport') + ) + expect(viewportCall).toBeUndefined() + }) + + it('normalizes selector wait state=visible to the default supported semantics', async () => { + succeedWith({ selector: 'h1', waited: 'selector' }) + + await bridge.wait({ selector: 'h1', state: 'visible' }) + + const args = execFileMock.mock.calls.at(-1)![1] as string[] + expect(args).toContain('wait') + expect(args).toContain('h1') + expect(args).not.toContain('--state') + }) + + it('enforces conditional wait timeouts at the bridge layer', async () => { + succeedWith({ selector: '#ready', waited: 'selector' }) + + await bridge.wait({ selector: '#ready', timeout: 1200 }) + + const args = execFileMock.mock.calls.at(-1)![1] as string[] + const options = execFileMock.mock.calls.at(-1)![2] as { timeout?: number; env?: unknown } + expect(args).toContain('wait') + expect(args).toContain('#ready') + expect(options.timeout).toBe(2200) + expect(options.env).toBe(process.env) + }) + + it('returns browser_timeout for timed conditional waits without recycling the session', async () => { + const killedError = 
Object.assign(new Error('timeout'), { killed: true }) + execFileMock.mockImplementation( + (_bin: string, args: string[], _opts: unknown, cb: Function) => { + if (args.includes('wait')) { + cb(killedError, '', '') + return + } + cb(null, JSON.stringify({ success: true, data: { snapshot: 'fresh' } }), '') + } + ) + + for (let i = 0; i < 3; i++) { + await expect(bridge.wait({ selector: '.missing', timeout: 1200 })).rejects.toThrow( + 'Timed out waiting for browser condition after 1200ms.' + ) + } + + await bridge.snapshot() + + expect(CdpWsProxyMock.instances).toHaveLength(1) + }) + + // ── Stderr passthrough on non-timeout errors ── + + it('passes stderr through as error message on execFile failure', async () => { + execFileMock.mockImplementation( + (_bin: string, _args: string[], _opts: unknown, cb: Function) => { + cb(new Error('exit code 1'), '', 'daemon crashed: segfault') + } + ) + await expect(bridge.snapshot()).rejects.toThrow('daemon crashed: segfault') + }) + + it('falls back to error.message when stderr is empty', async () => { + execFileMock.mockImplementation( + (_bin: string, _args: string[], _opts: unknown, cb: Function) => { + cb(new Error('Command failed'), '', '') + } + ) + await expect(bridge.snapshot()).rejects.toThrow('Command failed') + }) + + // ── Malformed JSON returns BrowserError ── + + it('returns browser_error with truncated output for malformed JSON', async () => { + execFileMock.mockImplementation( + (_bin: string, _args: string[], _opts: unknown, cb: Function) => { + cb(null, 'Error: not json output', '') + } + ) + await expect(bridge.snapshot()).rejects.toThrow('Unexpected output from agent-browser') + }) + + // ── destroyAllSessions ── + + it('destroys all active sessions', async () => { + succeedWith({ snapshot: 'tree' }) + await bridge.snapshot() + + // Should have one session now + succeedWith(null) // for the 'close' call + await bridge.destroyAllSessions() + + // Next command should re-create session with --cdp + succeedWith({ 
snapshot: 'fresh' }) + await bridge.snapshot() + + const snapshotCalls = execFileMock.mock.calls.filter((c: unknown[]) => + (c[1] as string[]).includes('snapshot') + ) + const lastSnapshotArgs = snapshotCalls.at(-1)![1] as string[] + expect(lastSnapshotArgs).toContain('--cdp') + }) +}) diff --git a/src/main/browser/agent-browser-bridge.ts b/src/main/browser/agent-browser-bridge.ts new file mode 100644 index 00000000..734c0acb --- /dev/null +++ b/src/main/browser/agent-browser-bridge.ts @@ -0,0 +1,1918 @@ +/* eslint-disable max-lines */ +import { execFile, type ChildProcess } from 'child_process' +import { existsSync, accessSync, chmodSync, readFileSync, constants } from 'fs' +import { join } from 'path' +import { platform, arch } from 'os' +import { app } from 'electron' +import { CdpWsProxy } from './cdp-ws-proxy' +import { captureFullPageScreenshot } from './cdp-screenshot' +import type { BrowserManager } from './browser-manager' +import { BrowserError } from './cdp-bridge' +import type { + BrowserTabInfo, + BrowserTabListResult, + BrowserTabSwitchResult, + BrowserSnapshotResult, + BrowserClickResult, + BrowserGotoResult, + BrowserFillResult, + BrowserTypeResult, + BrowserSelectResult, + BrowserScrollResult, + BrowserBackResult, + BrowserReloadResult, + BrowserScreenshotResult, + BrowserEvalResult, + BrowserHoverResult, + BrowserDragResult, + BrowserUploadResult, + BrowserWaitResult, + BrowserCheckResult, + BrowserFocusResult, + BrowserClearResult, + BrowserSelectAllResult, + BrowserKeypressResult, + BrowserPdfResult, + BrowserCookieGetResult, + BrowserCookieSetResult, + BrowserCookieDeleteResult, + BrowserViewportResult, + BrowserGeolocationResult, + BrowserInterceptEnableResult, + BrowserInterceptDisableResult, + BrowserConsoleResult, + BrowserNetworkLogResult, + BrowserCaptureStartResult, + BrowserCaptureStopResult, + BrowserCookie +} from '../../shared/runtime-types' + +// Why: must exceed agent-browser's internal per-command timeouts (goto defaults to 30s, 
/**
 * Builds the file name of the platform-specific agent-browser executable,
 * in the form `agent-browser-<platform>-<arch>` with a `.exe` suffix on
 * Windows (for example `agent-browser-win32-x64.exe`).
 */
function agentBrowserNativeName(): string {
  const baseName = ['agent-browser', platform(), arch()].join('-')
  // Why: only the Windows binary carries a file extension.
  if (process.platform === 'win32') {
    return `${baseName}.exe`
  }
  return baseName
}
/**
 * Splits a command string into arguments, honouring single and double quotes.
 *
 * Why: exec commands arrive as one string (e.g. 'keyboard inserttext "hello world"').
 * A naive whitespace split would break quoted values apart; here a quoted span
 * becomes (part of) a single argument with the surrounding quotes removed.
 *
 * Rules:
 * - Unquoted spaces separate arguments; runs of spaces collapse.
 * - Inside double quotes a single quote is literal, and vice versa.
 * - Adjacent quoted/unquoted pieces concatenate (`a"b c"d` -> `ab cd`).
 * - An explicitly quoted empty string (`""` or `''`) is kept as an empty
 *   argument, as a shell would, so `fill @e1 ""` still passes a value.
 * - There is no backslash escaping; an unterminated quote runs to the end of
 *   the input.
 *
 * @param input Raw command line.
 * @returns The parsed argument list (empty for blank input).
 */
function parseShellArgs(input: string): string[] {
  const args: string[] = []
  let current = ''
  // Why: `current === ''` cannot tell "no token yet" apart from a deliberately
  // quoted empty argument, so token presence is tracked separately.
  let tokenStarted = false
  let openQuote: string | null = null

  for (const ch of input) {
    if (openQuote !== null) {
      if (ch === openQuote) {
        openQuote = null
      } else {
        current += ch
      }
      continue
    }

    if (ch === '"' || ch === "'") {
      openQuote = ch
      tokenStarted = true
      continue
    }

    if (ch === ' ') {
      if (tokenStarted) {
        args.push(current)
        current = ''
        tokenStarted = false
      }
      continue
    }

    current += ch
    tokenStarted = true
  }

  if (tokenStarted) {
    args.push(current)
  }
  return args
}
/**
 * Maps an agent-browser error message to a bridge error code.
 *
 * @param message Error text reported by agent-browser.
 * @returns `'browser_stale_ref'` when the text matches one of the stale/unknown-ref
 *   phrasings, otherwise `'browser_error'`.
 */
function classifyErrorCode(message: string): string {
  if (/unknown ref|ref not found|element not found: @e/i.test(message)) {
    return 'browser_stale_ref'
  }
  return 'browser_error'
}

/**
 * Reports whether an error message matches one of the transport-level failure
 * phrasings (destroyed session, refused connection, failed CDP discovery or
 * websocket connect). Matching is case-insensitive.
 */
function isTabClosedTransportError(message: string): boolean {
  return /session destroyed while command|session destroyed while commands|connection refused|cdp discovery methods failed|websocket connect failed/i.test(
    message
  )
}

/**
 * Builds the "page no longer available" message for a session.
 *
 * @param sessionName Session name; names of the form `orca-tab-<pageId>` yield a
 *   message that mentions the page id, anything else yields a generic message.
 */
function pageUnavailableMessageForSession(sessionName: string): string {
  const prefix = 'orca-tab-'
  const browserPageId = sessionName.startsWith(prefix) ? sessionName.slice(prefix.length) : null
  return browserPageId
    ? `Browser page ${browserPageId} is no longer available`
    : 'Browser tab is no longer available'
}

/**
 * Converts agent-browser's JSON stdout into a tagged result.
 *
 * The expected envelope is an object of the shape `{ success, data, error }`.
 * Output that is not valid JSON, or that parses to something other than an object
 * (for example `null` or a bare number), is reported as a `browser_error` that
 * quotes the first 1000 characters of the output instead of throwing.
 *
 * @param stdout Raw standard output of the agent-browser process.
 * @returns `{ ok: true, result }` when the envelope reports success, otherwise
 *   `{ ok: false, error }` with a classified code and a string message.
 */
function translateResult(
  stdout: string
): { ok: true; result: unknown } | { ok: false; error: { code: string; message: string } } {
  const unexpectedOutput = (): { ok: false; error: { code: string; message: string } } => ({
    ok: false,
    error: {
      code: 'browser_error',
      message: `Unexpected output from agent-browser: ${stdout.slice(0, 1000)}`
    }
  })

  let parsed: unknown
  try {
    parsed = JSON.parse(stdout)
  } catch {
    return unexpectedOutput()
  }
  // Why: JSON.parse happily returns null and primitives; reading `.success` on null
  // would throw a TypeError out of a function whose contract is to return a result.
  if (typeof parsed !== 'object' || parsed === null) {
    return unexpectedOutput()
  }

  const envelope = parsed as { success?: unknown; data?: unknown; error?: unknown }
  if (envelope.success) {
    return { ok: true, result: envelope.data }
  }

  // Why: keep `message` a string even if the envelope carries a structured error.
  let message: string
  if (typeof envelope.error === 'string') {
    message = envelope.error
  } else if (envelope.error == null) {
    message = 'Unknown browser error'
  } else {
    message = JSON.stringify(envelope.error)
  }
  return {
    ok: false,
    error: {
      code: classifyErrorCode(message),
      message
    }
  }
}

export class AgentBrowserBridge {
  // NOTE(review): the generic type arguments on the collections below were missing in
  // the reviewed text and are restored from how each field is used; the element types
  // of commandQueues and pendingSessionCreation are not visible here — confirm them.

  // Why: per-worktree active tab prevents one worktree's tab switch from
  // affecting another worktree's command targeting.
  private readonly activeWebContentsPerWorktree = new Map<string, number>()
  private activeWebContentsId: number | null = null
  private readonly sessions = new Map<string, SessionState>()
  private readonly commandQueues = new Map<string, QueuedCommand[]>()
  private readonly processingQueues = new Set<string>()
  // Why: screenshot prep temporarily changes shared renderer visibility/focus
  // state. Per-session queues only serialize commands within one browser tab, so
  // concurrent screenshots on different tabs can otherwise interleave
  // ensureWebviewVisible()/restore and blank each other's capture.
  private screenshotTurn: Promise<void> = Promise.resolve()
  private readonly agentBrowserBin: string
  // Why: when a process swap destroys a session that had active intercept patterns,
  // store them here keyed by sessionName so the next ensureSession + first successful
  // command can restore them automatically.
  private readonly pendingInterceptRestore = new Map<string, string[]>()
  // Why: two concurrent CLI calls can both enter ensureSession before either creates
  // the session entry. This promise-based lock ensures only one creation proceeds.
  private readonly pendingSessionCreation = new Map<string, Promise<void>>()
  // Why: session destruction shells out to `agent-browser close`, which is async
  // and keyed by session name. Recreating the same session before that close
  // finishes can let the old teardown close the new daemon session.
+ private readonly pendingSessionDestruction = new Map>() + private readonly cancelledProcesses = new WeakSet() + + constructor(private readonly browserManager: BrowserManager) { + this.agentBrowserBin = resolveAgentBrowserBinary() + } + + // ── Tab tracking ── + + setActiveTab(webContentsId: number, worktreeId?: string): void { + this.activeWebContentsId = webContentsId + if (worktreeId) { + this.activeWebContentsPerWorktree.set(worktreeId, webContentsId) + } + } + + private selectFallbackActiveWebContents( + worktreeId: string, + excludedWebContentsId?: number + ): number | null { + for (const [, wcId] of this.getRegisteredTabs(worktreeId)) { + if (wcId === excludedWebContentsId) { + continue + } + if (this.getWebContents(wcId)) { + this.activeWebContentsPerWorktree.set(worktreeId, wcId) + return wcId + } + } + this.activeWebContentsPerWorktree.delete(worktreeId) + return null + } + + getActiveWebContentsId(): number | null { + return this.activeWebContentsId + } + + getPageInfo( + worktreeId?: string, + browserPageId?: string + ): { browserPageId: string; url: string; title: string } | null { + try { + const target = this.resolveCommandTarget(worktreeId, browserPageId) + const wc = this.getWebContents(target.webContentsId) + if (!wc) { + return null + } + return { + browserPageId: target.browserPageId, + url: wc.getURL() ?? '', + title: wc.getTitle() ?? '' + } + } catch { + return null + } + } + + onTabChanged(webContentsId: number, worktreeId?: string): void { + this.activeWebContentsId = webContentsId + if (worktreeId) { + this.activeWebContentsPerWorktree.set(worktreeId, webContentsId) + } + } + + async onTabClosed(webContentsId: number): Promise { + const browserPageId = this.resolveTabIdSafe(webContentsId) + const owningWorktreeId = browserPageId + ? 
this.browserManager.getWorktreeIdForTab(browserPageId) + : undefined + let nextWorktreeActiveWebContentsId: number | null = null + if ( + owningWorktreeId && + this.activeWebContentsPerWorktree.get(owningWorktreeId) === webContentsId + ) { + nextWorktreeActiveWebContentsId = this.selectFallbackActiveWebContents( + owningWorktreeId, + webContentsId + ) + } + if (this.activeWebContentsId === webContentsId) { + this.activeWebContentsId = nextWorktreeActiveWebContentsId + } + if (browserPageId) { + await this.destroySession(`orca-tab-${browserPageId}`) + } + } + + async onProcessSwap( + browserPageId: string, + newWebContentsId: number, + previousWebContentsId?: number + ): Promise { + // Why: Electron process swaps give same browserPageId but new webContentsId. + // Old proxy's webContents is destroyed, so destroy session and let next command recreate. + const sessionName = `orca-tab-${browserPageId}` + const session = this.sessions.get(sessionName) + const oldWebContentsId = previousWebContentsId ?? session?.webContentsId + const owningWorktreeId = this.browserManager.getWorktreeIdForTab(browserPageId) + // Why: save active intercept patterns before destroying so they can be restored + // on the new session after the next successful init command. 
+ if (session && session.activeInterceptPatterns.length > 0) { + this.pendingInterceptRestore.set(sessionName, [...session.activeInterceptPatterns]) + } + await this.destroySession(sessionName) + if (oldWebContentsId != null && this.activeWebContentsId === oldWebContentsId) { + this.activeWebContentsId = newWebContentsId + } + if ( + owningWorktreeId && + oldWebContentsId != null && + this.activeWebContentsPerWorktree.get(owningWorktreeId) === oldWebContentsId + ) { + this.activeWebContentsPerWorktree.set(owningWorktreeId, newWebContentsId) + } + } + + // ── Worktree-scoped tab queries ── + + getRegisteredTabs(worktreeId?: string): Map { + const all = this.browserManager.getWebContentsIdByTabId() + if (!worktreeId) { + return all + } + + const filtered = new Map() + for (const [tabId, wcId] of all) { + if (this.browserManager.getWorktreeIdForTab(tabId) === worktreeId) { + filtered.set(tabId, wcId) + } + } + return filtered + } + + // ── Tab management ── + + tabList(worktreeId?: string): BrowserTabListResult { + const tabs = this.getRegisteredTabs(worktreeId) + // Why: use per-worktree active tab for the "active" flag so tab-list is + // consistent with what resolveActiveTab would pick for command routing. + // Keep this read-only though: discovery commands must not mutate the + // active-tab state that later bare commands rely on. + let activeWcId = + (worktreeId && this.activeWebContentsPerWorktree.get(worktreeId)) ?? this.activeWebContentsId + const result: BrowserTabInfo[] = [] + let index = 0 + let firstLiveWcId: number | null = null + for (const [tabId, wcId] of tabs) { + const wc = this.getWebContents(wcId) + if (!wc) { + continue + } + if (firstLiveWcId === null) { + firstLiveWcId = wcId + } + result.push({ + browserPageId: tabId, + index: index++, + url: wc.getURL() ?? '', + title: wc.getTitle() ?? 
'', + active: wcId === activeWcId + }) + } + // Why: if no tab has been explicitly activated yet, surface the first live + // tab as active in the listing without mutating bridge state. That keeps + // `tab list` side-effect free while still showing users which tab a bare + // command would select next. + if (activeWcId == null && firstLiveWcId !== null) { + activeWcId = firstLiveWcId + if (result.length > 0) { + result[0].active = true + } + } + return { tabs: result } + } + + // Why: tab switch must go through the command queue to prevent race conditions + // with in-flight commands that target the previously active tab. + async tabSwitch( + index: number | undefined, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueCommand(worktreeId, async () => { + const tabs = this.getRegisteredTabs(worktreeId) + // Why: queue delay means the tab list can change between RPC arrival and + // execution time. Recompute against live webContents here so we never + // activate a tab index that disappeared while earlier commands were running. + const liveEntries = [...tabs.entries()].filter(([, wcId]) => this.getWebContents(wcId)) + let switchedIndex = index ?? -1 + let resolvedPageId = browserPageId + if (resolvedPageId) { + switchedIndex = liveEntries.findIndex(([tabId]) => tabId === resolvedPageId) + } + if (switchedIndex < 0 || switchedIndex >= liveEntries.length) { + const targetLabel = + resolvedPageId != null ? `Browser page ${resolvedPageId}` : `Tab index ${index}` + throw new BrowserError( + 'browser_tab_not_found', + `${targetLabel} out of range (0-${liveEntries.length - 1})` + ) + } + const [tabId, wcId] = liveEntries[switchedIndex] + this.activeWebContentsId = wcId + // Why: resolveActiveTab prefers the per-worktree map over the global when + // worktreeId is provided. Without this update, subsequent commands would + // still route to the previous tab despite tabSwitch reporting success. + const owningWorktreeId = worktreeId ?? 
this.browserManager.getWorktreeIdForTab(tabId) + // Why: `tab switch --page ` may omit --worktree because the page id is + // already a stable target. We still need to update the owning worktree's + // active-tab slot so later worktree-scoped commands follow the tab that was + // just activated instead of the previously active one. + if (owningWorktreeId) { + this.activeWebContentsPerWorktree.set(owningWorktreeId, wcId) + } + return { switched: switchedIndex, browserPageId: tabId } + }) + } + + // ── Core commands (typed) ── + + async snapshot(worktreeId?: string, browserPageId?: string): Promise { + // Why: snapshot creates fresh refs so it must bypass the stale-ref guard + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName, target) => { + const result = (await this.execAgentBrowser(sessionName, [ + 'snapshot' + ])) as BrowserSnapshotResult + return { + ...result, + browserPageId: target.browserPageId + } + }) + } + + async click( + element: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['click', element])) as BrowserClickResult + }) + } + + async dblclick( + element: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['dblclick', element])) as BrowserClickResult + }) + } + + async goto(url: string, worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['goto', url])) as BrowserGotoResult + }) + } + + async fill( + element: string, + value: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + // Why: Input.insertText via Electron's debugger API does not 
deliver text to + // focused inputs in webviews — this is a fundamental Electron limitation. + // Agent-browser's fill and click also fail for the same reason. + // Workaround: use agent-browser's focus to resolve the ref, then set the value + // directly via JS and dispatch input/change events for React/framework compat. + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + await this.execAgentBrowser(sessionName, ['focus', element]) + const escaped = value.replace(/\\/g, '\\\\').replace(/'/g, "\\'") + await this.execAgentBrowser(sessionName, [ + 'eval', + `(() => { const el = document.activeElement; if (el) { const nativeSetter = Object.getOwnPropertyDescriptor(Object.getPrototypeOf(el), 'value')?.set; if (nativeSetter) { nativeSetter.call(el, '${escaped}'); } else { el.value = '${escaped}'; } el.dispatchEvent(new Event('input', { bubbles: true })); el.dispatchEvent(new Event('change', { bubbles: true })); } })()` + ]) + return { filled: element } as BrowserFillResult + }) + } + + async type( + input: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, [ + 'keyboard', + 'type', + input + ])) as BrowserTypeResult + }) + } + + async select( + element: string, + value: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, [ + 'select', + element, + value + ])) as BrowserSelectResult + }) + } + + async scroll( + direction: string, + amount?: number, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['scroll', direction] + if (amount != null) { + args.push(String(amount)) + } + return (await 
this.execAgentBrowser(sessionName, args)) as BrowserScrollResult + }) + } + + async scrollIntoView( + element: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['scrollintoview', element]) + }) + } + + async get( + what: string, + selector?: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['get', what] + if (selector) { + args.push(selector) + } + return await this.execAgentBrowser(sessionName, args) + }) + } + + async is( + what: string, + selector: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['is', what, selector]) + }) + } + + // ── Keyboard commands ── + + async keyboardInsertText( + text: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['keyboard', 'inserttext', text]) + }) + } + + // ── Mouse commands ── + + async mouseMove( + x: number, + y: number, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['mouse', 'move', String(x), String(y)]) + }) + } + + async mouseDown(button?: string, worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['mouse', 'down'] + if (button) { + args.push(button) + } + return await this.execAgentBrowser(sessionName, args) + }) + } + + async mouseUp(button?: string, 
worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['mouse', 'up'] + if (button) { + args.push(button) + } + return await this.execAgentBrowser(sessionName, args) + }) + } + + async mouseWheel( + dy: number, + dx?: number, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['mouse', 'wheel', String(dy)] + if (dx != null) { + args.push(String(dx)) + } + return await this.execAgentBrowser(sessionName, args) + }) + } + + // ── Find (semantic locators) ── + + async find( + locator: string, + value: string, + action: string, + text?: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['find', locator, value, action] + if (text) { + args.push(text) + } + return await this.execAgentBrowser(sessionName, args) + }) + } + + // ── Set commands ── + + async setDevice(name: string, worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['set', 'device', name]) + }) + } + + async setOffline(state?: string, worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['set', 'offline'] + if (state) { + args.push(state) + } + return await this.execAgentBrowser(sessionName, args) + }) + } + + async setHeaders( + headersJson: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['set', 'headers', headersJson]) + }) + } + + async setCredentials( + user: 
string, + pass: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['set', 'credentials', user, pass]) + }) + } + + async setMedia( + colorScheme?: string, + reducedMotion?: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['set', 'media'] + if (colorScheme) { + args.push(colorScheme) + } + if (reducedMotion) { + args.push(reducedMotion) + } + return await this.execAgentBrowser(sessionName, args) + }) + } + + // ── Clipboard commands ── + + async clipboardRead(worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['clipboard', 'read']) + }) + } + + async clipboardWrite( + text: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['clipboard', 'write', text]) + }) + } + + // ── Dialog commands ── + + async dialogAccept(text?: string, worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['dialog', 'accept'] + if (text) { + args.push(text) + } + return await this.execAgentBrowser(sessionName, args) + }) + } + + async dialogDismiss(worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['dialog', 'dismiss']) + }) + } + + // ── Storage commands ── + + async storageLocalGet( + key: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + 
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['storage', 'local', 'get', key]) + }) + } + + async storageLocalSet( + key: string, + value: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['storage', 'local', 'set', key, value]) + }) + } + + async storageLocalClear(worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['storage', 'local', 'clear']) + }) + } + + async storageSessionGet( + key: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['storage', 'session', 'get', key]) + }) + } + + async storageSessionSet( + key: string, + value: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['storage', 'session', 'set', key, value]) + }) + } + + async storageSessionClear(worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['storage', 'session', 'clear']) + }) + } + + // ── Download command ── + + async download( + selector: string, + path: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['download', selector, path]) + }) + } + + // ── Highlight command ── + + async 
highlight(selector: string, worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return await this.execAgentBrowser(sessionName, ['highlight', selector]) + }) + } + + async back(worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['back'])) as BrowserBackResult + }) + } + + async forward(worktreeId?: string, browserPageId?: string): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['forward'])) as BrowserBackResult + }) + } + + async reload(worktreeId?: string, browserPageId?: string): Promise { + // Why: reload can trigger a process swap in Electron (site-isolation), which + // destroys the session mid-command. Use the webContents directly for reload + // instead of going through agent-browser to avoid the session lifecycle issue. + // Routed through enqueueCommand so it serializes with other in-flight commands. 
+ return this.enqueueTargetedCommand(worktreeId, browserPageId, async (_sessionName, target) => { + const wc = this.getWebContents(target.webContentsId) + if (!wc) { + throw new BrowserError('browser_no_tab', 'Tab is no longer available') + } + wc.reload() + await new Promise((resolve) => { + const onFinish = (): void => { + wc.removeListener('did-finish-load', onFinish) + wc.removeListener('did-fail-load', onFail) + resolve() + } + const onFail = (): void => { + wc.removeListener('did-finish-load', onFinish) + wc.removeListener('did-fail-load', onFail) + resolve() + } + wc.on('did-finish-load', onFinish) + wc.on('did-fail-load', onFail) + setTimeout(onFinish, 10_000) + }) + return { url: wc.getURL(), title: wc.getTitle() } + }) + } + + async screenshot( + format?: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + // Why: agent-browser writes the screenshot to a temp file and returns + // { "path": "/tmp/screenshot-xxx.png" }. We read the file and return base64. + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return this.captureScreenshotCommand(sessionName, ['screenshot'], 300, format) + }) + } + + async fullPageScreenshot( + format?: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName, target) => { + return this.captureFullPageScreenshotCommand( + sessionName, + target.webContentsId, + 500, + format === 'jpeg' ? 
'jpeg' : 'png' + ) + }) + } + + private readScreenshotFromResult(raw: unknown, format?: string): BrowserScreenshotResult { + const parsed = raw as { path?: string } | undefined + if (!parsed?.path) { + throw new BrowserError('browser_error', 'Screenshot returned no file path') + } + if (!existsSync(parsed.path)) { + throw new BrowserError('browser_error', `Screenshot file not found: ${parsed.path}`) + } + const data = readFileSync(parsed.path).toString('base64') + return { data, format: format === 'jpeg' ? 'jpeg' : 'png' } as BrowserScreenshotResult + } + + private async captureScreenshotCommand( + sessionName: string, + commandArgs: string[], + settleMs: number, + format?: string + ): Promise { + return this.withSerializedScreenshotAccess(async () => { + const session = this.sessions.get(sessionName) + const restore = session + ? await this.browserManager.ensureWebviewVisible(session.webContentsId) + : () => {} + try { + // Why: after focusing the window and unhiding the webview, the compositor + // needs a short settle period to produce a painted frame. Waiting inside + // the global screenshot lock prevents another tab from stealing visible + // state before the current capture actually hits CDP. + await new Promise((r) => setTimeout(r, settleMs)) + const raw = await this.execAgentBrowser(sessionName, commandArgs) + return this.readScreenshotFromResult(raw, format) + } finally { + restore() + } + }) + } + + private async captureFullPageScreenshotCommand( + sessionName: string, + webContentsId: number, + settleMs: number, + format: 'png' | 'jpeg' + ): Promise { + return this.withSerializedScreenshotAccess(async () => { + const session = this.sessions.get(sessionName) + const restore = session + ? await this.browserManager.ensureWebviewVisible(session.webContentsId) + : () => {} + try { + // Why: full-page capture still depends on the guest compositor producing + // a fresh frame. 
Wait after activating the target webview so the direct + // CDP capture sees the live page instead of a stale surface. + await new Promise((r) => setTimeout(r, settleMs)) + const wc = this.getWebContents(webContentsId) + if (!wc) { + throw new BrowserError('browser_tab_not_found', 'Tab is no longer available') + } + return await captureFullPageScreenshot(wc, format) + } catch (error) { + throw new BrowserError('browser_error', (error as Error).message) + } finally { + restore() + } + }) + } + + private async withSerializedScreenshotAccess(execute: () => Promise): Promise { + const previousTurn = this.screenshotTurn.catch(() => {}) + let releaseTurn!: () => void + this.screenshotTurn = new Promise((resolve) => { + releaseTurn = resolve + }) + await previousTurn + try { + return await execute() + } finally { + releaseTurn() + } + } + + async evaluate( + expression: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['eval', expression])) as BrowserEvalResult + }) + } + + async hover( + element: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['hover', element])) as BrowserHoverResult + }) + } + + async drag( + from: string, + to: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['drag', from, to])) as BrowserDragResult + }) + } + + async upload( + element: string, + filePaths: string[], + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, [ + 
'upload', + element, + ...filePaths + ])) as BrowserUploadResult + }) + } + + async wait( + options?: { + selector?: string + timeout?: number + text?: string + url?: string + load?: string + fn?: string + state?: string + }, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['wait'] + const hasCondition = + !!options?.selector || !!options?.text || !!options?.url || !!options?.load || !!options?.fn + if (options?.selector) { + args.push(options.selector) + } else if (options?.timeout != null && !hasCondition) { + args.push(String(options.timeout)) + } + if (options?.text) { + args.push('--text', options.text) + } + if (options?.url) { + args.push('--url', options.url) + } + if (options?.load) { + args.push('--load', options.load) + } + if (options?.fn) { + args.push('--fn', options.fn) + } + const normalizedState = options?.state === 'visible' ? undefined : options?.state + if (normalizedState) { + args.push('--state', normalizedState) + } + // Why: agent-browser's selector wait surface does not support `--state visible` + // or a documented per-command `--timeout`. Orca normalizes "visible" back + // to the default selector wait semantics and enforces the requested timeout + // at the bridge layer so missing selectors fail as browser_timeout instead + // of hanging until the generic runtime RPC timeout fires. + return (await this.execAgentBrowser(sessionName, args, { + timeoutMs: + options?.timeout != null && hasCondition + ? options.timeout + WAIT_PROCESS_TIMEOUT_GRACE_MS + : undefined, + timeoutError: + options?.timeout != null && hasCondition + ? 
new BrowserError( + 'browser_timeout', + `Timed out waiting for browser condition after ${options.timeout}ms.` + ) + : undefined + })) as BrowserWaitResult + }) + } + + async check( + element: string, + checked: boolean, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = checked ? ['check', element] : ['uncheck', element] + return (await this.execAgentBrowser(sessionName, args)) as BrowserCheckResult + }) + } + + async focus( + element: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['focus', element])) as BrowserFocusResult + }) + } + + async clear( + element: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + // Why: agent-browser has no clear command — use fill with empty string + return (await this.execAgentBrowser(sessionName, ['fill', element, ''])) as BrowserClearResult + }) + } + + async selectAll( + element: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + // Why: agent-browser has no select-all command — implement as focus + Ctrl+A + await this.execAgentBrowser(sessionName, ['focus', element]) + return (await this.execAgentBrowser(sessionName, [ + 'press', + 'Control+a' + ])) as BrowserSelectAllResult + }) + } + + async keypress( + key: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, ['press', key])) as BrowserKeypressResult + }) + } + + async pdf(worktreeId?: string, browserPageId?: string): Promise 
{ + // Why: agent-browser's pdf command via CDP Page.printToPDF hangs in Electron + // webviews. Use Electron's native webContents.printToPDF() which is reliable. + // Routed through enqueueCommand so it serializes with other in-flight commands. + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (_sessionName, target) => { + const wc = this.getWebContents(target.webContentsId) + if (!wc) { + throw new BrowserError('browser_no_tab', 'Tab is no longer available') + } + const buffer = await wc.printToPDF({ + printBackground: true, + preferCSSPageSize: true + }) + return { data: buffer.toString('base64') } + }) + } + + // ── Cookie commands ── + + async cookieGet( + _url?: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + return (await this.execAgentBrowser(sessionName, [ + 'cookies', + 'get' + ])) as BrowserCookieGetResult + }) + } + + async cookieSet( + cookie: Partial, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['cookies', 'set', cookie.name ?? '', cookie.value ?? 
''] + if (cookie.domain) { + args.push('--domain', cookie.domain) + } + if (cookie.path) { + args.push('--path', cookie.path) + } + if (cookie.secure) { + args.push('--secure') + } + if (cookie.httpOnly) { + args.push('--httpOnly') + } + if (cookie.sameSite) { + args.push('--sameSite', cookie.sameSite) + } + if (cookie.expires != null) { + args.push('--expires', String(cookie.expires)) + } + return (await this.execAgentBrowser(sessionName, args)) as BrowserCookieSetResult + }) + } + + async cookieDelete( + name?: string, + domain?: string, + _url?: string, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => { + const args = ['cookies', 'clear'] + if (name) { + args.push('--name', name) + } + if (domain) { + args.push('--domain', domain) + } + return (await this.execAgentBrowser(sessionName, args)) as BrowserCookieDeleteResult + }) + } + + // ── Viewport / emulation commands ── + + async setViewport( + width: number, + height: number, + scale = 1, + mobile = false, + worktreeId?: string, + browserPageId?: string + ): Promise { + return this.enqueueTargetedCommand(worktreeId, browserPageId, async (_sessionName, target) => { + const wc = this.getWebContents(target.webContentsId) + if (!wc) { + throw new BrowserError('browser_tab_not_found', 'Tab is no longer available') + } + const dbg = wc.debugger + if (!dbg.isAttached()) { + throw new BrowserError('browser_error', 'Debugger not attached') + } + + // Why: agent-browser only supports width/height/scale for `set viewport`; + // it has no `mobile` flag. Orca's CLI exposes `--mobile`, so apply the + // emulation directly through CDP to keep the public CLI contract honest. 
+      await dbg.sendCommand('Emulation.setDeviceMetricsOverride', {
+        width,
+        height,
+        deviceScaleFactor: scale,
+        mobile
+      })
+
+      return {
+        width,
+        height,
+        deviceScaleFactor: scale,
+        mobile
+      }
+    })
+  }
+
+  // Runs agent-browser `set geo` with the latitude and longitude as strings.
+  // `_accuracy` is accepted for interface compatibility but is not forwarded.
+  async setGeolocation(
+    lat: number,
+    lon: number,
+    _accuracy?: number,
+    worktreeId?: string,
+    browserPageId?: string
+  ): Promise {
+    return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
+      return (await this.execAgentBrowser(sessionName, [
+        'set',
+        'geo',
+        String(lat),
+        String(lon)
+      ])) as BrowserGeolocationResult
+    })
+  }
+
+  // ── Network interception commands ──
+
+  // Enables interception for the targeted page by running `network route` with a
+  // single URL pattern, then records the active patterns on the session.
+  //
+  // patterns: only the first entry is routed; when the list is missing or empty
+  // the catch-all '**/*' is routed instead.
+  // Returns the translated agent-browser result for the route command.
+  async interceptEnable(
+    patterns?: string[],
+    worktreeId?: string,
+    browserPageId?: string
+  ): Promise {
+    return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
+      // Why: agent-browser intercepts through "network route" followed by a URL
+      // pattern. Only the first requested pattern is routed here.
+      const urlPattern = patterns?.[0] ?? '**/*'
+      const args = ['network', 'route', urlPattern]
+      const result = (await this.execAgentBrowser(
+        sessionName,
+        args
+      )) as BrowserInterceptEnableResult
+      const session = this.sessions.get(sessionName)
+      if (session) {
+        this.pendingInterceptRestore.delete(sessionName)
+        // Why: when the caller gave no usable pattern, record the catch-all that was
+        // actually routed. The restore path in execAgentBrowser skips empty lists and
+        // re-routes the first stored entry, so storing '*' or [] here would not
+        // reproduce this route. NOTE(review): presumably pendingInterceptRestore is
+        // seeded from activeInterceptPatterns elsewhere — confirm.
+        session.activeInterceptPatterns =
+          patterns && patterns.length > 0 ? patterns : [urlPattern]
+      }
+      return result
+    })
+  }
+
+  // Runs `network unroute`, then clears both the pending-restore entry and the
+  // session's recorded intercept patterns.
+  async interceptDisable(
+    worktreeId?: string,
+    browserPageId?: string
+  ): Promise {
+    return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
+      const result = (await this.execAgentBrowser(sessionName, [
+        'network',
+        'unroute'
+      ])) as BrowserInterceptDisableResult
+      const session = this.sessions.get(sessionName)
+      if (session) {
+        this.pendingInterceptRestore.delete(sessionName)
+        session.activeInterceptPatterns = []
+      }
+      return result
+    })
+  }
+
+  // Returns the result of agent-browser `network requests` for the targeted page.
+  async interceptList(
+    worktreeId?: string,
+    browserPageId?: string
+  ): Promise<{ requests: unknown[] }> {
+    return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
+      return (await this.execAgentBrowser(sessionName, ['network', 'requests'])) as {
+        requests: unknown[]
+      }
+    })
+  }
+
+  // TODO: Add interceptContinue/interceptBlock once agent-browser supports per-request
+  // interception decisions. Currently agent-browser only operates on URL pattern-level
+  // routing, not individual request IDs, so the RPC/CLI interface doesn't map cleanly.
+
+  // ── Capture commands ──
+
+  // Runs `network har start` and, if the session still exists afterwards, marks
+  // it as capturing.
+  async captureStart(
+    worktreeId?: string,
+    browserPageId?: string
+  ): Promise {
+    return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
+      const result = (await this.execAgentBrowser(sessionName, [
+        'network',
+        'har',
+        'start'
+      ])) as BrowserCaptureStartResult
+      const session = this.sessions.get(sessionName)
+      if (session) {
+        session.activeCapture = true
+      }
+      return result
+    })
+  }
+
+  // Runs `network har stop` and, if the session still exists afterwards, clears
+  // its capturing flag.
+  async captureStop(
+    worktreeId?: string,
+    browserPageId?: string
+  ): Promise {
+    return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
+      const result = (await this.execAgentBrowser(sessionName, [
+        'network',
+        'har',
+        'stop'
+      ])) as BrowserCaptureStopResult
+      const session = this.sessions.get(sessionName)
+      if (session) {
+        session.activeCapture = false
+      }
+      return result
+    })
+  }
+
+  // Returns the result of agent-browser `console`. `_limit` is accepted but is
+  // not forwarded to the command.
+  async consoleLog(
+    _limit?: number,
+    worktreeId?: string,
+    browserPageId?: string
+  ): Promise {
+    return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
+      return (await this.execAgentBrowser(sessionName, ['console'])) as BrowserConsoleResult
+    })
+  }
+
+  // Returns the result of agent-browser `network requests`. `_limit` is accepted
+  // but is not forwarded to the command.
+  async networkLog(
+    _limit?: number,
+    worktreeId?: string,
+    browserPageId?: string
+  ): Promise {
+    return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
+      return (await this.execAgentBrowser(sessionName, [
+        'network',
+        'requests'
+      ])) as BrowserNetworkLogResult
+    })
+  }
+
+  // ── Generic passthrough ──
+
+  // Runs a raw agent-browser command line against the targeted page's session.
+  //
+  // command: the command text; it is split with parseShellArgs, and any --cdp or
+  // --session flag (with its value) is removed before execution.
+  // Returns whatever execAgentBrowser returns for the remaining arguments.
+  async exec(command: string, worktreeId?: string, browserPageId?: string): Promise {
+    return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
+      // Why: strip --cdp and --session from the raw command to prevent session/target
+      // injection. Filter the parsed tokens instead of pattern-matching the raw
+      // string: a raw-string regex misses `--cdp=9222`, a quoted `"--cdp" 9222` and a
+      // trailing bare flag, and it cuts quoted values such as `--cdp "a b"` in half.
+      // NOTE(review): assumes parseShellArgs returns one string per argument with
+      // shell-style quoting resolved — confirm against its definition.
+      const reservedFlags = ['--cdp', '--session']
+      const isReserved = (token: string): boolean =>
+        reservedFlags.some((flag) => token === flag || token.startsWith(`${flag}=`))
+
+      const args: string[] = []
+      let dropNext = false
+      for (const token of parseShellArgs(command.trim())) {
+        if (isReserved(token)) {
+          // A bare flag takes the following token as its value; `--flag=value`
+          // carries the value itself.
+          dropNext = !token.includes('=')
+          continue
+        }
+        if (dropNext) {
+          dropNext = false
+          continue
+        }
+        args.push(token)
+      }
+      return await this.execAgentBrowser(sessionName, args)
+    })
+
} + + // ── Session lifecycle ── + + async destroyAllSessions(): Promise { + const promises: Promise[] = [] + for (const sessionName of this.sessions.keys()) { + promises.push(this.destroySession(sessionName)) + } + await Promise.allSettled(promises) + } + + // ── Internal ── + + private async enqueueCommand( + worktreeId: string | undefined, + execute: (sessionName: string) => Promise + ): Promise { + return this.enqueueTargetedCommand(worktreeId, undefined, async (sessionName) => + execute(sessionName) + ) + } + + private async enqueueTargetedCommand( + worktreeId: string | undefined, + browserPageId: string | undefined, + execute: (sessionName: string, target: ResolvedBrowserCommandTarget) => Promise + ): Promise { + const target = this.resolveCommandTarget(worktreeId, browserPageId) + const sessionName = `orca-tab-${target.browserPageId}` + + await this.ensureSession(sessionName, target.browserPageId, target.webContentsId) + + return new Promise((resolve, reject) => { + let queue = this.commandQueues.get(sessionName) + if (!queue) { + queue = [] + this.commandQueues.set(sessionName, queue) + } + queue.push({ + execute: (() => execute(sessionName, target)) as () => Promise, + resolve: resolve as (value: unknown) => void, + reject + }) + this.processQueue(sessionName) + }) + } + + private async processQueue(sessionName: string): Promise { + if (this.processingQueues.has(sessionName)) { + return + } + this.processingQueues.add(sessionName) + + const queue = this.commandQueues.get(sessionName) + while (queue && queue.length > 0) { + const cmd = queue.shift()! 
+ try { + const result = await cmd.execute() + cmd.resolve(result) + } catch (error) { + cmd.reject(error) + } + } + + this.processingQueues.delete(sessionName) + } + + getActivePageId(worktreeId?: string, browserPageId?: string): string | null { + try { + return this.resolveCommandTarget(worktreeId, browserPageId).browserPageId + } catch { + return null + } + } + + private resolveCommandTarget( + worktreeId?: string, + browserPageId?: string + ): ResolvedBrowserCommandTarget { + if (!browserPageId) { + return this.resolveActiveTab(worktreeId) + } + + const tabs = this.getRegisteredTabs(worktreeId) + const webContentsId = tabs.get(browserPageId) + if (webContentsId == null) { + const scope = worktreeId ? ' in this worktree' : '' + throw new BrowserError( + 'browser_tab_not_found', + `Browser page ${browserPageId} was not found${scope}` + ) + } + + if (!this.getWebContents(webContentsId)) { + throw new BrowserError( + 'browser_tab_not_found', + `Browser page ${browserPageId} is no longer available` + ) + } + + return { browserPageId, webContentsId } + } + + private resolveActiveTab(worktreeId?: string): ResolvedBrowserCommandTarget { + const tabs = this.getRegisteredTabs(worktreeId) + + if (tabs.size === 0) { + throw new BrowserError('browser_no_tab', 'No browser tab open in this worktree') + } + + // Why: prefer per-worktree active tab to prevent cross-worktree interference. + // Fall back to global activeWebContentsId for callers that don't pass worktreeId. + const preferredWcId = + (worktreeId && this.activeWebContentsPerWorktree.get(worktreeId)) ?? this.activeWebContentsId + + if (preferredWcId != null) { + for (const [tabId, wcId] of tabs) { + if (wcId === preferredWcId && this.getWebContents(wcId)) { + return { browserPageId: tabId, webContentsId: wcId } + } + } + } + + // Why: persisted store state can leave ghost tabs whose webContents no longer exist. + // Skip those and pick the first live tab. 
Also activate it so tabList and + // subsequent resolveActiveTab calls are consistent without requiring an + // explicit tab switch after app startup. + for (const [tabId, wcId] of tabs) { + if (this.getWebContents(wcId)) { + this.activeWebContentsId = wcId + if (worktreeId) { + this.activeWebContentsPerWorktree.set(worktreeId, wcId) + } + return { browserPageId: tabId, webContentsId: wcId } + } + } + + throw new BrowserError( + 'browser_no_tab', + 'No live browser tab available — all registered tabs have been destroyed' + ) + } + + private async ensureSession( + sessionName: string, + browserPageId: string, + webContentsId: number + ): Promise { + const pendingDestruction = this.pendingSessionDestruction.get(sessionName) + if (pendingDestruction) { + await pendingDestruction + } + + if (this.sessions.has(sessionName)) { + return + } + + // Why: two concurrent CLI calls can both reach here before either finishes + // creating the session. Without this lock, both would create proxies and the + // second would overwrite the first, leaking the first proxy's server/debugger. + const pending = this.pendingSessionCreation.get(sessionName) + if (pending) { + await pending + return + } + + const createSession = async (): Promise => { + const wc = this.getWebContents(webContentsId) + if (!wc) { + // Why: the renderer can unregister/destroy a webview between target + // resolution and session creation. Preserve the explicit page identity + // so callers get the same error shape as a settled closed tab. + throw new BrowserError( + 'browser_tab_not_found', + `Browser page ${browserPageId} is no longer available` + ) + } + + // Why: agent-browser's daemon persists session state (including the CDP port) + // across Orca restarts. A stale session ignores --cdp (already initialized) and + // connects to the dead port. Must await close so the daemon forgets the session + // before we pass --cdp with the new port. 
+ await new Promise((resolve) => { + execFile(this.agentBrowserBin, ['--session', sessionName, 'close'], { timeout: 3000 }, () => + resolve() + ) + }) + + const proxy = new CdpWsProxy(wc) + const cdpEndpoint = await proxy.start() + + this.sessions.set(sessionName, { + proxy, + cdpEndpoint, + initialized: false, + consecutiveTimeouts: 0, + activeInterceptPatterns: [], + activeCapture: false, + webContentsId, + activeProcess: null + }) + } + + const promise = createSession() + this.pendingSessionCreation.set(sessionName, promise) + try { + await promise + } finally { + this.pendingSessionCreation.delete(sessionName) + } + } + + private async destroySession(sessionName: string): Promise { + const pendingDestruction = this.pendingSessionDestruction.get(sessionName) + if (pendingDestruction) { + await pendingDestruction + return + } + + const session = this.sessions.get(sessionName) + if (!session) { + return + } + + this.sessions.delete(sessionName) + this.pendingSessionCreation.delete(sessionName) + + // Why: queued commands would hang forever if we just delete the queue — + // their promises would never resolve or reject. Drain and reject them. + const queue = this.commandQueues.get(sessionName) + this.commandQueues.delete(sessionName) + this.processingQueues.delete(sessionName) + if (queue) { + const err = new BrowserError( + 'browser_tab_closed', + 'Tab was closed while commands were queued' + ) + for (const cmd of queue) { + cmd.reject(err) + } + queue.length = 0 + } + + if (session.activeProcess) { + // Why: queued command rejection is not enough when a daemon command is + // already running. Kill the active process so callers do not wait for the + // generic exec timeout after the session/tab has already been destroyed. + this.cancelledProcesses.add(session.activeProcess) + try { + session.activeProcess.kill() + } catch { + // Process may already be exiting. 
+ } + session.activeProcess = null + } + + const destroy = (async (): Promise => { + try { + await this.runAgentBrowserRaw(sessionName, ['close']) + } catch { + // Session may already be dead + } + + await session.proxy.stop() + })() + this.pendingSessionDestruction.set(sessionName, destroy) + try { + await destroy + } finally { + this.pendingSessionDestruction.delete(sessionName) + } + } + + private async execAgentBrowser( + sessionName: string, + commandArgs: string[], + execOptions?: AgentBrowserExecOptions + ): Promise { + const session = this.sessions.get(sessionName) + if (!session) { + // Why: queued commands can reach execution after a concurrent tab close + // deletes the session. Surface this as a tab lifecycle error, not an + // opaque internal bridge failure. + throw this.createPageUnavailableError(sessionName) + } + + // Why: between enqueue time and execution time (queue delay), the webContents + // could be destroyed. Check here to give a clear error instead of letting the + // proxy fail with cryptic Electron debugger errors. + if (!this.getWebContents(session.webContentsId)) { + throw this.createPageUnavailableError(sessionName) + } + + const args = ['--session', sessionName] + const managesInterceptRoutes = + commandArgs[0] === 'network' && (commandArgs[1] === 'route' || commandArgs[1] === 'unroute') + + // Why: --cdp is session-initialization only — first command needs it, subsequent don't. + // Pass as port number (not ws:// URL) so agent-browser hits the proxy's HTTP /json + // endpoint for target discovery. The proxy only exposes the webview, preventing + // agent-browser from picking the host renderer page. 
+ const needsInit = !session.initialized + if (needsInit) { + const port = session.proxy.getPort() + args.push('--cdp', String(port)) + } + + args.push(...commandArgs, '--json') + + const stdout = await this.runAgentBrowserRaw(sessionName, args, execOptions) + const translated = translateResult(stdout) + + if (!translated.ok) { + throw this.createCommandError( + sessionName, + translated.error.message, + translated.error.code, + session.webContentsId + ) + } + + // Why: only mark initialized after a successful command — if the first --cdp + // connection fails, the next attempt should retry with --cdp. + if (needsInit) { + session.initialized = true + + // Why: after a process swap, intercept patterns are lost because the session + // was destroyed and recreated. Restore them now that the new session is live, + // unless the caller's first command explicitly reconfigured routing. + const pendingPatterns = managesInterceptRoutes + ? undefined + : this.pendingInterceptRestore.get(sessionName) + if (pendingPatterns && pendingPatterns.length > 0) { + this.pendingInterceptRestore.delete(sessionName) + try { + const urlPattern = pendingPatterns[0] ?? '**/*' + await this.runAgentBrowserRaw(sessionName, [ + '--session', + sessionName, + 'network', + 'route', + urlPattern, + '--json' + ]) + session.activeInterceptPatterns = pendingPatterns + } catch { + // Why: intercept restore is best-effort — don't fail the user's command + // if the new page doesn't support the same interception setup. + } + } + } + + return translated.result + } + + private createPageUnavailableError(sessionName: string): BrowserError { + return new BrowserError('browser_tab_not_found', pageUnavailableMessageForSession(sessionName)) + } + + private createCommandError( + sessionName: string, + message: string, + fallbackCode: string, + webContentsId?: number + ): BrowserError { + // Why: CDP "connection refused" can also mean a real proxy failure. 
Only + // convert it to a closed-page error when bridge state confirms the target is gone. + if ( + fallbackCode === 'browser_error' && + isTabClosedTransportError(message) && + this.isSessionTargetClosed(sessionName, webContentsId) + ) { + return this.createPageUnavailableError(sessionName) + } + return new BrowserError(fallbackCode, message) + } + + private isSessionTargetClosed(sessionName: string, webContentsId?: number): boolean { + const session = this.sessions.get(sessionName) + if (!session) { + return true + } + const targetWebContentsId = webContentsId ?? session.webContentsId + return !this.getWebContents(targetWebContentsId) + } + + private runAgentBrowserRaw( + sessionName: string, + args: string[], + execOptions?: AgentBrowserExecOptions + ): Promise { + return new Promise((resolve, reject) => { + const session = this.sessions.get(sessionName) + let child: ChildProcess | null = null + child = execFile( + this.agentBrowserBin, + args, + // Why: screenshots return large base64 strings that exceed Node's default + // 1MB maxBuffer, causing ENOBUFS and a timeout-like failure. + { + timeout: execOptions?.timeoutMs ?? EXEC_TIMEOUT_MS, + maxBuffer: 50 * 1024 * 1024, + env: execOptions?.envOverrides + ? 
{ ...process.env, ...execOptions.envOverrides } + : process.env + }, + (error, stdout, stderr) => { + if (session && session.activeProcess === child) { + session.activeProcess = null + } + if (child && this.cancelledProcesses.has(child)) { + this.cancelledProcesses.delete(child) + reject( + new BrowserError('browser_tab_closed', 'Tab was closed while command was running') + ) + return + } + + const liveSession = this.sessions.get(sessionName) + + if (error && (error as NodeJS.ErrnoException & { killed?: boolean }).killed) { + if (execOptions?.timeoutError) { + reject(execOptions.timeoutError) + return + } + if (liveSession) { + liveSession.consecutiveTimeouts++ + if (liveSession.consecutiveTimeouts >= CONSECUTIVE_TIMEOUT_LIMIT) { + // Why: 3 consecutive timeouts means the daemon is likely stuck — destroy and recreate + this.destroySession(sessionName) + } + } + reject(new BrowserError('browser_error', 'Browser command timed out')) + return + } + + if (liveSession) { + liveSession.consecutiveTimeouts = 0 + } + + if (error) { + // Why: agent-browser exits non-zero for command failures (e.g. clipboard + // NotAllowedError) but still writes structured JSON to stdout. Parse it + // so callers get the real error message instead of generic "Command failed". 
+ if (stdout) { + try { + const parsed = JSON.parse(stdout) + if (parsed.error) { + const code = classifyErrorCode(parsed.error) + reject( + this.createCommandError(sessionName, parsed.error, code, session?.webContentsId) + ) + return + } + } catch { + // stdout not valid JSON — fall through to stderr/error.message + } + } + const message = stderr || error.message + const code = classifyErrorCode(message) + reject(this.createCommandError(sessionName, message, code, session?.webContentsId)) + return + } + + resolve(stdout) + } + ) + if (session) { + session.activeProcess = child + } + }) + } + + private resolveTabIdSafe(webContentsId: number): string | null { + const tabs = this.browserManager.getWebContentsIdByTabId() + for (const [tabId, wcId] of tabs) { + if (wcId === webContentsId) { + return tabId + } + } + return null + } + + private getWebContents(webContentsId: number): Electron.WebContents | null { + try { + const { webContents } = require('electron') + return webContents.fromId(webContentsId) ?? 
null + } catch { + return null + } + } +} diff --git a/src/main/browser/browser-manager.test.ts b/src/main/browser/browser-manager.test.ts index f2e5fd8c..e92f5c55 100644 --- a/src/main/browser/browser-manager.test.ts +++ b/src/main/browser/browser-manager.test.ts @@ -3,6 +3,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' const { shellOpenExternalMock, + browserWindowFromWebContentsMock, menuBuildFromTemplateMock, guestOffMock, guestOnMock, @@ -13,6 +14,7 @@ const { screenGetCursorScreenPointMock } = vi.hoisted(() => ({ shellOpenExternalMock: vi.fn(), + browserWindowFromWebContentsMock: vi.fn(), menuBuildFromTemplateMock: vi.fn(), guestOffMock: vi.fn(), guestOnMock: vi.fn(), @@ -24,6 +26,9 @@ const { })) vi.mock('electron', () => ({ + BrowserWindow: { + fromWebContents: browserWindowFromWebContentsMock + }, clipboard: { writeText: vi.fn() }, shell: { openExternal: shellOpenExternalMock }, Menu: { @@ -44,6 +49,7 @@ describe('browserManager', () => { beforeEach(() => { shellOpenExternalMock.mockReset() + browserWindowFromWebContentsMock.mockReset() menuBuildFromTemplateMock.mockReset() guestOffMock.mockReset() guestOnMock.mockReset() @@ -148,6 +154,295 @@ describe('browserManager', () => { expect(shellOpenExternalMock).toHaveBeenCalledWith('https://example.com/login') }) + it('activates the owning browser workspace when ensuring a page-backed guest is visible', async () => { + const rendererExecuteJavaScriptMock = vi + .fn() + .mockResolvedValueOnce({ + prevTabType: 'terminal', + prevActiveWorktreeId: 'wt-1', + prevActiveBrowserWorkspaceId: 'workspace-prev', + prevActiveBrowserPageId: 'page-prev', + prevFocusedGroupTabId: 'tab-prev', + targetWorktreeId: 'wt-1', + targetBrowserWorkspaceId: 'workspace-1', + targetBrowserPageId: 'page-1' + }) + .mockResolvedValueOnce(undefined) + const guest = { + id: 707, + isDestroyed: vi.fn(() => false), + getType: vi.fn(() => 'webview'), + setBackgroundThrottling: guestSetBackgroundThrottlingMock, + 
setWindowOpenHandler: guestSetWindowOpenHandlerMock, + on: guestOnMock, + off: guestOffMock, + openDevTools: guestOpenDevToolsMock + } + const renderer = { + id: rendererWebContentsId, + isDestroyed: vi.fn(() => false), + executeJavaScript: rendererExecuteJavaScriptMock + } + browserWindowFromWebContentsMock.mockReturnValue({ isFocused: vi.fn(() => true) }) + webContentsFromIdMock.mockImplementation((id: number) => { + if (id === guest.id) { + return guest + } + if (id === rendererWebContentsId) { + return renderer + } + return null + }) + + browserManager.attachGuestPolicies(guest as never) + browserManager.registerGuest({ + browserPageId: 'page-1', + workspaceId: 'workspace-1', + worktreeId: 'wt-1', + webContentsId: guest.id, + rendererWebContentsId + }) + + const restore = await browserManager.ensureWebviewVisible(guest.id) + + const activationScript = rendererExecuteJavaScriptMock.mock.calls[0]?.[0] + expect(activationScript).toContain('var browserWorkspaceId = "workspace-1";') + expect(activationScript).toContain('var browserPageId = "page-1";') + expect(activationScript).toContain('state.setActiveBrowserTab(browserWorkspaceId);') + expect(activationScript).toContain( + 'state.setActiveBrowserPage(browserWorkspaceId, browserPageId);' + ) + expect(activationScript).toContain('var targetWorktreeId = "wt-1";') + + restore() + }) + + it('restores the previously focused browser workspace after screenshot prep changes tabs', async () => { + const rendererExecuteJavaScriptMock = vi + .fn() + .mockResolvedValueOnce({ + prevTabType: 'browser', + prevActiveWorktreeId: 'wt-prev', + prevActiveBrowserWorkspaceId: 'workspace-prev', + prevActiveBrowserPageId: 'page-prev', + prevFocusedGroupTabId: 'tab-prev', + targetWorktreeId: 'wt-target', + targetBrowserWorkspaceId: 'workspace-target', + targetBrowserPageId: 'page-target' + }) + .mockResolvedValueOnce(undefined) + const guest = { + id: 708, + isDestroyed: vi.fn(() => false), + getType: vi.fn(() => 'webview'), + 
setBackgroundThrottling: guestSetBackgroundThrottlingMock, + setWindowOpenHandler: guestSetWindowOpenHandlerMock, + on: guestOnMock, + off: guestOffMock, + openDevTools: guestOpenDevToolsMock + } + const renderer = { + id: rendererWebContentsId, + isDestroyed: vi.fn(() => false), + executeJavaScript: rendererExecuteJavaScriptMock + } + browserWindowFromWebContentsMock.mockReturnValue({ isFocused: vi.fn(() => true) }) + webContentsFromIdMock.mockImplementation((id: number) => { + if (id === guest.id) { + return guest + } + if (id === rendererWebContentsId) { + return renderer + } + return null + }) + + browserManager.attachGuestPolicies(guest as never) + browserManager.registerGuest({ + browserPageId: 'page-target', + workspaceId: 'workspace-target', + worktreeId: 'wt-target', + webContentsId: guest.id, + rendererWebContentsId + }) + + const restore = await browserManager.ensureWebviewVisible(guest.id) + restore() + + const restoreScript = rendererExecuteJavaScriptMock.mock.calls[1]?.[0] + expect(restoreScript).toContain('state.setActiveWorktree("wt-prev");') + expect(restoreScript).toContain('state.setActiveBrowserTab("workspace-prev");') + }) + + it('restores the previously active page when screenshot prep switches pages inside one workspace', async () => { + const rendererExecuteJavaScriptMock = vi + .fn() + .mockResolvedValueOnce({ + prevTabType: 'browser', + prevActiveWorktreeId: 'wt-target', + prevActiveBrowserWorkspaceId: 'workspace-target', + prevActiveBrowserPageId: 'page-prev', + prevFocusedGroupTabId: null, + targetWorktreeId: 'wt-target', + targetBrowserWorkspaceId: 'workspace-target', + targetBrowserPageId: 'page-target' + }) + .mockResolvedValueOnce(undefined) + const guest = { + id: 709, + isDestroyed: vi.fn(() => false), + getType: vi.fn(() => 'webview'), + setBackgroundThrottling: guestSetBackgroundThrottlingMock, + setWindowOpenHandler: guestSetWindowOpenHandlerMock, + on: guestOnMock, + off: guestOffMock, + openDevTools: guestOpenDevToolsMock + } 
+ const renderer = { + id: rendererWebContentsId, + isDestroyed: vi.fn(() => false), + executeJavaScript: rendererExecuteJavaScriptMock + } + browserWindowFromWebContentsMock.mockReturnValue({ isFocused: vi.fn(() => true) }) + webContentsFromIdMock.mockImplementation((id: number) => { + if (id === guest.id) { + return guest + } + if (id === rendererWebContentsId) { + return renderer + } + return null + }) + + browserManager.attachGuestPolicies(guest as never) + browserManager.registerGuest({ + browserPageId: 'page-target', + workspaceId: 'workspace-target', + worktreeId: 'wt-target', + webContentsId: guest.id, + rendererWebContentsId + }) + + const restore = await browserManager.ensureWebviewVisible(guest.id) + restore() + + const restoreScript = rendererExecuteJavaScriptMock.mock.calls[1]?.[0] + expect(restoreScript).toContain('state.setActiveBrowserPage(') + expect(restoreScript).toContain('"workspace-target"') + expect(restoreScript).toContain('"page-prev"') + }) + + it('restores remembered browser workspace/page even when the visible pane was terminal', async () => { + const rendererExecuteJavaScriptMock = vi + .fn() + .mockResolvedValueOnce({ + prevTabType: 'terminal', + prevActiveWorktreeId: 'wt-target', + prevActiveBrowserWorkspaceId: 'workspace-prev', + prevActiveBrowserPageId: 'page-prev', + prevFocusedGroupTabId: 'tab-prev', + targetWorktreeId: 'wt-target', + targetBrowserWorkspaceId: 'workspace-target', + targetBrowserPageId: 'page-target' + }) + .mockResolvedValueOnce(undefined) + const guest = { + id: 7091, + isDestroyed: vi.fn(() => false), + getType: vi.fn(() => 'webview'), + setBackgroundThrottling: guestSetBackgroundThrottlingMock, + setWindowOpenHandler: guestSetWindowOpenHandlerMock, + on: guestOnMock, + off: guestOffMock, + openDevTools: guestOpenDevToolsMock + } + const renderer = { + id: rendererWebContentsId, + isDestroyed: vi.fn(() => false), + executeJavaScript: rendererExecuteJavaScriptMock + } + 
browserWindowFromWebContentsMock.mockReturnValue({ isFocused: vi.fn(() => true) }) + webContentsFromIdMock.mockImplementation((id: number) => { + if (id === guest.id) { + return guest + } + if (id === rendererWebContentsId) { + return renderer + } + return null + }) + + browserManager.attachGuestPolicies(guest as never) + browserManager.registerGuest({ + browserPageId: 'page-target', + workspaceId: 'workspace-target', + worktreeId: 'wt-target', + webContentsId: guest.id, + rendererWebContentsId + }) + + const restore = await browserManager.ensureWebviewVisible(guest.id) + restore() + + const restoreScript = rendererExecuteJavaScriptMock.mock.calls[1]?.[0] + expect(restoreScript).toContain('state.setActiveBrowserTab("workspace-prev");') + expect(restoreScript).toContain('state.setActiveBrowserPage(') + expect(restoreScript).toContain('"workspace-prev"') + expect(restoreScript).toContain('"page-prev"') + expect(restoreScript).toContain('state.activateTab("tab-prev");') + expect(restoreScript).toContain('state.setActiveTabType("terminal");') + }) + + it('does not focus the Orca window while preparing a screenshot', async () => { + const rendererExecuteJavaScriptMock = vi.fn().mockResolvedValueOnce({ + prevTabType: 'terminal', + prevActiveWorktreeId: 'wt-1', + prevActiveBrowserWorkspaceId: 'workspace-prev', + prevActiveBrowserPageId: 'page-prev', + prevFocusedGroupTabId: 'tab-prev', + targetWorktreeId: 'wt-1', + targetBrowserWorkspaceId: 'workspace-1', + targetBrowserPageId: 'page-1' + }) + const guest = { + id: 710, + isDestroyed: vi.fn(() => false), + getType: vi.fn(() => 'webview'), + setBackgroundThrottling: guestSetBackgroundThrottlingMock, + setWindowOpenHandler: guestSetWindowOpenHandlerMock, + on: guestOnMock, + off: guestOffMock, + openDevTools: guestOpenDevToolsMock + } + const renderer = { + id: rendererWebContentsId, + isDestroyed: vi.fn(() => false), + executeJavaScript: rendererExecuteJavaScriptMock + } + webContentsFromIdMock.mockImplementation((id: 
number) => { + if (id === guest.id) { + return guest + } + if (id === rendererWebContentsId) { + return renderer + } + return null + }) + + browserManager.attachGuestPolicies(guest as never) + browserManager.registerGuest({ + browserPageId: 'page-1', + workspaceId: 'workspace-1', + worktreeId: 'wt-1', + webContentsId: guest.id, + rendererWebContentsId + }) + + await browserManager.ensureWebviewVisible(guest.id) + + expect(browserWindowFromWebContentsMock).not.toHaveBeenCalled() + }) + it('offers opening a link in another Orca browser tab from the guest context menu', () => { const rendererSendMock = vi.fn() const guest = { @@ -455,6 +750,101 @@ describe('browserManager', () => { ) }) + it('retires stale guest mappings when a page re-registers after a process swap', () => { + const rendererSendMock = vi.fn() + const oldGuestOnMock = vi.fn() + const oldGuestOffMock = vi.fn() + const newGuestOnMock = vi.fn() + const newGuestOffMock = vi.fn() + const oldGuest = { + id: 501, + isDestroyed: vi.fn(() => false), + getType: vi.fn(() => 'webview'), + setBackgroundThrottling: guestSetBackgroundThrottlingMock, + setWindowOpenHandler: guestSetWindowOpenHandlerMock, + on: oldGuestOnMock, + off: oldGuestOffMock, + openDevTools: guestOpenDevToolsMock, + getURL: vi.fn(() => 'https://old.example') + } + const newGuest = { + id: 502, + isDestroyed: vi.fn(() => false), + getType: vi.fn(() => 'webview'), + setBackgroundThrottling: guestSetBackgroundThrottlingMock, + setWindowOpenHandler: guestSetWindowOpenHandlerMock, + on: newGuestOnMock, + off: newGuestOffMock, + openDevTools: guestOpenDevToolsMock, + getURL: vi.fn(() => 'https://new.example') + } + + webContentsFromIdMock.mockImplementation((id: number) => { + if (id === oldGuest.id) { + return oldGuest + } + if (id === newGuest.id) { + return newGuest + } + if (id === rendererWebContentsId) { + return { isDestroyed: vi.fn(() => false), send: rendererSendMock } + } + return null + }) + + browserManager.attachGuestPolicies(oldGuest 
as never) + browserManager.registerGuest({ + browserPageId: 'browser-1', + webContentsId: oldGuest.id, + rendererWebContentsId + }) + + browserManager.attachGuestPolicies(newGuest as never) + browserManager.registerGuest({ + browserPageId: 'browser-1', + webContentsId: newGuest.id, + rendererWebContentsId + }) + + const oldDidFailLoadHandler = oldGuestOnMock.mock.calls.find( + ([event]) => event === 'did-fail-load' + )?.[1] as + | (( + event: unknown, + errorCode: number, + errorDescription: string, + validatedUrl: string, + isMainFrame: boolean + ) => void) + | undefined + const newDidFailLoadHandler = newGuestOnMock.mock.calls.find( + ([event]) => event === 'did-fail-load' + )?.[1] as + | (( + event: unknown, + errorCode: number, + errorDescription: string, + validatedUrl: string, + isMainFrame: boolean + ) => void) + | undefined + + oldDidFailLoadHandler?.(null, -105, 'Old guest failed', 'https://old.example', true) + expect(rendererSendMock).not.toHaveBeenCalled() + + newDidFailLoadHandler?.(null, -106, 'New guest failed', 'https://new.example', true) + expect(rendererSendMock).toHaveBeenCalledWith('browser:guest-load-failed', { + browserPageId: 'browser-1', + loadError: { + code: -106, + description: 'New guest failed', + validatedUrl: 'https://new.example' + } + }) + expect(oldGuestOffMock).toHaveBeenCalled() + expect(browserManager.getGuestWebContentsId('browser-1')).toBe(newGuest.id) + }) + it('does not forward ctrl/cmd+r or readline chords from browser guests', () => { const rendererSendMock = vi.fn() const guest = { diff --git a/src/main/browser/browser-manager.ts b/src/main/browser/browser-manager.ts index 325a10bf..d8bf41ab 100644 --- a/src/main/browser/browser-manager.ts +++ b/src/main/browser/browser-manager.ts @@ -38,6 +38,7 @@ export type BrowserGuestRegistration = { browserPageId?: string browserTabId?: string workspaceId?: string + worktreeId?: string webContentsId: number rendererWebContentsId: number } @@ -71,15 +72,20 @@ function 
safeOrigin(rawUrl: string): string { } } -class BrowserManager { +export class BrowserManager { private readonly webContentsIdByTabId = new Map() // Why: reverse map enables O(1) guest→tab lookups instead of O(N) linear // scans on every mouse event, load failure, permission, and popup event. private readonly tabIdByWebContentsId = new Map() + // Why: guest registration is keyed by browser page id, but renderer + // visibility/focus state is keyed by browser workspace id. Screenshot prep + // has to bridge that mismatch to activate the right tab before capture. + private readonly workspaceIdByPageId = new Map() private readonly rendererWebContentsIdByTabId = new Map() private readonly contextMenuCleanupByTabId = new Map void>() private readonly grabShortcutCleanupByTabId = new Map void>() private readonly shortcutForwardingCleanupByTabId = new Map void>() + private readonly worktreeIdByTabId = new Map() private readonly policyAttachedGuestIds = new Set() private readonly policyCleanupByGuestId = new Map void>() private readonly pendingLoadFailuresByGuestId = new Map< @@ -108,12 +114,230 @@ class BrowserManager { return renderer } + // Why: screenshot sessions target guest page ids, but Orca's visible browser + // chrome is keyed by workspace ids. If we activate the page id directly, the + // webview stays hidden under the terminal pane and Page.captureScreenshot + // times out even though the guest still exists. + async ensureWebviewVisible(guestWebContentsId: number): Promise<() => void> { + const browserPageId = this.resolveBrowserTabIdForGuestWebContentsId(guestWebContentsId) + if (!browserPageId) { + return () => {} + } + const browserWorkspaceId = this.workspaceIdByPageId.get(browserPageId) ?? browserPageId + const worktreeId = this.worktreeIdByTabId.get(browserPageId) ?? 
null + const renderer = this.resolveRendererForBrowserTab(browserPageId) + if (!renderer || renderer.isDestroyed()) { + return () => {} + } + + const prev = await renderer + .executeJavaScript( + `(function() { + var store = window.__store; + if (!store) return null; + var state = store.getState(); + var prevTabType = state.activeTabType; + var prevActiveWorktreeId = state.activeWorktreeId || null; + var prevActiveBrowserWorkspaceId = state.activeBrowserTabId || null; + var prevActiveBrowserPageId = null; + var prevFocusedGroupTabId = null; + var targetWorktreeId = ${JSON.stringify(worktreeId)}; + var browserWorkspaceId = ${JSON.stringify(browserWorkspaceId)}; + var browserPageId = ${JSON.stringify(browserPageId)}; + var browserTabsByWorktree = state.browserTabsByWorktree || {}; + + if (prevActiveWorktreeId) { + var prevFocusedGroupId = (state.activeGroupIdByWorktree || {})[prevActiveWorktreeId]; + var prevGroups = (state.groupsByWorktree || {})[prevActiveWorktreeId] || []; + for (var pg = 0; pg < prevGroups.length; pg++) { + if (prevGroups[pg].id === prevFocusedGroupId) { + prevFocusedGroupTabId = prevGroups[pg].activeTabId; + break; + } + } + } + + if (prevActiveBrowserWorkspaceId) { + for (var prevWtId in browserTabsByWorktree) { + var prevBrowserTabs = browserTabsByWorktree[prevWtId] || []; + for (var pbt = 0; pbt < prevBrowserTabs.length; pbt++) { + if (prevBrowserTabs[pbt].id === prevActiveBrowserWorkspaceId) { + prevActiveBrowserPageId = prevBrowserTabs[pbt].activePageId || null; + break; + } + } + if (prevActiveBrowserPageId) break; + } + } + + if ( + targetWorktreeId && + prevActiveWorktreeId !== targetWorktreeId && + typeof state.setActiveWorktree === 'function' + ) { + state.setActiveWorktree(targetWorktreeId); + state = store.getState(); + } + + var foundWorkspace = null; + for (var wtId in browserTabsByWorktree) { + var tabs = browserTabsByWorktree[wtId] || []; + for (var i = 0; i < tabs.length; i++) { + if (tabs[i].id === browserWorkspaceId) { + 
foundWorkspace = tabs[i]; + if (!targetWorktreeId) { + targetWorktreeId = wtId; + } + break; + } + } + if (foundWorkspace) break; + } + + var hasTargetPage = false; + var targetPages = (state.browserPagesByWorkspace || {})[browserWorkspaceId] || []; + for (var pageIndex = 0; pageIndex < targetPages.length; pageIndex++) { + if (targetPages[pageIndex].id === browserPageId) { + hasTargetPage = true; + break; + } + } + + if (foundWorkspace) { + if (typeof state.setActiveBrowserTab === 'function') { + state.setActiveBrowserTab(browserWorkspaceId); + state = store.getState(); + } else { + var allTabs = state.unifiedTabsByWorktree || {}; + var found = null; + for (var unifiedWtId in allTabs) { + var unifiedTabs = allTabs[unifiedWtId] || []; + for (var unifiedIndex = 0; unifiedIndex < unifiedTabs.length; unifiedIndex++) { + if ( + unifiedTabs[unifiedIndex].contentType === 'browser' && + unifiedTabs[unifiedIndex].entityId === browserWorkspaceId + ) { + found = unifiedTabs[unifiedIndex]; + break; + } + } + if (found) break; + } + if (found) { + state.activateTab(found.id); + } + state.setActiveTabType('browser'); + state = store.getState(); + } + // Why: activating the workspace alone is not enough for screenshot + // capture when a browser workspace contains multiple pages. The + // compositor only paints the currently mounted page guest. + if ( + hasTargetPage && + foundWorkspace.activePageId !== browserPageId && + typeof state.setActiveBrowserPage === 'function' + ) { + state.setActiveBrowserPage(browserWorkspaceId, browserPageId); + state = store.getState(); + } + } + + return { + prevTabType: prevTabType, + prevActiveWorktreeId: prevActiveWorktreeId, + prevActiveBrowserWorkspaceId: prevActiveBrowserWorkspaceId, + prevActiveBrowserPageId: prevActiveBrowserPageId, + prevFocusedGroupTabId: prevFocusedGroupTabId, + targetWorktreeId: targetWorktreeId, + targetBrowserWorkspaceId: foundWorkspace ? 
browserWorkspaceId : null, + targetBrowserPageId: foundWorkspace && hasTargetPage ? browserPageId : null + }; + })()` + ) + .catch(() => null) + + const needsRestore = + prev && + (prev.prevTabType !== 'browser' || + prev.prevActiveWorktreeId !== prev.targetWorktreeId || + prev.prevFocusedGroupTabId !== null || + prev.prevActiveBrowserWorkspaceId !== prev.targetBrowserWorkspaceId || + prev.prevActiveBrowserPageId !== prev.targetBrowserPageId) + + if (!needsRestore) { + return () => {} + } + + return () => { + if (!prev || !renderer || renderer.isDestroyed()) { + return + } + renderer + .executeJavaScript( + `(function() { + var store = window.__store; + if (!store) return; + var state = store.getState(); + if ( + ${JSON.stringify(prev?.prevActiveWorktreeId)} && + ${JSON.stringify(prev?.prevActiveWorktreeId)} !== + ${JSON.stringify(prev?.targetWorktreeId)} && + typeof state.setActiveWorktree === 'function' + ) { + state.setActiveWorktree(${JSON.stringify(prev?.prevActiveWorktreeId)}); + state = store.getState(); + } + if ( + ${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)} && + ${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)} !== + ${JSON.stringify(prev?.targetBrowserWorkspaceId)} && + typeof state.setActiveBrowserTab === 'function' + ) { + state.setActiveBrowserTab(${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)}); + state = store.getState(); + } + if ( + ${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)} && + ${JSON.stringify(prev?.prevActiveBrowserPageId)} && + ${JSON.stringify(prev?.prevActiveBrowserPageId)} !== + ${JSON.stringify(prev?.targetBrowserPageId)} && + typeof state.setActiveBrowserPage === 'function' + ) { + // Why: Orca remembers the last browser workspace/page even when + // the user is currently in terminal/editor view. Screenshot prep + // temporarily switches that hidden browser selection state, so + // restore it independently of the visible tab type. 
+ state.setActiveBrowserPage( + ${JSON.stringify(prev?.prevActiveBrowserWorkspaceId)}, + ${JSON.stringify(prev?.prevActiveBrowserPageId)} + ); + state = store.getState(); + } + if ( + ${JSON.stringify(prev?.prevTabType)} !== 'browser' && + ${JSON.stringify(prev?.prevFocusedGroupTabId)} + ) { + state.activateTab(${JSON.stringify(prev?.prevFocusedGroupTabId)}); + } + if (${JSON.stringify(prev?.prevTabType)} !== 'browser') { + state.setActiveTabType(${JSON.stringify(prev?.prevTabType)}); + } + })()` + ) + .catch(() => {}) + } + } + attachGuestPolicies(guest: Electron.WebContents): void { if (this.policyAttachedGuestIds.has(guest.id)) { return } this.policyAttachedGuestIds.add(guest.id) - guest.setBackgroundThrottling(true) + // Why: background throttling must be disabled so agent-driven screenshots + // (Page.captureScreenshot via CDP proxy) can capture frames even when the + // Orca window is not the focused foreground app. With throttling enabled, + // the compositor stops producing frames and capturePage() returns empty. + guest.setBackgroundThrottling(false) guest.setWindowOpenHandler(({ url }) => { const browserTabId = this.resolveBrowserTabIdForGuestWebContentsId(guest.id) const browserUrl = normalizeBrowserNavigationUrl(url) @@ -189,9 +413,30 @@ class BrowserManager { }) } + private retireStaleGuestWebContents(previousWebContentsId: number): void { + // Why: a browser page can re-register with a new guest id after Chromium + // swaps renderer processes. Late events from the dead guest must stop + // resolving to the live page, or stale download/popup/permission callbacks + // can be delivered to the wrong session after the swap. 
+ this.tabIdByWebContentsId.delete(previousWebContentsId) + + const policyCleanup = this.policyCleanupByGuestId.get(previousWebContentsId) + if (policyCleanup) { + policyCleanup() + this.policyCleanupByGuestId.delete(previousWebContentsId) + } + this.policyAttachedGuestIds.delete(previousWebContentsId) + this.pendingLoadFailuresByGuestId.delete(previousWebContentsId) + this.pendingPermissionEventsByGuestId.delete(previousWebContentsId) + this.pendingPopupEventsByGuestId.delete(previousWebContentsId) + this.pendingDownloadIdsByGuestId.delete(previousWebContentsId) + } + registerGuest({ browserPageId, browserTabId: legacyBrowserTabId, + workspaceId, + worktreeId, webContentsId, rendererWebContentsId }: BrowserGuestRegistration): void { @@ -231,9 +476,20 @@ class BrowserManager { return } + const previousWebContentsId = this.webContentsIdByTabId.get(browserTabId) + if (previousWebContentsId !== undefined && previousWebContentsId !== webContentsId) { + this.retireStaleGuestWebContents(previousWebContentsId) + } + this.webContentsIdByTabId.set(browserTabId, webContentsId) this.tabIdByWebContentsId.set(webContentsId, browserTabId) + if (workspaceId) { + this.workspaceIdByPageId.set(browserTabId, workspaceId) + } this.rendererWebContentsIdByTabId.set(browserTabId, rendererWebContentsId) + if (worktreeId) { + this.worktreeIdByTabId.set(browserTabId, worktreeId) + } this.setupContextMenu(browserTabId, guest) this.setupGrabShortcut(browserTabId, guest) @@ -292,6 +548,8 @@ class BrowserManager { } this.webContentsIdByTabId.delete(browserTabId) this.rendererWebContentsIdByTabId.delete(browserTabId) + this.workspaceIdByPageId.delete(browserTabId) + this.worktreeIdByTabId.delete(browserTabId) } unregisterAll(): void { @@ -313,6 +571,7 @@ class BrowserManager { } this.policyCleanupByGuestId.clear() this.tabIdByWebContentsId.clear() + this.worktreeIdByTabId.clear() this.pendingLoadFailuresByGuestId.clear() this.pendingPermissionEventsByGuestId.clear() 
this.pendingPopupEventsByGuestId.clear() @@ -323,6 +582,14 @@ class BrowserManager { return this.webContentsIdByTabId.get(browserTabId) ?? null } + getWebContentsIdByTabId(): Map { + return this.webContentsIdByTabId + } + + getWorktreeIdForTab(browserTabId: string): string | undefined { + return this.worktreeIdByTabId.get(browserTabId) + } + notifyPermissionDenied(args: { guestWebContentsId: number permission: string diff --git a/src/main/browser/browser-session-registry.ts b/src/main/browser/browser-session-registry.ts index 3a0a136d..ad8c07fa 100644 --- a/src/main/browser/browser-session-registry.ts +++ b/src/main/browser/browser-session-registry.ts @@ -373,8 +373,12 @@ class BrowserSessionRegistry { this.configuredPartitions.add(partition) const sess = session.fromPartition(partition) + // Why: clipboard-read and clipboard-sanitized-write are required for agent-browser's + // clipboard commands to work. Without these, navigator.clipboard.writeText/readText + // throws NotAllowedError even when invoked via CDP with userGesture:true. 
+ const autoGranted = new Set(['fullscreen', 'clipboard-read', 'clipboard-sanitized-write']) sess.setPermissionRequestHandler((webContents, permission, callback) => { - const allowed = permission === 'fullscreen' + const allowed = autoGranted.has(permission) if (!allowed) { browserManager.notifyPermissionDenied({ guestWebContentsId: webContents.id, @@ -385,7 +389,7 @@ class BrowserSessionRegistry { callback(allowed) }) sess.setPermissionCheckHandler((_webContents, permission) => { - return permission === 'fullscreen' + return autoGranted.has(permission) }) sess.setDisplayMediaRequestHandler((_request, callback) => { callback({ video: undefined, audio: undefined }) diff --git a/src/main/browser/cdp-bridge-integration.test.ts b/src/main/browser/cdp-bridge-integration.test.ts new file mode 100644 index 00000000..2f358428 --- /dev/null +++ b/src/main/browser/cdp-bridge-integration.test.ts @@ -0,0 +1,533 @@ +/* eslint-disable max-lines -- Why: integration test covering the full browser automation pipeline end-to-end. 
*/ +import { mkdtempSync } from 'fs' +import { tmpdir } from 'os' +import { join } from 'path' +import { createConnection } from 'net' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +// ── Electron mocks ── + +const { webContentsFromIdMock } = vi.hoisted(() => ({ + webContentsFromIdMock: vi.fn() +})) + +vi.mock('electron', () => ({ + webContents: { fromId: webContentsFromIdMock }, + shell: { openExternal: vi.fn() }, + ipcMain: { handle: vi.fn(), removeHandler: vi.fn(), on: vi.fn() }, + app: { getPath: vi.fn(() => '/tmp'), isPackaged: false } +})) + +vi.mock('../git/worktree', () => ({ + listWorktrees: vi.fn().mockResolvedValue([]) +})) + +import { BrowserManager } from './browser-manager' +import { CdpBridge } from './cdp-bridge' +import { OrcaRuntimeService } from '../runtime/orca-runtime' +import { OrcaRuntimeRpcServer } from '../runtime/runtime-rpc' +import { readRuntimeMetadata } from '../runtime/runtime-metadata' + +// ── CDP response builders ── + +type AXNode = { + nodeId: string + backendDOMNodeId?: number + role?: { type: string; value: string } + name?: { type: string; value: string } + properties?: { name: string; value: { type: string; value: unknown } }[] + childIds?: string[] + ignored?: boolean +} + +function axNode( + id: string, + role: string, + name: string, + opts?: { childIds?: string[]; backendDOMNodeId?: number } +): AXNode { + return { + nodeId: id, + backendDOMNodeId: opts?.backendDOMNodeId ?? 
parseInt(id, 10) * 100, + role: { type: 'role', value: role }, + name: { type: 'computedString', value: name }, + childIds: opts?.childIds + } +} + +const EXAMPLE_COM_TREE: AXNode[] = [ + axNode('1', 'WebArea', 'Example Domain', { childIds: ['2', '3', '4'] }), + axNode('2', 'heading', 'Example Domain'), + axNode('3', 'staticText', 'This domain is for use in illustrative examples.'), + axNode('4', 'link', 'More information...', { backendDOMNodeId: 400 }) +] + +const SEARCH_PAGE_TREE: AXNode[] = [ + axNode('1', 'WebArea', 'Search', { childIds: ['2', '3', '4', '5'] }), + axNode('2', 'navigation', 'Main Nav', { childIds: ['3'] }), + axNode('3', 'link', 'Home', { backendDOMNodeId: 300 }), + axNode('4', 'textbox', 'Search query', { backendDOMNodeId: 400 }), + axNode('5', 'button', 'Search', { backendDOMNodeId: 500 }) +] + +// ── Mock WebContents factory ── + +function createMockGuest(id: number, url: string, title: string) { + let currentUrl = url + let currentTitle = title + let currentTree = EXAMPLE_COM_TREE + let navHistoryId = 1 + + const sendCommandMock = vi.fn(async (method: string, params?: Record) => { + switch (method) { + case 'Page.enable': + case 'DOM.enable': + case 'Accessibility.enable': + return {} + case 'Accessibility.getFullAXTree': + return { nodes: currentTree } + case 'Page.getNavigationHistory': + return { + entries: [{ id: navHistoryId, url: currentUrl }], + currentIndex: 0 + } + case 'Page.navigate': { + const targetUrl = (params as { url: string }).url + if (targetUrl.includes('nonexistent.invalid')) { + return { errorText: 'net::ERR_NAME_NOT_RESOLVED' } + } + navHistoryId++ + currentUrl = targetUrl + if (targetUrl.includes('search.example.com')) { + currentTitle = 'Search' + currentTree = SEARCH_PAGE_TREE + } else { + currentTitle = 'Example Domain' + currentTree = EXAMPLE_COM_TREE + } + return {} + } + case 'Runtime.evaluate': { + const expr = (params as { expression: string }).expression + if (expr === 'document.readyState') { + return { 
result: { value: 'complete' } } + } + if (expr === 'location.origin') { + return { result: { value: new URL(currentUrl).origin } } + } + if (expr.includes('innerWidth')) { + return { result: { value: JSON.stringify({ w: 1280, h: 720 }) } } + } + if (expr.includes('scrollBy')) { + return { result: { value: undefined } } + } + if (expr.includes('dispatchEvent')) { + return { result: { value: undefined } } + } + // eslint-disable-next-line no-eval + return { result: { value: String(eval(expr)), type: 'string' } } + } + case 'DOM.scrollIntoViewIfNeeded': + return {} + case 'DOM.getBoxModel': + return { model: { content: [100, 200, 300, 200, 300, 250, 100, 250] } } + case 'Input.dispatchMouseEvent': + return {} + case 'Input.insertText': + return {} + case 'Input.dispatchKeyEvent': + return {} + case 'DOM.focus': + return {} + case 'DOM.describeNode': + return { node: { nodeId: 1 } } + case 'DOM.requestNode': + return { nodeId: 1 } + case 'DOM.resolveNode': + return { object: { objectId: 'obj-1' } } + case 'Runtime.callFunctionOn': + return { result: { value: undefined } } + case 'DOM.setFileInputFiles': + return {} + case 'Page.captureScreenshot': + return { + data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==' + } + case 'Page.reload': + return {} + case 'Network.enable': + return {} + case 'Target.setAutoAttach': + return {} + case 'Runtime.enable': + return {} + default: + throw new Error(`Unexpected CDP method: ${method}`) + } + }) + + const debuggerListeners = new Map void)[]>() + + const guest = { + id, + isDestroyed: vi.fn(() => false), + getType: vi.fn(() => 'webview'), + getURL: vi.fn(() => currentUrl), + getTitle: vi.fn(() => currentTitle), + setBackgroundThrottling: vi.fn(), + setWindowOpenHandler: vi.fn(), + on: vi.fn(), + off: vi.fn(), + debugger: { + attach: vi.fn(), + detach: vi.fn(), + sendCommand: sendCommandMock, + on: vi.fn((event: string, handler: (...args: unknown[]) => void) => { + const 
handlers = debuggerListeners.get(event) ?? [] + handlers.push(handler) + debuggerListeners.set(event, handlers) + }), + removeListener: vi.fn((event: string, handler: (...args: unknown[]) => void) => { + const handlers = debuggerListeners.get(event) ?? [] + const idx = handlers.indexOf(handler) + if (idx >= 0) { + handlers.splice(idx, 1) + } + }), + removeAllListeners: vi.fn((event: string) => { + debuggerListeners.set(event, []) + }), + off: vi.fn() + } + } + + return { guest, sendCommandMock } +} + +// ── RPC helper ── + +async function sendRequest( + endpoint: string, + request: Record +): Promise> { + return await new Promise((resolve, reject) => { + const socket = createConnection(endpoint) + let buffer = '' + socket.setEncoding('utf8') + socket.once('error', reject) + socket.on('data', (chunk) => { + buffer += chunk + const newlineIndex = buffer.indexOf('\n') + if (newlineIndex === -1) { + return + } + const message = buffer.slice(0, newlineIndex) + socket.end() + resolve(JSON.parse(message) as Record) + }) + socket.on('connect', () => { + socket.write(`${JSON.stringify(request)}\n`) + }) + }) +} + +// ── Tests ── + +describe('Browser automation pipeline (integration)', () => { + let server: OrcaRuntimeRpcServer + let endpoint: string + let authToken: string + + const GUEST_WC_ID = 5001 + const RENDERER_WC_ID = 1 + + beforeEach(async () => { + const { guest } = createMockGuest(GUEST_WC_ID, 'https://example.com', 'Example Domain') + webContentsFromIdMock.mockImplementation((id: number) => { + if (id === GUEST_WC_ID) { + return guest + } + return null + }) + + const browserManager = new BrowserManager() + // Simulate the attach-time policy (normally done in will-attach-webview) + browserManager.attachGuestPolicies(guest as never) + browserManager.registerGuest({ + browserPageId: 'page-1', + webContentsId: GUEST_WC_ID, + rendererWebContentsId: RENDERER_WC_ID + }) + + const cdpBridge = new CdpBridge(browserManager) + cdpBridge.setActiveTab(GUEST_WC_ID) + + const 
userDataPath = mkdtempSync(join(tmpdir(), 'browser-e2e-')) + const runtime = new OrcaRuntimeService() + // eslint-disable-next-line @typescript-eslint/no-explicit-any + runtime.setAgentBrowserBridge(cdpBridge as any) + + server = new OrcaRuntimeRpcServer({ runtime, userDataPath }) + await server.start() + + const metadata = readRuntimeMetadata(userDataPath)! + endpoint = metadata.transport!.endpoint + authToken = metadata.authToken! + }) + + afterEach(async () => { + await server.stop() + }) + + async function rpc(method: string, params?: Record) { + const response = await sendRequest(endpoint, { + id: `req_${method}`, + authToken, + method, + ...(params ? { params } : {}) + }) + return response + } + + // ── Snapshot ── + + it('takes a snapshot and returns refs for interactive elements', async () => { + const res = await rpc('browser.snapshot') + expect(res.ok).toBe(true) + + const result = res.result as { + snapshot: string + refs: { ref: string; role: string; name: string }[] + url: string + title: string + } + expect(result.url).toBe('https://example.com') + expect(result.title).toBe('Example Domain') + expect(result.snapshot).toContain('heading "Example Domain"') + expect(result.snapshot).toContain('link "More information..."') + expect(result.refs).toHaveLength(1) + expect(result.refs[0]).toMatchObject({ + ref: '@e1', + role: 'link', + name: 'More information...' 
+ }) + }) + + // ── Click ── + + it('clicks an element by ref after snapshot', async () => { + await rpc('browser.snapshot') + + const res = await rpc('browser.click', { element: '@e1' }) + expect(res.ok).toBe(true) + expect((res.result as { clicked: string }).clicked).toBe('@e1') + }) + + it('returns error when clicking without a prior snapshot', async () => { + const res = await rpc('browser.click', { element: '@e1' }) + expect(res.ok).toBe(false) + expect((res.error as { code: string }).code).toBe('browser_stale_ref') + }) + + it('returns error for non-existent ref', async () => { + await rpc('browser.snapshot') + + const res = await rpc('browser.click', { element: '@e999' }) + expect(res.ok).toBe(false) + expect((res.error as { code: string }).code).toBe('browser_ref_not_found') + }) + + // ── Navigation ── + + it('navigates to a URL and invalidates refs', async () => { + await rpc('browser.snapshot') + + const gotoRes = await rpc('browser.goto', { url: 'https://search.example.com' }) + expect(gotoRes.ok).toBe(true) + const gotoResult = gotoRes.result as { url: string; title: string } + expect(gotoResult.url).toBe('https://search.example.com') + expect(gotoResult.title).toBe('Search') + + // Old refs should be stale after navigation + const clickRes = await rpc('browser.click', { element: '@e1' }) + expect(clickRes.ok).toBe(false) + expect((clickRes.error as { code: string }).code).toBe('browser_stale_ref') + + // Re-snapshot should work and show new page + const snapRes = await rpc('browser.snapshot') + expect(snapRes.ok).toBe(true) + const snapResult = snapRes.result as { snapshot: string; refs: { name: string }[] } + expect(snapResult.snapshot).toContain('Search') + expect(snapResult.refs.map((r) => r.name)).toContain('Search') + expect(snapResult.refs.map((r) => r.name)).toContain('Home') + }) + + it('returns error for failed navigation', async () => { + const res = await rpc('browser.goto', { url: 'https://nonexistent.invalid' }) + 
expect(res.ok).toBe(false) + expect((res.error as { code: string }).code).toBe('browser_navigation_failed') + }) + + // ── Fill ── + + it('fills an input by ref', async () => { + await rpc('browser.goto', { url: 'https://search.example.com' }) + await rpc('browser.snapshot') + + // @e2 should be the textbox "Search query" on the search page + const res = await rpc('browser.fill', { element: '@e2', value: 'hello world' }) + expect(res.ok).toBe(true) + expect((res.result as { filled: string }).filled).toBe('@e2') + }) + + // ── Type ── + + it('types text at current focus', async () => { + const res = await rpc('browser.type', { input: 'some text' }) + expect(res.ok).toBe(true) + expect((res.result as { typed: boolean }).typed).toBe(true) + }) + + // ── Select ── + + it('selects a dropdown option by ref', async () => { + await rpc('browser.goto', { url: 'https://search.example.com' }) + await rpc('browser.snapshot') + + const res = await rpc('browser.select', { element: '@e2', value: 'option-1' }) + expect(res.ok).toBe(true) + expect((res.result as { selected: string }).selected).toBe('@e2') + }) + + // ── Scroll ── + + it('scrolls the viewport', async () => { + const res = await rpc('browser.scroll', { direction: 'down' }) + expect(res.ok).toBe(true) + expect((res.result as { scrolled: string }).scrolled).toBe('down') + + const res2 = await rpc('browser.scroll', { direction: 'up', amount: 200 }) + expect(res2.ok).toBe(true) + expect((res2.result as { scrolled: string }).scrolled).toBe('up') + }) + + // ── Reload ── + + it('reloads the page', async () => { + const res = await rpc('browser.reload') + expect(res.ok).toBe(true) + expect((res.result as { url: string }).url).toBe('https://example.com') + }) + + // ── Screenshot ── + + it('captures a screenshot', async () => { + const res = await rpc('browser.screenshot', { format: 'png' }) + expect(res.ok).toBe(true) + const result = res.result as { data: string; format: string } + expect(result.format).toBe('png') + 
expect(result.data.length).toBeGreaterThan(0) + }) + + // ── Eval ── + + it('evaluates JavaScript in the page context', async () => { + const res = await rpc('browser.eval', { expression: '2 + 2' }) + expect(res.ok).toBe(true) + expect((res.result as { result: string }).result).toBe('4') + }) + + // ── Tab management ── + + it('lists open tabs', async () => { + const res = await rpc('browser.tabList') + expect(res.ok).toBe(true) + const result = res.result as { tabs: { index: number; url: string; active: boolean }[] } + expect(result.tabs).toHaveLength(1) + expect(result.tabs[0]).toMatchObject({ + index: 0, + url: 'https://example.com', + active: true + }) + }) + + it('returns error for out-of-range tab switch', async () => { + const res = await rpc('browser.tabSwitch', { index: 5 }) + expect(res.ok).toBe(false) + expect((res.error as { code: string }).code).toBe('browser_tab_not_found') + }) + + // ── Full agent workflow simulation ── + + it('simulates a complete agent workflow: navigate → snapshot → interact → re-snapshot', async () => { + // 1. Navigate to search page + const gotoRes = await rpc('browser.goto', { url: 'https://search.example.com' }) + expect(gotoRes.ok).toBe(true) + + // 2. Snapshot the page + const snap1 = await rpc('browser.snapshot') + expect(snap1.ok).toBe(true) + const snap1Result = snap1.result as { + snapshot: string + refs: { ref: string; role: string; name: string }[] + } + + // Verify we see the search page structure + expect(snap1Result.snapshot).toContain('[Main Nav]') + expect(snap1Result.snapshot).toContain('text input "Search query"') + expect(snap1Result.snapshot).toContain('button "Search"') + + // 3. Fill the search input + const searchInput = snap1Result.refs.find((r) => r.name === 'Search query') + expect(searchInput).toBeDefined() + const fillRes = await rpc('browser.fill', { + element: searchInput!.ref, + value: 'integration testing' + }) + expect(fillRes.ok).toBe(true) + + // 4. 
Click the search button + const searchBtn = snap1Result.refs.find((r) => r.name === 'Search') + expect(searchBtn).toBeDefined() + const clickRes = await rpc('browser.click', { element: searchBtn!.ref }) + expect(clickRes.ok).toBe(true) + + // 5. Take a screenshot + const ssRes = await rpc('browser.screenshot') + expect(ssRes.ok).toBe(true) + + // 6. Check tab list + const tabRes = await rpc('browser.tabList') + expect(tabRes.ok).toBe(true) + const tabs = (tabRes.result as { tabs: { url: string }[] }).tabs + expect(tabs[0].url).toBe('https://search.example.com') + }) + + // ── No tab errors ── + + it('returns browser_no_tab when no tabs are registered', async () => { + // Create a fresh setup with no registered tabs + const emptyManager = new BrowserManager() + const emptyBridge = new CdpBridge(emptyManager) + + const userDataPath2 = mkdtempSync(join(tmpdir(), 'browser-e2e-empty-')) + const runtime2 = new OrcaRuntimeService() + // eslint-disable-next-line @typescript-eslint/no-explicit-any + runtime2.setAgentBrowserBridge(emptyBridge as any) + + const server2 = new OrcaRuntimeRpcServer({ runtime: runtime2, userDataPath: userDataPath2 }) + await server2.start() + + const metadata2 = readRuntimeMetadata(userDataPath2)! + const res = await sendRequest(metadata2.transport!.endpoint, { + id: 'req_no_tab', + authToken: metadata2.authToken, + method: 'browser.snapshot' + }) + + expect(res.ok).toBe(false) + expect((res.error as { code: string }).code).toBe('browser_no_tab') + + await server2.stop() + }) +}) diff --git a/src/main/browser/cdp-bridge.ts b/src/main/browser/cdp-bridge.ts new file mode 100644 index 00000000..9ab5e1ed --- /dev/null +++ b/src/main/browser/cdp-bridge.ts @@ -0,0 +1,1760 @@ +/* eslint-disable max-lines -- Why: the CDP bridge owns debugger lifecycle, ref map management, command serialization, and all browser interaction logic in one module so the browser automation boundary stays coherent. 
*/ +import { webContents } from 'electron' +import type { + BrowserCaptureStartResult, + BrowserCaptureStopResult, + BrowserCheckResult, + BrowserClearResult, + BrowserClickResult, + BrowserConsoleEntry, + BrowserConsoleResult, + BrowserCookie, + BrowserCookieDeleteResult, + BrowserCookieGetResult, + BrowserCookieSetResult, + BrowserDragResult, + BrowserEvalResult, + BrowserFillResult, + BrowserFocusResult, + BrowserGeolocationResult, + BrowserGotoResult, + BrowserHoverResult, + BrowserInterceptDisableResult, + BrowserInterceptEnableResult, + BrowserInterceptedRequest, + BrowserKeypressResult, + BrowserNetworkEntry, + BrowserNetworkLogResult, + BrowserPdfResult, + BrowserScreenshotResult, + BrowserScrollResult, + BrowserSelectAllResult, + BrowserSelectResult, + BrowserSnapshotResult, + BrowserTabInfo, + BrowserTabListResult, + BrowserTabSwitchResult, + BrowserTypeResult, + BrowserUploadResult, + BrowserViewportResult, + BrowserWaitResult +} from '../../shared/runtime-types' +import { + buildSnapshot, + type CdpCommandSender, + type RefEntry, + type SnapshotResult +} from './snapshot-engine' +import type { BrowserManager } from './browser-manager' + +export class BrowserError extends Error { + constructor( + readonly code: string, + message: string + ) { + super(message) + } +} + +type TabState = { + navigationId: string | null + snapshotResult: SnapshotResult | null + debuggerAttached: boolean + iframeSessions: Map + // Why: capture state is per-tab so console/network events from one tab + // don't pollute another's capture buffer. + capturing: boolean + consoleLog: BrowserConsoleEntry[] + networkLog: BrowserNetworkEntry[] + // Why: interception state tracks patterns and paused requests so the + // agent can selectively continue or block individual requests. 
+ intercepting: boolean + interceptPatterns: string[] + pausedRequests: Map + // Why: maps CDP requestId to the networkLog entry so loadingFinished + // can attribute size to the correct response when requests overlap. + networkRequestMap: Map +} + +type QueuedCommand = { + execute: () => Promise + resolve: (value: unknown) => void + reject: (reason: unknown) => void +} + +export class CdpBridge { + private activeWebContentsId: number | null = null + private readonly tabState = new Map() + private readonly commandQueues = new Map() + private readonly processingQueues = new Set() + private readonly browserManager: BrowserManager + + constructor(browserManager: BrowserManager) { + this.browserManager = browserManager + } + + setActiveTab(webContentsId: number): void { + this.activeWebContentsId = webContentsId + } + + getActiveWebContentsId(): number | null { + return this.activeWebContentsId + } + + getActivePageId(_worktreeId?: string): string | null { + if (!this.activeWebContentsId) { + return null + } + for (const [tabId, wcId] of this.getRegisteredTabs()) { + if (wcId === this.activeWebContentsId) { + return tabId + } + } + return null + } + + getPageInfo( + _worktreeId?: string, + browserPageId?: string + ): { browserPageId: string; url: string; title: string } | null { + // Why: OrcaRuntimeService pushes navigation/title updates after commands + // using a bridge-agnostic contract. The CDP bridge only routes one active + // tab at a time, but it still needs to expose the same metadata lookup. + const resolvedPageId = browserPageId ?? 
this.getActivePageId() + if (!resolvedPageId) { + return null + } + const webContentsId = this.getRegisteredTabs().get(resolvedPageId) + if (webContentsId == null) { + return null + } + const guest = webContents.fromId(webContentsId) + if (!guest || guest.isDestroyed()) { + return null + } + return { + browserPageId: resolvedPageId, + url: guest.getURL(), + title: guest.getTitle() + } + } + + async snapshot(): Promise { + return this.enqueueCommand(async () => { + const guest = this.getActiveGuest() + const sender = this.makeCdpSender(guest) + await this.ensureDebuggerAttached(guest) + + const tabId = this.resolveTabId(guest.id) + const state = this.getOrCreateTabState(tabId) + + const result = await buildSnapshot(sender, state.iframeSessions, (sessionId) => + this.makeCdpSender(guest, sessionId) + ) + state.snapshotResult = result + + const navId = await this.getNavigationId(sender) + state.navigationId = navId + + return { + browserPageId: tabId, + snapshot: result.snapshot, + refs: result.refs, + url: guest.getURL(), + title: guest.getTitle() + } + }) + } + + async click(element: string): Promise { + return this.enqueueCommand(async () => { + const guest = this.getActiveGuest() + const sender = this.makeCdpSender(guest) + await this.ensureDebuggerAttached(guest) + + const node = await this.resolveRef(guest, sender, element) + const refSender = this.senderForRef(guest, node) + + await this.scrollIntoView(refSender, node.backendDOMNodeId) + const localCenter = await this.getElementCenter(refSender, node.backendDOMNodeId) + const { cx, cy } = await this.getPageCoordinates(guest, node, localCenter.cx, localCenter.cy) + + // Why: mouseMoved fires mouseenter/mouseover which some sites need to + // reveal hover-dependent menus or clickable areas before the click lands. + // Input events go to the parent session — Chrome routes them to the + // correct frame based on coordinates. 
+ await sender('Input.dispatchMouseEvent', { type: 'mouseMoved', x: cx, y: cy }) + await sender('Input.dispatchMouseEvent', { + type: 'mousePressed', + x: cx, + y: cy, + button: 'left', + clickCount: 1 + }) + await sender('Input.dispatchMouseEvent', { + type: 'mouseReleased', + x: cx, + y: cy, + button: 'left', + clickCount: 1 + }) + + return { clicked: element } + }) + } + + async hover(element: string): Promise { + return this.enqueueCommand(async () => { + const guest = this.getActiveGuest() + const sender = this.makeCdpSender(guest) + await this.ensureDebuggerAttached(guest) + + const node = await this.resolveRef(guest, sender, element) + const refSender = this.senderForRef(guest, node) + await this.scrollIntoView(refSender, node.backendDOMNodeId) + const localCenter = await this.getElementCenter(refSender, node.backendDOMNodeId) + const { cx, cy } = await this.getPageCoordinates(guest, node, localCenter.cx, localCenter.cy) + + await sender('Input.dispatchMouseEvent', { type: 'mouseMoved', x: cx, y: cy }) + + return { hovered: element } + }) + } + + async drag(fromElement: string, toElement: string): Promise { + return this.enqueueCommand(async () => { + const guest = this.getActiveGuest() + const sender = this.makeCdpSender(guest) + await this.ensureDebuggerAttached(guest) + + const fromNode = await this.resolveRef(guest, sender, fromElement) + const toNode = await this.resolveRef(guest, sender, toElement) + const fromSender = this.senderForRef(guest, fromNode) + const toSender = this.senderForRef(guest, toNode) + + await this.scrollIntoView(fromSender, fromNode.backendDOMNodeId) + const fromLocal = await this.getElementCenter(fromSender, fromNode.backendDOMNodeId) + const from = await this.getPageCoordinates(guest, fromNode, fromLocal.cx, fromLocal.cy) + const toLocal = await this.getElementCenter(toSender, toNode.backendDOMNodeId) + const to = await this.getPageCoordinates(guest, toNode, toLocal.cx, toLocal.cy) + + // Why: 10-step interpolation with delays 
simulates human-like drag and + // triggers dragenter/dragover events on intermediate elements, which many + // drag-and-drop libraries rely on. + await sender('Input.dispatchMouseEvent', { type: 'mouseMoved', x: from.cx, y: from.cy }) + await sender('Input.dispatchMouseEvent', { + type: 'mousePressed', + x: from.cx, + y: from.cy, + button: 'left' + }) + + const steps = 10 + for (let i = 1; i <= steps; i++) { + const x = from.cx + ((to.cx - from.cx) * i) / steps + const y = from.cy + ((to.cy - from.cy) * i) / steps + await sender('Input.dispatchMouseEvent', { type: 'mouseMoved', x, y, buttons: 1 }) + await new Promise((r) => setTimeout(r, 10)) + } + + await sender('Input.dispatchMouseEvent', { + type: 'mouseReleased', + x: to.cx, + y: to.cy, + button: 'left' + }) + + return { dragged: { from: fromElement, to: toElement } } + }) + } + + async uploadFile(element: string, filePaths: string[]): Promise { + return this.enqueueCommand(async () => { + const guest = this.getActiveGuest() + const sender = this.makeCdpSender(guest) + await this.ensureDebuggerAttached(guest) + + const node = await this.resolveRef(guest, sender, element) + const refSender = this.senderForRef(guest, node) + await refSender('DOM.setFileInputFiles', { + files: filePaths, + backendNodeId: node.backendDOMNodeId + }) + + return { uploaded: filePaths.length } + }) + } + + async goto(url: string): Promise { + return this.enqueueCommand(async () => { + const guest = this.getActiveGuest() + const sender = this.makeCdpSender(guest) + await this.ensureDebuggerAttached(guest) + + const { errorText } = (await sender('Page.navigate', { url })) as { + errorText?: string + } + + if (errorText) { + throw new BrowserError('browser_navigation_failed', `Navigation failed: ${errorText}`) + } + + await this.waitForLoad(sender, guest) + this.invalidateRefMap(guest.id) + + return { url: guest.getURL(), title: guest.getTitle() } + }) + } + + async fill(element: string, value: string): Promise { + return 
this.enqueueCommand(async () => { + const guest = this.getActiveGuest() + const sender = this.makeCdpSender(guest) + await this.ensureDebuggerAttached(guest) + + const node = await this.resolveRef(guest, sender, element) + const refSender = this.senderForRef(guest, node) + + await refSender('DOM.focus', { backendNodeId: node.backendDOMNodeId }) + + // Why: select-all then delete clears any existing value before typing, + // matching the behavior of Playwright's fill() and agent-browser's fill. + await sender('Input.dispatchKeyEvent', { + type: 'keyDown', + key: 'a', + modifiers: process.platform === 'darwin' ? 4 : 2 + }) + await sender('Input.dispatchKeyEvent', { + type: 'keyUp', + key: 'a', + modifiers: process.platform === 'darwin' ? 4 : 2 + }) + await sender('Input.dispatchKeyEvent', { type: 'keyDown', key: 'Delete' }) + await sender('Input.dispatchKeyEvent', { type: 'keyUp', key: 'Delete' }) + + await sender('Input.insertText', { text: value }) + + // Why: React and other frameworks use synthetic event listeners that may not + // detect native keyboard events. Explicitly dispatching input/change ensures + // controlled components update their state. Uses refSender so that in iframe + // contexts, document.activeElement resolves to the iframe's focused element + // rather than the