Add explicit browser page targeting and screenshot fallback

This commit is contained in:
Jinwoo-H 2026-04-20 17:56:55 -04:00
parent 717a054435
commit 15694520f8
12 changed files with 1964 additions and 790 deletions

View file

@ -212,6 +212,29 @@ orca tab switch --index 1 --json # Switches to tab [1] within this worktree
If no tabs are open in the current worktree, commands return `browser_no_tab`.
### Stable Page Targeting
For single-agent flows, bare browser commands are fine: Orca will target the active browser tab in the current worktree.
For concurrent or multi-process browser automation, prefer a stable page id over ambient active-tab state:
1. Run `orca tab list --json`.
2. Read `tabs[].browserPageId` from the result.
3. Pass `--page <browserPageId>` to follow-up commands like `snapshot`, `click`, `goto`, `screenshot`, `tab switch`, or `tab close`.
Why: active-tab state and tab indices can change while another Orca CLI process is working. `browserPageId` pins the command to one concrete tab.
```bash
orca tab list --json
orca snapshot --page page-123 --json
orca click --page page-123 --element @e3 --json
orca screenshot --page page-123 --json
orca tab switch --page page-123 --json
orca tab close --page page-123 --json
```
If you also pass `--worktree`, Orca treats it as extra scoping/validation for that page id. Without `--page`, commands still fall back to the current worktree's active tab.
### Navigation
```bash
@ -224,40 +247,40 @@ orca reload [--json] # Reload the current page
### Observation
```bash
orca snapshot [--json] # Accessibility tree snapshot with element refs
orca screenshot [--format <png|jpeg>] [--json] # Viewport screenshot (base64)
orca full-screenshot [--format <png|jpeg>] [--json] # Full-page screenshot (base64)
orca pdf [--json] # Export page as PDF (base64)
orca snapshot [--page <browserPageId>] [--json] # Accessibility tree snapshot with element refs
orca screenshot [--page <browserPageId>] [--format <png|jpeg>] [--json] # Viewport screenshot (base64)
orca full-screenshot [--page <browserPageId>] [--format <png|jpeg>] [--json] # Full-page screenshot (base64)
orca pdf [--page <browserPageId>] [--json] # Export page as PDF (base64)
```
### Interaction
```bash
orca click --element <ref> [--json] # Click an element by ref
orca dblclick --element <ref> [--json] # Double-click an element
orca fill --element <ref> --value <text> [--json] # Clear and fill an input
orca type --input <text> [--json] # Type at current focus (no element targeting)
orca select --element <ref> --value <value> [--json] # Select dropdown option
orca check --element <ref> [--json] # Check a checkbox
orca uncheck --element <ref> [--json] # Uncheck a checkbox
orca scroll --direction <up|down> [--amount <pixels>] [--json] # Scroll viewport
orca scrollintoview --element <ref> [--json] # Scroll element into view
orca hover --element <ref> [--json] # Hover over an element
orca focus --element <ref> [--json] # Focus an element
orca drag --from <ref> --to <ref> [--json] # Drag from one element to another
orca clear --element <ref> [--json] # Clear an input field
orca select-all --element <ref> [--json] # Select all text in an element
orca keypress --key <key> [--json] # Press a key (Enter, Tab, Escape, etc.)
orca upload --element <ref> --files <paths> [--json] # Upload files to a file input
orca click --element <ref> [--page <browserPageId>] [--json] # Click an element by ref
orca dblclick --element <ref> [--page <browserPageId>] [--json] # Double-click an element
orca fill --element <ref> --value <text> [--page <browserPageId>] [--json] # Clear and fill an input
orca type --input <text> [--page <browserPageId>] [--json] # Type at current focus (no element targeting)
orca select --element <ref> --value <value> [--page <browserPageId>] [--json] # Select dropdown option
orca check --element <ref> [--page <browserPageId>] [--json] # Check a checkbox
orca uncheck --element <ref> [--page <browserPageId>] [--json] # Uncheck a checkbox
orca scroll --direction <up|down> [--amount <pixels>] [--page <browserPageId>] [--json] # Scroll viewport
orca scrollintoview --element <ref> [--page <browserPageId>] [--json] # Scroll element into view
orca hover --element <ref> [--page <browserPageId>] [--json] # Hover over an element
orca focus --element <ref> [--page <browserPageId>] [--json] # Focus an element
orca drag --from <ref> --to <ref> [--page <browserPageId>] [--json] # Drag from one element to another
orca clear --element <ref> [--page <browserPageId>] [--json] # Clear an input field
orca select-all --element <ref> [--page <browserPageId>] [--json] # Select all text in an element
orca keypress --key <key> [--page <browserPageId>] [--json] # Press a key (Enter, Tab, Escape, etc.)
orca upload --element <ref> --files <paths> [--page <browserPageId>] [--json] # Upload files to a file input
```
### Tab Management
```bash
orca tab list [--json] # List open browser tabs
orca tab switch --index <n> [--json] # Switch active tab (invalidates refs)
orca tab switch (--index <n> | --page <browserPageId>) [--json] # Switch active tab (invalidates refs)
orca tab create [--url <url>] [--json] # Open a new browser tab
orca tab close [--index <n>] [--json] # Close a browser tab
orca tab close [--index <n> | --page <browserPageId>] [--json] # Close a browser tab
```
### Wait / Synchronization
@ -503,10 +526,12 @@ When `orca tab create` opens a new tab, it is automatically set as the active ta
- After switching tabs, re-snapshot.
- If you get `browser_stale_ref`, re-snapshot and retry with the new refs.
- Use `orca tab list` before `orca tab switch` to know which tabs exist.
- For concurrent browser workflows, prefer `orca tab list --json` and reuse `tabs[].browserPageId` with `--page` on later commands.
- Use `orca wait` to synchronize after actions that trigger async updates (form submits, SPA navigation, modals) instead of arbitrary sleeps.
- Use `orca eval` as an escape hatch for interactions not covered by other commands.
- Use `orca exec --command "help"` to discover extended commands.
- Worktree scoping is automatic — you'll only see tabs from your worktree by default.
- Bare browser commands without `--page` still target the current worktree's active tab, which is convenient but less robust for multi-process automation.
- Tab creation auto-activates the new tab — no need for `tab switch` after `tab create`.
- Browser commands auto-switch the active worktree if needed — no manual worktree activation required.

View file

@ -1,3 +1,5 @@
/* oxlint-disable max-lines -- Why: CLI parsing behavior is exercised end-to-end
in one file so command and flag interactions stay visible in a single suite. */
import path from 'path'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
@ -319,3 +321,155 @@ describe('orca cli worktree awareness', () => {
})
})
})
// Exercises how the `--page` flag interacts with worktree resolution: each test
// queues the RPC replies `main` will consume and then asserts the exact
// sequence of `callMock` invocations.
describe('orca cli browser page targeting', () => {
  // Fresh `worktree.list` reply containing the single `/tmp/repo/feature`
  // worktree. Built per call so no test shares a fixture object with another.
  const worktreeListReply = () => ({
    id: 'req_list',
    ok: true as const,
    result: {
      worktrees: [
        {
          id: 'repo::/tmp/repo/feature',
          repoId: 'repo',
          path: '/tmp/repo/feature',
          branch: 'feature/foo',
          linkedIssue: null,
          git: {
            path: '/tmp/repo/feature',
            head: 'abc',
            branch: 'feature/foo',
            isBare: false,
            isMainWorktree: false
          },
          displayName: '',
          comment: ''
        }
      ],
      totalCount: 1,
      truncated: false
    },
    _meta: { runtimeId: 'runtime-1' }
  })

  // Fresh `browser.snapshot` reply for page `page-1`.
  const snapshotReply = () => ({
    id: 'req_snapshot',
    ok: true as const,
    result: {
      browserPageId: 'page-1',
      snapshot: 'tree',
      refs: [],
      url: 'https://example.com',
      title: 'Example'
    },
    _meta: { runtimeId: 'runtime-1' }
  })

  // Selector the CLI is expected to send for the feature worktree.
  const featureWorktreeSelector = () => `path:${path.resolve('/tmp/repo/feature')}`

  // Replaces console.log with a no-op spy; `vi.restoreAllMocks()` in afterEach
  // undoes it.
  const muteConsoleLog = () => {
    vi.spyOn(console, 'log').mockImplementation(() => {})
  }

  beforeEach(() => {
    callMock.mockReset()
  })

  afterEach(() => {
    vi.restoreAllMocks()
  })

  it('passes explicit page ids to snapshot without resolving the current worktree', async () => {
    callMock.mockResolvedValueOnce(snapshotReply())
    muteConsoleLog()

    // The cwd is deliberately not a known worktree: exactly one RPC is expected.
    await main(['snapshot', '--page', 'page-1', '--json'], '/tmp/not-an-orca-worktree')

    expect(callMock).toHaveBeenCalledTimes(1)
    expect(callMock).toHaveBeenCalledWith('browser.snapshot', {
      page: 'page-1'
    })
  })

  it('resolves current worktree only when --page is combined with --worktree current', async () => {
    callMock.mockResolvedValueOnce(worktreeListReply())
    callMock.mockResolvedValueOnce(snapshotReply())
    muteConsoleLog()

    const argv = ['snapshot', '--page', 'page-1', '--worktree', 'current', '--json']
    await main(argv, '/tmp/repo/feature/src')

    expect(callMock).toHaveBeenNthCalledWith(1, 'worktree.list', {
      limit: 10_000
    })
    expect(callMock).toHaveBeenNthCalledWith(2, 'browser.snapshot', {
      page: 'page-1',
      worktree: featureWorktreeSelector()
    })
  })

  it('scopes tab switch by page id to the current worktree when available', async () => {
    callMock.mockResolvedValueOnce(worktreeListReply())
    callMock.mockResolvedValueOnce({
      id: 'req_switch',
      ok: true,
      result: {
        switched: 2,
        browserPageId: 'page-2'
      },
      _meta: { runtimeId: 'runtime-1' }
    })
    muteConsoleLog()

    await main(['tab', 'switch', '--page', 'page-2', '--json'], '/tmp/repo/feature/src')

    expect(callMock).toHaveBeenNthCalledWith(1, 'worktree.list', {
      limit: 10_000
    })
    // `index` is asserted as an explicit undefined key alongside `page`.
    expect(callMock).toHaveBeenNthCalledWith(2, 'browser.tabSwitch', {
      index: undefined,
      page: 'page-2',
      worktree: featureWorktreeSelector()
    })
  })
})

View file

@ -72,6 +72,11 @@ type CommandSpec = {
notes?: string[]
}
/**
 * Optional targeting fields for a browser CLI command. Both members may be
 * absent.
 *
 * NOTE(review): presumably the shape returned by `getBrowserCommandTarget` and
 * spread into `browser.*` RPC params — confirm against that helper.
 */
type BrowserCliTarget = {
  /** Browser page id; assumed to come from the `--page` flag — TODO confirm. */
  page?: string
  /** Worktree selector string; assumed to use the `--worktree` selector format — TODO confirm. */
  worktree?: string
}
const DEFAULT_TERMINAL_WAIT_RPC_TIMEOUT_MS = 5 * 60 * 1000
const GLOBAL_FLAGS = ['help', 'json']
export const COMMAND_SPECS: CommandSpec[] = [
@ -854,55 +859,55 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
// ── Browser automation dispatch ──
if (matches(commandPath, ['snapshot'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserSnapshotResult>('browser.snapshot', { worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserSnapshotResult>('browser.snapshot', target)
return printResult(result, json, formatSnapshot)
}
if (matches(commandPath, ['screenshot'])) {
const format = getOptionalStringFlag(parsed.flags, 'format')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserScreenshotResult>('browser.screenshot', {
format: format === 'jpeg' ? 'jpeg' : undefined,
worktree
...target
})
return printResult(result, json, formatScreenshot)
}
if (matches(commandPath, ['click'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserClickResult>('browser.click', { element, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserClickResult>('browser.click', { element, ...target })
return printResult(result, json, (v) => `Clicked ${v.clicked}`)
}
if (matches(commandPath, ['fill'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const value = getRequiredStringFlag(parsed.flags, 'value')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserFillResult>('browser.fill', {
element,
value,
worktree
...target
})
return printResult(result, json, (v) => `Filled ${v.filled}`)
}
if (matches(commandPath, ['type'])) {
const input = getRequiredStringFlag(parsed.flags, 'input')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserTypeResult>('browser.type', { input, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserTypeResult>('browser.type', { input, ...target })
return printResult(result, json, () => 'Typed input')
}
if (matches(commandPath, ['select'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const value = getRequiredStringFlag(parsed.flags, 'value')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserSelectResult>('browser.select', {
element,
value,
worktree
...target
})
return printResult(result, json, (v) => `Selected ${v.selected}`)
}
@ -913,47 +918,45 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
throw new RuntimeClientError('invalid_argument', '--direction must be "up" or "down"')
}
const amount = getOptionalPositiveIntegerFlag(parsed.flags, 'amount')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserScrollResult>('browser.scroll', {
direction,
amount,
worktree
...target
})
return printResult(result, json, (v) => `Scrolled ${v.scrolled}`)
}
if (matches(commandPath, ['goto'])) {
const url = getRequiredStringFlag(parsed.flags, 'url')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
// Why: navigation waits for network idle which can exceed the default 15s RPC timeout
const result = await client.call<BrowserGotoResult>(
'browser.goto',
{ url, worktree },
{ url, ...target },
{ timeoutMs: 60_000 }
)
return printResult(result, json, (v) => `Navigated to ${v.url}${v.title}`)
}
if (matches(commandPath, ['back'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserBackResult>('browser.back', { worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserBackResult>('browser.back', target)
return printResult(result, json, (v) => `Back to ${v.url}${v.title}`)
}
if (matches(commandPath, ['reload'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserReloadResult>(
'browser.reload',
{ worktree },
{ timeoutMs: 60_000 }
)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserReloadResult>('browser.reload', target, {
timeoutMs: 60_000
})
return printResult(result, json, (v) => `Reloaded ${v.url}${v.title}`)
}
if (matches(commandPath, ['eval'])) {
const expression = getRequiredStringFlag(parsed.flags, 'expression')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserEvalResult>('browser.eval', { expression, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserEvalResult>('browser.eval', { expression, ...target })
return printResult(result, json, (v) => v.result)
}
@ -965,15 +968,17 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['tab', 'switch'])) {
const index = getOptionalNonNegativeIntegerFlag(parsed.flags, 'index')
if (index === undefined) {
throw new RuntimeClientError('invalid_argument', 'Missing required --index')
const page = getOptionalStringFlag(parsed.flags, 'page')
if (index === undefined && !page) {
throw new RuntimeClientError('invalid_argument', 'Missing required --index or --page')
}
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserTabSwitchResult>('browser.tabSwitch', {
index,
page,
worktree
})
return printResult(result, json, (v) => `Switched to tab ${v.switched}`)
return printResult(result, json, (v) => `Switched to tab ${v.switched} (${v.browserPageId})`)
}
if (matches(commandPath, ['tab', 'create'])) {
@ -989,15 +994,18 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['tab', 'close'])) {
const index = getOptionalNonNegativeIntegerFlag(parsed.flags, 'index')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<{ closed: boolean }>('browser.tabClose', { index, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<{ closed: boolean }>('browser.tabClose', {
index,
...target
})
return printResult(result, json, () => 'Tab closed')
}
if (matches(commandPath, ['exec'])) {
const command = getRequiredStringFlag(parsed.flags, 'command')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.exec', { command, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.exec', { command, ...target })
return printResult(result, json, (v) => JSON.stringify(v, null, 2))
}
@ -1009,7 +1017,7 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
const load = getOptionalStringFlag(parsed.flags, 'load')
const fn = getOptionalStringFlag(parsed.flags, 'fn')
const state = getOptionalStringFlag(parsed.flags, 'state')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserWaitResult>('browser.wait', {
selector,
timeout,
@ -1018,7 +1026,7 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
load,
fn,
state,
worktree
...target
})
return printResult(result, json, (v) => JSON.stringify(v, null, 2))
}
@ -1026,11 +1034,11 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['check']) || matches(commandPath, ['uncheck'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const checked = matches(commandPath, ['check'])
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserCheckResult>('browser.check', {
element,
checked,
worktree
...target
})
return printResult(result, json, (v) =>
v.checked ? `Checked ${element}` : `Unchecked ${element}`
@ -1039,63 +1047,66 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['focus'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserFocusResult>('browser.focus', { element, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserFocusResult>('browser.focus', { element, ...target })
return printResult(result, json, (v) => `Focused ${v.focused}`)
}
if (matches(commandPath, ['clear'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserClearResult>('browser.clear', { element, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserClearResult>('browser.clear', { element, ...target })
return printResult(result, json, (v) => `Cleared ${v.cleared}`)
}
if (matches(commandPath, ['select-all'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserSelectAllResult>('browser.selectAll', {
element,
worktree
...target
})
return printResult(result, json, (v) => `Selected all in ${v.selected}`)
}
if (matches(commandPath, ['keypress'])) {
const key = getRequiredStringFlag(parsed.flags, 'key')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserKeypressResult>('browser.keypress', { key, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserKeypressResult>('browser.keypress', {
key,
...target
})
return printResult(result, json, (v) => `Pressed ${v.pressed}`)
}
if (matches(commandPath, ['pdf'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserPdfResult>('browser.pdf', { worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserPdfResult>('browser.pdf', target)
return printResult(result, json, (v) => `PDF exported (${v.data.length} bytes base64)`)
}
if (matches(commandPath, ['full-screenshot'])) {
const format = getOptionalStringFlag(parsed.flags, 'format') === 'jpeg' ? 'jpeg' : 'png'
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserScreenshotResult>('browser.fullScreenshot', {
format,
worktree
...target
})
return printResult(result, json, (v) => `Full-page screenshot captured (${v.format})`)
}
if (matches(commandPath, ['hover'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserHoverResult>('browser.hover', { element, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserHoverResult>('browser.hover', { element, ...target })
return printResult(result, json, (v) => `Hovered ${v.hovered}`)
}
if (matches(commandPath, ['drag'])) {
const from = getRequiredStringFlag(parsed.flags, 'from')
const to = getRequiredStringFlag(parsed.flags, 'to')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserDragResult>('browser.drag', { from, to, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserDragResult>('browser.drag', { from, to, ...target })
return printResult(result, json, (v) => `Dragged ${v.dragged.from}${v.dragged.to}`)
}
@ -1103,11 +1114,11 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
const element = getRequiredStringFlag(parsed.flags, 'element')
const filesStr = getRequiredStringFlag(parsed.flags, 'files')
const files = filesStr.split(',').map((f) => f.trim())
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserUploadResult>('browser.upload', {
element,
files,
worktree
...target
})
return printResult(result, json, (v) => `Uploaded ${v.uploaded} file(s)`)
}
@ -1116,10 +1127,10 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['cookie', 'get'])) {
const url = getOptionalStringFlag(parsed.flags, 'url')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserCookieGetResult>('browser.cookie.get', {
url,
worktree
...target
})
return printResult(result, json, (v) => {
if (v.cookies.length === 0) {
@ -1155,8 +1166,7 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (expires) {
params.expires = Number(expires)
}
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
params.worktree = worktree
Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client))
const result = await client.call<BrowserCookieSetResult>('browser.cookie.set', params)
return printResult(result, json, (v) =>
v.success ? `Cookie "${name}" set` : `Failed to set cookie "${name}"`
@ -1174,8 +1184,7 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (url) {
params.url = url
}
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
params.worktree = worktree
Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client))
const result = await client.call<BrowserCookieDeleteResult>('browser.cookie.delete', params)
return printResult(result, json, () => `Cookie "${name}" deleted`)
}
@ -1197,8 +1206,7 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (parsed.flags.has('mobile')) {
params.mobile = true
}
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
params.worktree = worktree
Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client))
const result = await client.call<BrowserViewportResult>('browser.viewport', params)
return printResult(
result,
@ -1221,8 +1229,7 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
}
params.accuracy = n
}
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
params.worktree = worktree
Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client))
const result = await client.call<BrowserGeolocationResult>('browser.geolocation', params)
return printResult(result, json, (v) => `Geolocation set to ${v.latitude}, ${v.longitude}`)
}
@ -1235,8 +1242,7 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (patternsStr) {
params.patterns = patternsStr.split(',').map((p) => p.trim())
}
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
params.worktree = worktree
Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client))
const result = await client.call<BrowserInterceptEnableResult>(
'browser.intercept.enable',
params
@ -1249,18 +1255,19 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
}
if (matches(commandPath, ['intercept', 'disable'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserInterceptDisableResult>('browser.intercept.disable', {
worktree
})
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserInterceptDisableResult>(
'browser.intercept.disable',
target
)
return printResult(result, json, () => 'Interception disabled')
}
if (matches(commandPath, ['intercept', 'list'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<{ requests: BrowserInterceptedRequest[] }>(
'browser.intercept.list',
{ worktree }
target
)
return printResult(result, json, (v) => {
if (v.requests.length === 0) {
@ -1275,18 +1282,14 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
// ── Console/network capture ──
if (matches(commandPath, ['capture', 'start'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserCaptureStartResult>('browser.capture.start', {
worktree
})
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserCaptureStartResult>('browser.capture.start', target)
return printResult(result, json, () => 'Capture started (console + network)')
}
if (matches(commandPath, ['capture', 'stop'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserCaptureStopResult>('browser.capture.stop', {
worktree
})
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserCaptureStopResult>('browser.capture.stop', target)
return printResult(result, json, () => 'Capture stopped')
}
@ -1296,8 +1299,7 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (limit !== undefined) {
params.limit = limit
}
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
params.worktree = worktree
Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client))
const result = await client.call<BrowserConsoleResult>('browser.console', params)
return printResult(result, json, (v) => {
if (v.entries.length === 0) {
@ -1313,8 +1315,7 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (limit !== undefined) {
params.limit = limit
}
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
params.worktree = worktree
Object.assign(params, await getBrowserCommandTarget(parsed.flags, cwd, client))
const result = await client.call<BrowserNetworkLogResult>('browser.network', params)
return printResult(result, json, (v) => {
if (v.entries.length === 0) {
@ -1328,14 +1329,14 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['dblclick'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.dblclick', { element, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.dblclick', { element, ...target })
return printResult(result, json, () => `Double-clicked ${element}`)
}
if (matches(commandPath, ['forward'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.forward', { worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.forward', target)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return printResult(result, json, (v: any) =>
v?.url ? `Navigated forward to ${v.url}` : 'Navigated forward'
@ -1344,19 +1345,19 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['scrollintoview'])) {
const element = getRequiredStringFlag(parsed.flags, 'element')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.scrollIntoView', { element, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.scrollIntoView', { element, ...target })
return printResult(result, json, () => `Scrolled ${element} into view`)
}
if (matches(commandPath, ['get'])) {
const what = getRequiredStringFlag(parsed.flags, 'what')
const element = getOptionalStringFlag(parsed.flags, 'element')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.get', {
what,
selector: element,
worktree
...target
})
return printResult(result, json, (v) =>
typeof v === 'string' ? v : JSON.stringify(v, null, 2)
@ -1366,8 +1367,12 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['is'])) {
const what = getRequiredStringFlag(parsed.flags, 'what')
const element = getRequiredStringFlag(parsed.flags, 'element')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.is', { what, selector: element, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.is', {
what,
selector: element,
...target
})
return printResult(result, json, (v) => String(v))
}
@ -1375,8 +1380,8 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['inserttext'])) {
const text = getRequiredStringFlag(parsed.flags, 'text')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.keyboardInsertText', { text, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.keyboardInsertText', { text, ...target })
return printResult(result, json, () => 'Text inserted')
}
@ -1385,30 +1390,30 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['mouse', 'move'])) {
const x = getRequiredFiniteNumber(parsed.flags, 'x')
const y = getRequiredFiniteNumber(parsed.flags, 'y')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.mouseMove', { x, y, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.mouseMove', { x, y, ...target })
return printResult(result, json, () => `Mouse moved to ${x},${y}`)
}
if (matches(commandPath, ['mouse', 'down'])) {
const button = getOptionalStringFlag(parsed.flags, 'button')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.mouseDown', { button, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.mouseDown', { button, ...target })
return printResult(result, json, () => `Mouse button ${button ?? 'left'} pressed`)
}
if (matches(commandPath, ['mouse', 'up'])) {
const button = getOptionalStringFlag(parsed.flags, 'button')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.mouseUp', { button, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.mouseUp', { button, ...target })
return printResult(result, json, () => `Mouse button ${button ?? 'left'} released`)
}
if (matches(commandPath, ['mouse', 'wheel'])) {
const dy = getRequiredFiniteNumber(parsed.flags, 'dy')
const dx = getOptionalNumberFlag(parsed.flags, 'dx')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.mouseWheel', { dy, dx, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.mouseWheel', { dy, dx, ...target })
return printResult(
result,
json,
@ -1423,13 +1428,13 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
const value = getRequiredStringFlag(parsed.flags, 'value')
const action = getRequiredStringFlag(parsed.flags, 'action')
const text = getOptionalStringFlag(parsed.flags, 'text')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.find', {
locator,
value,
action,
text,
worktree
...target
})
return printResult(result, json, (v) => JSON.stringify(v, null, 2))
}
@ -1438,41 +1443,45 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['set', 'device'])) {
const name = getRequiredStringFlag(parsed.flags, 'name')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.setDevice', { name, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.setDevice', { name, ...target })
return printResult(result, json, () => `Device emulation set to ${name}`)
}
if (matches(commandPath, ['set', 'offline'])) {
const state = getOptionalStringFlag(parsed.flags, 'state')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.setOffline', { state, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.setOffline', { state, ...target })
return printResult(result, json, () => `Offline mode ${state ?? 'toggled'}`)
}
if (matches(commandPath, ['set', 'headers'])) {
const headers = getRequiredStringFlag(parsed.flags, 'headers')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.setHeaders', { headers, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.setHeaders', { headers, ...target })
return printResult(result, json, () => 'Extra HTTP headers set')
}
if (matches(commandPath, ['set', 'credentials'])) {
const user = getRequiredStringFlag(parsed.flags, 'user')
const pass = getRequiredStringFlag(parsed.flags, 'pass')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.setCredentials', { user, pass, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.setCredentials', {
user,
pass,
...target
})
return printResult(result, json, () => `HTTP auth credentials set for ${user}`)
}
if (matches(commandPath, ['set', 'media'])) {
const colorScheme = getOptionalStringFlag(parsed.flags, 'color-scheme')
const reducedMotion = getOptionalStringFlag(parsed.flags, 'reduced-motion')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.setMedia', {
colorScheme,
reducedMotion,
worktree
...target
})
return printResult(result, json, () => 'Media preferences set')
}
@ -1480,15 +1489,15 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
// ── Clipboard commands ──
if (matches(commandPath, ['clipboard', 'read'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.clipboardRead', { worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.clipboardRead', target)
return printResult(result, json, (v) => JSON.stringify(v, null, 2))
}
if (matches(commandPath, ['clipboard', 'write'])) {
const text = getRequiredStringFlag(parsed.flags, 'text')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.clipboardWrite', { text, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.clipboardWrite', { text, ...target })
return printResult(result, json, () => 'Clipboard updated')
}
@ -1496,14 +1505,14 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['dialog', 'accept'])) {
const text = getOptionalStringFlag(parsed.flags, 'text')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.dialogAccept', { text, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.dialogAccept', { text, ...target })
return printResult(result, json, () => 'Dialog accepted')
}
if (matches(commandPath, ['dialog', 'dismiss'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.dialogDismiss', { worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.dialogDismiss', target)
return printResult(result, json, () => 'Dialog dismissed')
}
@ -1511,51 +1520,51 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['storage', 'local', 'get'])) {
const key = getRequiredStringFlag(parsed.flags, 'key')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.local.get', { key, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.local.get', { key, ...target })
return printResult(result, json, (v) => JSON.stringify(v, null, 2))
}
if (matches(commandPath, ['storage', 'local', 'set'])) {
const key = getRequiredStringFlag(parsed.flags, 'key')
const value = getRequiredStringFlag(parsed.flags, 'value')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.local.set', {
key,
value,
worktree
...target
})
return printResult(result, json, () => `localStorage["${key}"] set`)
}
if (matches(commandPath, ['storage', 'local', 'clear'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.local.clear', { worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.local.clear', target)
return printResult(result, json, () => 'localStorage cleared')
}
if (matches(commandPath, ['storage', 'session', 'get'])) {
const key = getRequiredStringFlag(parsed.flags, 'key')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.session.get', { key, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.session.get', { key, ...target })
return printResult(result, json, (v) => JSON.stringify(v, null, 2))
}
if (matches(commandPath, ['storage', 'session', 'set'])) {
const key = getRequiredStringFlag(parsed.flags, 'key')
const value = getRequiredStringFlag(parsed.flags, 'value')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.session.set', {
key,
value,
worktree
...target
})
return printResult(result, json, () => `sessionStorage["${key}"] set`)
}
if (matches(commandPath, ['storage', 'session', 'clear'])) {
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.session.clear', { worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.storage.session.clear', target)
return printResult(result, json, () => 'sessionStorage cleared')
}
@ -1564,8 +1573,8 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['download'])) {
const selector = getRequiredStringFlag(parsed.flags, 'selector')
const path = getRequiredStringFlag(parsed.flags, 'path')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.download', { selector, path, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.download', { selector, path, ...target })
return printResult(result, json, () => `Downloaded to ${path}`)
}
@ -1573,8 +1582,8 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
if (matches(commandPath, ['highlight'])) {
const selector = getRequiredStringFlag(parsed.flags, 'selector')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.highlight', { selector, worktree })
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<unknown>('browser.highlight', { selector, ...target })
return printResult(result, json, () => `Highlighted ${selector}`)
}
@ -1648,7 +1657,10 @@ export function validateCommandAndFlags(parsed: ParsedArgs): void {
}
for (const flag of parsed.flags.keys()) {
if (!spec.allowedFlags.includes(flag)) {
if (
!spec.allowedFlags.includes(flag) &&
!(flag === 'page' && supportsBrowserPageFlag(spec.path))
) {
throw new RuntimeClientError(
'invalid_argument',
`Unknown flag --${flag} for command: ${spec.path.join(' ')}`
@ -1661,6 +1673,17 @@ export function findCommandSpec(commandPath: string[]): CommandSpec | undefined
return COMMAND_SPECS.find((spec) => matches(spec.path, commandPath))
}
/**
 * Reports whether the command at `commandPath` accepts the `--page` flag.
 *
 * Commands whose first segment is `open`, `status`, `repo`, `worktree`, or
 * `terminal` never accept it, and neither do `tab list` and `tab create`.
 * Every other command path does.
 *
 * @param commandPath Command segments, e.g. `['tab', 'switch']`.
 * @returns `true` when `--page` is a valid flag for the command.
 */
function supportsBrowserPageFlag(commandPath: string[]): boolean {
  switch (commandPath[0]) {
    case 'open':
    case 'status':
    case 'repo':
    case 'worktree':
    case 'terminal':
      return false
    default:
      break
  }
  const fullCommand = commandPath.join(' ')
  return fullCommand !== 'tab list' && fullCommand !== 'tab create'
}
function isCommandGroup(commandPath: string[]): boolean {
return (
(commandPath.length === 1 &&
@ -1794,6 +1817,34 @@ async function getBrowserWorktreeSelector(
}
}
/**
 * Builds the target portion of a browser RPC payload from the CLI flags.
 *
 * - Without `--page`, the target is just the worktree chosen by
 *   `getBrowserWorktreeSelector`.
 * - With `--page` and no `--worktree` (or `--worktree all`), the page id alone
 *   is the target.
 * - With `--page` and an explicit `--worktree`, both are sent: `active` and
 *   `current` resolve through `resolveCurrentWorktreeSelector`, and any other
 *   selector is passed through `normalizeWorktreeSelector`.
 *
 * @param flags Parsed CLI flags.
 * @param cwd Working directory used when resolving worktree selectors.
 * @param client Runtime client used for worktree resolution.
 * @returns The `page` and/or `worktree` fields to spread into the RPC params.
 */
async function getBrowserCommandTarget(
  flags: Map<string, string | boolean>,
  cwd: string,
  client: RuntimeClient
): Promise<BrowserCliTarget> {
  const page = getOptionalStringFlag(flags, 'page')
  if (!page) {
    const worktree = await getBrowserWorktreeSelector(flags, cwd, client)
    return { worktree }
  }

  const requestedWorktree = getOptionalStringFlag(flags, 'worktree')
  const hasScopedWorktree = Boolean(requestedWorktree) && requestedWorktree !== 'all'
  if (!requestedWorktree || !hasScopedWorktree) {
    return { page }
  }

  const refersToCurrent = requestedWorktree === 'active' || requestedWorktree === 'current'
  if (refersToCurrent) {
    const worktree = await resolveCurrentWorktreeSelector(cwd, client)
    return { page, worktree }
  }
  return { page, worktree: normalizeWorktreeSelector(requestedWorktree, cwd) }
}
function getOptionalNumberFlag(
flags: Map<string, string | boolean>,
name: string
@ -2031,7 +2082,7 @@ function formatWorktreeShow(result: { worktree: RuntimeWorktreeRecord }): string
}
function formatSnapshot(result: BrowserSnapshotResult): string {
const header = `${result.title}${result.url}\n`
const header = `page: ${result.browserPageId}\n${result.title}${result.url}\n`
return header + result.snapshot
}
@ -2046,7 +2097,7 @@ function formatTabList(result: BrowserTabListResult): string {
return result.tabs
.map((t) => {
const marker = t.active ? '* ' : ' '
return `${marker}[${t.index}] ${t.title}${t.url}`
return `${marker}[${t.index}] ${t.browserPageId} ${t.title}${t.url}`
})
.join('\n')
}
@ -2203,6 +2254,8 @@ Browser Workflow:
orca goto --url https://example.com
2. Inspect the page: orca snapshot
(Returns an accessibility tree with element refs like e1, e2, e3)
For concurrent workflows, prefer: orca tab list --json
then reuse tabs[].browserPageId with --page <id> on later commands.
3. Interact: orca click --element e2
orca fill --element e5 --value "search query"
orca keypress --key Enter
@ -2219,6 +2272,7 @@ Browser Options:
--direction <dir> Scroll direction: up, down, left, right
--amount <pixels> Scroll distance in pixels (default: viewport height)
--index <n> Tab index (from \`tab list\`)
--page <id> Stable browser page id (preferred for concurrent workflows)
--format <png|jpeg> Screenshot image format
--from <ref> Drag source element ref
--to <ref> Drag target element ref
@ -2250,10 +2304,13 @@ Examples:
function formatCommandHelp(spec: CommandSpec): string {
const lines = [`orca ${spec.path.join(' ')}`, '', `Usage: ${spec.usage}`, '', spec.summary]
const displayedFlags = supportsBrowserPageFlag(spec.path)
? [...spec.allowedFlags, 'page']
: spec.allowedFlags
if (spec.allowedFlags.length > 0) {
if (displayedFlags.length > 0) {
lines.push('', 'Options:')
for (const flag of spec.allowedFlags) {
for (const flag of displayedFlags) {
lines.push(` ${formatFlagHelp(flag)}`)
}
}
@ -2309,7 +2366,7 @@ function formatFlagHelp(flag: string): string {
worktree:
'--worktree <selector> Worktree selector such as id:<id>, branch:<branch>, issue:<number>, path:<path>, or active/current',
// Browser automation flags
element: '--element <ref> Element ref from snapshot (e.g. @e3)',
element: '--element <ref> Element ref from snapshot (e.g. e3)',
url: '--url <url> URL to navigate to',
value: '--value <text> Value to fill or select',
input: '--input <text> Text to type at current focus',
@ -2317,6 +2374,7 @@ function formatFlagHelp(flag: string): string {
direction: '--direction <up|down> Scroll direction',
amount: '--amount <pixels> Scroll distance in pixels',
index: '--index <n> Tab index to switch to',
page: '--page <id> Stable browser page id from `orca tab list --json`',
format: '--format <png|jpeg> Screenshot image format'
}

View file

@ -167,7 +167,34 @@ describe('AgentBrowserBridge', () => {
it('translates success response to result', async () => {
succeedWith({ snapshot: 'tree output' })
const result = await bridge.snapshot()
expect(result).toEqual({ snapshot: 'tree output' })
expect(result).toEqual({ browserPageId: 'tab-1', snapshot: 'tree output' })
})
it('routes snapshot to an explicit browser page id without changing the active tab', async () => {
  // Two registered tabs: tab-a is backed by webContents 1, tab-b by webContents 2.
  const registeredTabs = new Map([
    ['tab-a', 1],
    ['tab-b', 2]
  ])
  const firstContents = mockWebContents(1, 'https://a.com', 'A')
  const secondContents = mockWebContents(2, 'https://b.com', 'B')
  webContentsFromIdMock.mockImplementation((id: number) =>
    id === 1 ? firstContents : secondContents
  )
  const pageBridge = new AgentBrowserBridge(mockBrowserManager(registeredTabs))
  // The active tab is tab-a, but the snapshot below asks for tab-b.
  pageBridge.setActiveTab(1)
  succeedWith({ snapshot: 'tree output' })

  const result = await pageBridge.snapshot(undefined, 'tab-b')

  const snapshotCall = execFileMock.mock.calls.find((call: unknown[]) =>
    (call[1] as string[]).includes('snapshot')
  )
  expect(snapshotCall).toBeTruthy()
  // The value following --session must name tab-b's session.
  const cliArgs = snapshotCall![1] as string[]
  expect(cliArgs).toContain('--session')
  const sessionValue = cliArgs[cliArgs.indexOf('--session') + 1]
  expect(sessionValue).toBe('orca-tab-tab-b')
  expect(result).toEqual({ browserPageId: 'tab-b', snapshot: 'tree output' })
  // Targeting by page id must leave the active tab untouched.
  expect(pageBridge.getActiveWebContentsId()).toBe(1)
})
it('translates error response to BrowserError', async () => {
@ -232,6 +259,7 @@ describe('AgentBrowserBridge', () => {
const b = new AgentBrowserBridge(mockBrowserManager(tabs, worktrees))
const result = b.tabList('wt-1')
expect(result.tabs).toHaveLength(1)
expect(result.tabs[0].browserPageId).toBe('tab-a')
expect(result.tabs[0].url).toBe('https://a.com')
})
})
@ -262,7 +290,7 @@ describe('AgentBrowserBridge', () => {
)
const [r1, r2] = await Promise.all([bridge.snapshot(), bridge.click('@e1')])
expect(r1).toEqual({ ok: true })
expect(r1).toEqual({ browserPageId: 'tab-1', ok: true })
expect(r2).toEqual({ ok: true })
// Why: close runs first (stale session cleanup), then commands execute sequentially
const snapshotIdx = commandCalls.findIndex((a) => a.includes('snapshot'))
@ -431,7 +459,10 @@ describe('AgentBrowserBridge', () => {
releaseDestroyClose!()
await destroyPromise
await expect(nextSnapshot).resolves.toEqual({ snapshot: 'after-destroy' })
await expect(nextSnapshot).resolves.toEqual({
browserPageId: 'tab-1',
snapshot: 'after-destroy'
})
expect(commandCalls.filter((args) => args.includes('close'))).toHaveLength(2)
})
@ -497,10 +528,53 @@ describe('AgentBrowserBridge', () => {
b.setActiveTab(1)
const result = await b.tabSwitch(1)
expect(result).toEqual({ switched: 1 })
expect(result).toEqual({ switched: 1, browserPageId: 'tab-b' })
expect(b.getActiveWebContentsId()).toBe(2)
})
it('switches tabs by explicit browser page id', async () => {
  const registeredTabs = new Map([
    ['tab-a', 1],
    ['tab-b', 2]
  ])
  const firstContents = mockWebContents(1)
  const secondContents = mockWebContents(2)
  webContentsFromIdMock.mockImplementation((id: number) =>
    id === 1 ? firstContents : secondContents
  )
  const pageBridge = new AgentBrowserBridge(mockBrowserManager(registeredTabs))
  pageBridge.setActiveTab(1)

  // Neither an index nor a worktree is given: the page id alone picks the tab.
  const switchResult = await pageBridge.tabSwitch(undefined, undefined, 'tab-b')

  // tab-b is the second registered tab, so the reported index is 1.
  expect(switchResult).toEqual({ switched: 1, browserPageId: 'tab-b' })
  expect(pageBridge.getActiveWebContentsId()).toBe(2)
})
it('updates the owning worktree active tab when switching by browser page id', async () => {
  const registeredTabs = new Map([
    ['tab-a', 1],
    ['tab-b', 2]
  ])
  // Both tabs belong to the same worktree.
  const tabOwners = new Map([
    ['tab-a', 'wt-1'],
    ['tab-b', 'wt-1']
  ])
  const firstContents = mockWebContents(1, 'https://a.com', 'A')
  const secondContents = mockWebContents(2, 'https://b.com', 'B')
  webContentsFromIdMock.mockImplementation((id: number) =>
    id === 1 ? firstContents : secondContents
  )
  const pageBridge = new AgentBrowserBridge(mockBrowserManager(registeredTabs, tabOwners))
  // Start with tab-b active in wt-1, then switch to tab-a without naming the worktree.
  pageBridge.setActiveTab(2, 'wt-1')

  const switchResult = await pageBridge.tabSwitch(undefined, undefined, 'tab-a')

  expect(switchResult).toEqual({
    switched: 0,
    browserPageId: 'tab-a'
  })
  // The worktree-scoped listing must now report tab-a as the active tab.
  const listedTabs = pageBridge.tabList('wt-1').tabs
  expect(listedTabs).toMatchObject([
    { browserPageId: 'tab-a', active: true },
    { browserPageId: 'tab-b', active: false }
  ])
})
it('queues tabSwitch behind in-flight commands on the current session', async () => {
const tabs = new Map([
['tab-a', 1],
@ -547,8 +621,8 @@ describe('AgentBrowserBridge', () => {
expect(releaseSnapshot).not.toBeNull()
releaseSnapshot!()
await expect(snapshot).resolves.toEqual({ snapshot: 'tree' })
await expect(switched).resolves.toEqual({ switched: 1 })
await expect(snapshot).resolves.toEqual({ browserPageId: 'tab-a', snapshot: 'tree' })
await expect(switched).resolves.toEqual({ switched: 1, browserPageId: 'tab-b' })
expect(b.getActiveWebContentsId()).toBe(2)
})

View file

@ -71,6 +71,11 @@ type QueuedCommand = {
reject: (reason: unknown) => void
}
/** A resolved command target: a browser page id paired with its webContents id. */
type ResolvedBrowserCommandTarget = {
/** Browser page id of the targeted tab. */
browserPageId: string
/** Id used to look up the tab's webContents. */
webContentsId: number
}
function agentBrowserNativeName(): string {
const ext = process.platform === 'win32' ? '.exe' : ''
return `agent-browser-${platform()}-${arch()}${ext}`
@ -219,6 +224,26 @@ export class AgentBrowserBridge {
return this.activeWebContentsId
}
/**
 * Looks up the page id, URL, and title of the tab a command would target.
 *
 * @param worktreeId Optional worktree used when resolving the target.
 * @param browserPageId Optional explicit page id to resolve.
 * @returns The page info, or `null` when the target's webContents is missing
 *   or anything throws while resolving or reading it.
 */
getPageInfo(
  worktreeId?: string,
  browserPageId?: string
): { browserPageId: string; url: string; title: string } | null {
  try {
    const { browserPageId: resolvedPageId, webContentsId } = this.resolveCommandTarget(
      worktreeId,
      browserPageId
    )
    const contents = this.getWebContents(webContentsId)
    return contents
      ? {
          browserPageId: resolvedPageId,
          url: contents.getURL() ?? '',
          title: contents.getTitle() ?? ''
        }
      : null
  } catch {
    // Why: this is a best-effort lookup, so any failure is reported as "no page".
    return null
  }
}
onTabChanged(webContentsId: number, worktreeId?: string): void {
this.activeWebContentsId = webContentsId
if (worktreeId) {
@ -278,7 +303,7 @@ export class AgentBrowserBridge {
const result: BrowserTabInfo[] = []
let index = 0
let firstLiveWcId: number | null = null
for (const [, wcId] of tabs) {
for (const [tabId, wcId] of tabs) {
const wc = this.getWebContents(wcId)
if (!wc) {
continue
@ -287,6 +312,7 @@ export class AgentBrowserBridge {
firstLiveWcId = wcId
}
result.push({
browserPageId: tabId,
index: index++,
url: wc.getURL() ?? '',
title: wc.getTitle() ?? '',
@ -310,65 +336,100 @@ export class AgentBrowserBridge {
// Why: tab switch must go through the command queue to prevent race conditions
// with in-flight commands that target the previously active tab.
async tabSwitch(index: number, worktreeId?: string): Promise<BrowserTabSwitchResult> {
async tabSwitch(
index: number | undefined,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserTabSwitchResult> {
return this.enqueueCommand(worktreeId, async () => {
const tabs = this.getRegisteredTabs(worktreeId)
// Why: queue delay means the tab list can change between RPC arrival and
// execution time. Recompute against live webContents here so we never
// activate a tab index that disappeared while earlier commands were running.
const liveEntries = [...tabs.entries()].filter(([, wcId]) => this.getWebContents(wcId))
if (index < 0 || index >= liveEntries.length) {
let switchedIndex = index ?? -1
let resolvedPageId = browserPageId
if (resolvedPageId) {
switchedIndex = liveEntries.findIndex(([tabId]) => tabId === resolvedPageId)
}
if (switchedIndex < 0 || switchedIndex >= liveEntries.length) {
const targetLabel =
resolvedPageId != null ? `Browser page ${resolvedPageId}` : `Tab index ${index}`
throw new BrowserError(
'browser_tab_not_found',
`Tab index ${index} out of range (0-${liveEntries.length - 1})`
`${targetLabel} out of range (0-${liveEntries.length - 1})`
)
}
const [, wcId] = liveEntries[index]
const [tabId, wcId] = liveEntries[switchedIndex]
this.activeWebContentsId = wcId
// Why: resolveActiveTab prefers the per-worktree map over the global when
// worktreeId is provided. Without this update, subsequent commands would
// still route to the previous tab despite tabSwitch reporting success.
if (worktreeId) {
this.activeWebContentsPerWorktree.set(worktreeId, wcId)
const owningWorktreeId = worktreeId ?? this.browserManager.getWorktreeIdForTab(tabId)
// Why: `tab switch --page <id>` may omit --worktree because the page id is
// already a stable target. We still need to update the owning worktree's
// active-tab slot so later worktree-scoped commands follow the tab that was
// just activated instead of the previously active one.
if (owningWorktreeId) {
this.activeWebContentsPerWorktree.set(owningWorktreeId, wcId)
}
return { switched: index }
return { switched: switchedIndex, browserPageId: tabId }
})
}
// ── Core commands (typed) ──
async snapshot(worktreeId?: string): Promise<BrowserSnapshotResult> {
async snapshot(worktreeId?: string, browserPageId?: string): Promise<BrowserSnapshotResult> {
// Why: snapshot creates fresh refs so it must bypass the stale-ref guard
return this.enqueueCommand(worktreeId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['snapshot'])) as BrowserSnapshotResult
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName, target) => {
const result = (await this.execAgentBrowser(sessionName, [
'snapshot'
])) as BrowserSnapshotResult
return {
...result,
browserPageId: target.browserPageId
}
})
}
async click(element: string, worktreeId?: string): Promise<BrowserClickResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async click(
element: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserClickResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['click', element])) as BrowserClickResult
})
}
async dblclick(element: string, worktreeId?: string): Promise<BrowserClickResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async dblclick(
element: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserClickResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['dblclick', element])) as BrowserClickResult
})
}
async goto(url: string, worktreeId?: string): Promise<BrowserGotoResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async goto(url: string, worktreeId?: string, browserPageId?: string): Promise<BrowserGotoResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['goto', url])) as BrowserGotoResult
})
}
async fill(element: string, value: string, worktreeId?: string): Promise<BrowserFillResult> {
async fill(
element: string,
value: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserFillResult> {
// Why: Input.insertText via Electron's debugger API does not deliver text to
// focused inputs in webviews — this is a fundamental Electron limitation.
// Agent-browser's fill and click also fail for the same reason.
// Workaround: use agent-browser's focus to resolve the ref, then set the value
// directly via JS and dispatch input/change events for React/framework compat.
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
await this.execAgentBrowser(sessionName, ['focus', element])
const escaped = value.replace(/\\/g, '\\\\').replace(/'/g, "\\'")
await this.execAgentBrowser(sessionName, [
@ -379,8 +440,12 @@ export class AgentBrowserBridge {
})
}
async type(input: string, worktreeId?: string): Promise<BrowserTypeResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async type(
input: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserTypeResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, [
'keyboard',
'type',
@ -389,8 +454,13 @@ export class AgentBrowserBridge {
})
}
async select(element: string, value: string, worktreeId?: string): Promise<BrowserSelectResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async select(
element: string,
value: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserSelectResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, [
'select',
element,
@ -402,9 +472,10 @@ export class AgentBrowserBridge {
async scroll(
direction: string,
amount?: number,
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<BrowserScrollResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['scroll', direction]
if (amount != null) {
args.push(String(amount))
@ -413,14 +484,23 @@ export class AgentBrowserBridge {
})
}
async scrollIntoView(element: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async scrollIntoView(
element: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['scrollintoview', element])
})
}
async get(what: string, selector?: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async get(
what: string,
selector?: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['get', what]
if (selector) {
args.push(selector)
@ -429,30 +509,44 @@ export class AgentBrowserBridge {
})
}
async is(what: string, selector: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async is(
what: string,
selector: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['is', what, selector])
})
}
// ── Keyboard commands ──
async keyboardInsertText(text: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async keyboardInsertText(
text: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['keyboard', 'inserttext', text])
})
}
// ── Mouse commands ──
async mouseMove(x: number, y: number, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async mouseMove(
x: number,
y: number,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['mouse', 'move', String(x), String(y)])
})
}
async mouseDown(button?: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async mouseDown(button?: string, worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['mouse', 'down']
if (button) {
args.push(button)
@ -461,8 +555,8 @@ export class AgentBrowserBridge {
})
}
async mouseUp(button?: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async mouseUp(button?: string, worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['mouse', 'up']
if (button) {
args.push(button)
@ -471,8 +565,13 @@ export class AgentBrowserBridge {
})
}
async mouseWheel(dy: number, dx?: number, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async mouseWheel(
dy: number,
dx?: number,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['mouse', 'wheel', String(dy)]
if (dx != null) {
args.push(String(dx))
@ -488,9 +587,10 @@ export class AgentBrowserBridge {
value: string,
action: string,
text?: string,
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['find', locator, value, action]
if (text) {
args.push(text)
@ -501,14 +601,14 @@ export class AgentBrowserBridge {
// ── Set commands ──
async setDevice(name: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async setDevice(name: string, worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['set', 'device', name])
})
}
async setOffline(state?: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async setOffline(state?: string, worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['set', 'offline']
if (state) {
args.push(state)
@ -517,14 +617,23 @@ export class AgentBrowserBridge {
})
}
async setHeaders(headersJson: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async setHeaders(
headersJson: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['set', 'headers', headersJson])
})
}
async setCredentials(user: string, pass: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async setCredentials(
user: string,
pass: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['set', 'credentials', user, pass])
})
}
@ -532,9 +641,10 @@ export class AgentBrowserBridge {
async setMedia(
colorScheme?: string,
reducedMotion?: string,
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['set', 'media']
if (colorScheme) {
args.push(colorScheme)
@ -548,22 +658,26 @@ export class AgentBrowserBridge {
// ── Clipboard commands ──
async clipboardRead(worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async clipboardRead(worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['clipboard', 'read'])
})
}
async clipboardWrite(text: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async clipboardWrite(
text: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['clipboard', 'write', text])
})
}
// ── Dialog commands ──
async dialogAccept(text?: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async dialogAccept(text?: string, worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['dialog', 'accept']
if (text) {
args.push(text)
@ -572,86 +686,108 @@ export class AgentBrowserBridge {
})
}
async dialogDismiss(worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async dialogDismiss(worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['dialog', 'dismiss'])
})
}
// ── Storage commands ──
async storageLocalGet(key: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async storageLocalGet(
key: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['storage', 'local', 'get', key])
})
}
async storageLocalSet(key: string, value: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async storageLocalSet(
key: string,
value: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['storage', 'local', 'set', key, value])
})
}
async storageLocalClear(worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async storageLocalClear(worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['storage', 'local', 'clear'])
})
}
async storageSessionGet(key: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async storageSessionGet(
key: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['storage', 'session', 'get', key])
})
}
async storageSessionSet(key: string, value: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async storageSessionSet(
key: string,
value: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['storage', 'session', 'set', key, value])
})
}
async storageSessionClear(worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async storageSessionClear(worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['storage', 'session', 'clear'])
})
}
// ── Download command ──
async download(selector: string, path: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async download(
selector: string,
path: string,
worktreeId?: string,
browserPageId?: string
): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['download', selector, path])
})
}
// ── Highlight command ──
async highlight(selector: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async highlight(selector: string, worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return await this.execAgentBrowser(sessionName, ['highlight', selector])
})
}
async back(worktreeId?: string): Promise<BrowserBackResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async back(worktreeId?: string, browserPageId?: string): Promise<BrowserBackResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['back'])) as BrowserBackResult
})
}
async forward(worktreeId?: string): Promise<BrowserBackResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async forward(worktreeId?: string, browserPageId?: string): Promise<BrowserBackResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['forward'])) as BrowserBackResult
})
}
async reload(worktreeId?: string): Promise<BrowserReloadResult> {
async reload(worktreeId?: string, browserPageId?: string): Promise<BrowserReloadResult> {
// Why: reload can trigger a process swap in Electron (site-isolation), which
// destroys the session mid-command. Use the webContents directly for reload
// instead of going through agent-browser to avoid the session lifecycle issue.
// Routed through enqueueCommand so it serializes with other in-flight commands.
return this.enqueueCommand(worktreeId, async () => {
const { webContentsId } = this.resolveActiveTab(worktreeId)
const wc = this.getWebContents(webContentsId)
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (_sessionName, target) => {
const wc = this.getWebContents(target.webContentsId)
if (!wc) {
throw new BrowserError('browser_no_tab', 'Tab is no longer available')
}
@ -675,16 +811,24 @@ export class AgentBrowserBridge {
})
}
async screenshot(format?: string, worktreeId?: string): Promise<BrowserScreenshotResult> {
async screenshot(
format?: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserScreenshotResult> {
// Why: agent-browser writes the screenshot to a temp file and returns
// { "path": "/tmp/screenshot-xxx.png" }. We read the file and return base64.
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return this.captureScreenshotCommand(sessionName, ['screenshot'], 300, format)
})
}
async fullPageScreenshot(format?: string, worktreeId?: string): Promise<BrowserScreenshotResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async fullPageScreenshot(
format?: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserScreenshotResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return this.captureScreenshotCommand(sessionName, ['screenshot', '--full-page'], 500, format)
})
}
@ -740,20 +884,33 @@ export class AgentBrowserBridge {
}
}
async evaluate(expression: string, worktreeId?: string): Promise<BrowserEvalResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async evaluate(
expression: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserEvalResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['eval', expression])) as BrowserEvalResult
})
}
async hover(element: string, worktreeId?: string): Promise<BrowserHoverResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async hover(
element: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserHoverResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['hover', element])) as BrowserHoverResult
})
}
async drag(from: string, to: string, worktreeId?: string): Promise<BrowserDragResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async drag(
from: string,
to: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserDragResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['drag', from, to])) as BrowserDragResult
})
}
@ -761,9 +918,10 @@ export class AgentBrowserBridge {
async upload(
element: string,
filePaths: string[],
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<BrowserUploadResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, [
'upload',
element,
@ -782,9 +940,10 @@ export class AgentBrowserBridge {
fn?: string
state?: string
},
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<BrowserWaitResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['wait']
if (options?.selector) {
args.push(options.selector)
@ -810,28 +969,45 @@ export class AgentBrowserBridge {
})
}
async check(element: string, checked: boolean, worktreeId?: string): Promise<BrowserCheckResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async check(
element: string,
checked: boolean,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserCheckResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = checked ? ['check', element] : ['uncheck', element]
return (await this.execAgentBrowser(sessionName, args)) as BrowserCheckResult
})
}
async focus(element: string, worktreeId?: string): Promise<BrowserFocusResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async focus(
element: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserFocusResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['focus', element])) as BrowserFocusResult
})
}
async clear(element: string, worktreeId?: string): Promise<BrowserClearResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async clear(
element: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserClearResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
// Why: agent-browser has no clear command — use fill with empty string
return (await this.execAgentBrowser(sessionName, ['fill', element, ''])) as BrowserClearResult
})
}
async selectAll(element: string, worktreeId?: string): Promise<BrowserSelectAllResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async selectAll(
element: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserSelectAllResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
// Why: agent-browser has no select-all command — implement as focus + Ctrl+A
await this.execAgentBrowser(sessionName, ['focus', element])
return (await this.execAgentBrowser(sessionName, [
@ -841,19 +1017,22 @@ export class AgentBrowserBridge {
})
}
async keypress(key: string, worktreeId?: string): Promise<BrowserKeypressResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async keypress(
key: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserKeypressResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['press', key])) as BrowserKeypressResult
})
}
async pdf(worktreeId?: string): Promise<BrowserPdfResult> {
async pdf(worktreeId?: string, browserPageId?: string): Promise<BrowserPdfResult> {
// Why: agent-browser's pdf command via CDP Page.printToPDF hangs in Electron
// webviews. Use Electron's native webContents.printToPDF() which is reliable.
// Routed through enqueueCommand so it serializes with other in-flight commands.
return this.enqueueCommand(worktreeId, async () => {
const { webContentsId } = this.resolveActiveTab(worktreeId)
const wc = this.getWebContents(webContentsId)
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (_sessionName, target) => {
const wc = this.getWebContents(target.webContentsId)
if (!wc) {
throw new BrowserError('browser_no_tab', 'Tab is no longer available')
}
@ -867,8 +1046,12 @@ export class AgentBrowserBridge {
// ── Cookie commands ──
async cookieGet(_url?: string, worktreeId?: string): Promise<BrowserCookieGetResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async cookieGet(
_url?: string,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserCookieGetResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, [
'cookies',
'get'
@ -878,9 +1061,10 @@ export class AgentBrowserBridge {
async cookieSet(
cookie: Partial<BrowserCookie>,
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<BrowserCookieSetResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['cookies', 'set', cookie.name ?? '', cookie.value ?? '']
if (cookie.domain) {
args.push('--domain', cookie.domain)
@ -908,9 +1092,10 @@ export class AgentBrowserBridge {
name?: string,
domain?: string,
_url?: string,
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<BrowserCookieDeleteResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['cookies', 'clear']
if (name) {
args.push('--name', name)
@ -929,9 +1114,10 @@ export class AgentBrowserBridge {
height: number,
scale?: number,
_mobile?: boolean,
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<BrowserViewportResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['set', 'viewport', String(width), String(height)]
if (scale != null) {
args.push(String(scale))
@ -944,9 +1130,10 @@ export class AgentBrowserBridge {
lat: number,
lon: number,
_accuracy?: number,
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<BrowserGeolocationResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, [
'set',
'geo',
@ -960,9 +1147,10 @@ export class AgentBrowserBridge {
async interceptEnable(
patterns?: string[],
worktreeId?: string
worktreeId?: string,
browserPageId?: string
): Promise<BrowserInterceptEnableResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
// Why: agent-browser uses "network route <url>" to intercept. Route each pattern individually.
const urlPattern = patterns?.[0] ?? '**/*'
const args = ['network', 'route', urlPattern]
@ -978,8 +1166,11 @@ export class AgentBrowserBridge {
})
}
async interceptDisable(worktreeId?: string): Promise<BrowserInterceptDisableResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async interceptDisable(
worktreeId?: string,
browserPageId?: string
): Promise<BrowserInterceptDisableResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const result = (await this.execAgentBrowser(sessionName, [
'network',
'unroute'
@ -992,8 +1183,11 @@ export class AgentBrowserBridge {
})
}
async interceptList(worktreeId?: string): Promise<{ requests: unknown[] }> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async interceptList(
worktreeId?: string,
browserPageId?: string
): Promise<{ requests: unknown[] }> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['network', 'requests'])) as {
requests: unknown[]
}
@ -1006,8 +1200,11 @@ export class AgentBrowserBridge {
// ── Capture commands ──
async captureStart(worktreeId?: string): Promise<BrowserCaptureStartResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async captureStart(
worktreeId?: string,
browserPageId?: string
): Promise<BrowserCaptureStartResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const result = (await this.execAgentBrowser(sessionName, [
'network',
'har',
@ -1021,8 +1218,11 @@ export class AgentBrowserBridge {
})
}
async captureStop(worktreeId?: string): Promise<BrowserCaptureStopResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async captureStop(
worktreeId?: string,
browserPageId?: string
): Promise<BrowserCaptureStopResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const result = (await this.execAgentBrowser(sessionName, [
'network',
'har',
@ -1036,14 +1236,22 @@ export class AgentBrowserBridge {
})
}
async consoleLog(_limit?: number, worktreeId?: string): Promise<BrowserConsoleResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async consoleLog(
_limit?: number,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserConsoleResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, ['console'])) as BrowserConsoleResult
})
}
async networkLog(_limit?: number, worktreeId?: string): Promise<BrowserNetworkLogResult> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async networkLog(
_limit?: number,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserNetworkLogResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
return (await this.execAgentBrowser(sessionName, [
'network',
'requests'
@ -1053,8 +1261,8 @@ export class AgentBrowserBridge {
// ── Generic passthrough ──
async exec(command: string, worktreeId?: string): Promise<unknown> {
return this.enqueueCommand(worktreeId, async (sessionName) => {
async exec(command: string, worktreeId?: string, browserPageId?: string): Promise<unknown> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
// Why: strip --cdp and --session from raw command to prevent session/target injection
const sanitized = command
.replace(/--cdp\s+\S+/g, '')
@ -1081,10 +1289,20 @@ export class AgentBrowserBridge {
worktreeId: string | undefined,
execute: (sessionName: string) => Promise<T>
): Promise<T> {
const { browserPageId, webContentsId } = this.resolveActiveTab(worktreeId)
const sessionName = `orca-tab-${browserPageId}`
return this.enqueueTargetedCommand(worktreeId, undefined, async (sessionName) =>
execute(sessionName)
)
}
await this.ensureSession(sessionName, browserPageId, webContentsId)
private async enqueueTargetedCommand<T>(
worktreeId: string | undefined,
browserPageId: string | undefined,
execute: (sessionName: string, target: ResolvedBrowserCommandTarget) => Promise<T>
): Promise<T> {
const target = this.resolveCommandTarget(worktreeId, browserPageId)
const sessionName = `orca-tab-${target.browserPageId}`
await this.ensureSession(sessionName, target.browserPageId, target.webContentsId)
return new Promise<T>((resolve, reject) => {
let queue = this.commandQueues.get(sessionName)
@ -1093,7 +1311,7 @@ export class AgentBrowserBridge {
this.commandQueues.set(sessionName, queue)
}
queue.push({
execute: (() => execute(sessionName)) as () => Promise<unknown>,
execute: (() => execute(sessionName, target)) as () => Promise<unknown>,
resolve: resolve as (value: unknown) => void,
reject
})
@ -1121,15 +1339,43 @@ export class AgentBrowserBridge {
this.processingQueues.delete(sessionName)
}
getActivePageId(worktreeId?: string): string | null {
getActivePageId(worktreeId?: string, browserPageId?: string): string | null {
try {
return this.resolveActiveTab(worktreeId).browserPageId
return this.resolveCommandTarget(worktreeId, browserPageId).browserPageId
} catch {
return null
}
}
private resolveActiveTab(worktreeId?: string): { browserPageId: string; webContentsId: number } {
// Picks the tab a command should run against.
// - No page id: defer to resolveActiveTab(worktreeId).
// - Page id given: it must be present in getRegisteredTabs(worktreeId) and its
//   webContents must still resolve via getWebContents; otherwise a
//   BrowserError('browser_tab_not_found') is thrown.
private resolveCommandTarget(
  worktreeId?: string,
  browserPageId?: string
): ResolvedBrowserCommandTarget {
  if (browserPageId) {
    const registeredId = this.getRegisteredTabs(worktreeId).get(browserPageId)
    if (registeredId == null) {
      // Why: only mention the worktree in the message when the caller scoped
      // the lookup to one.
      const suffix = worktreeId ? ' in this worktree' : ''
      throw new BrowserError(
        'browser_tab_not_found',
        `Browser page ${browserPageId} was not found${suffix}`
      )
    }

    // Why: a registered id can outlive its webContents, so confirm the guest
    // still resolves before handing the target back.
    const liveContents = this.getWebContents(registeredId)
    if (!liveContents) {
      throw new BrowserError(
        'browser_tab_not_found',
        `Browser page ${browserPageId} is no longer available`
      )
    }

    return { browserPageId, webContentsId: registeredId }
  }

  return this.resolveActiveTab(worktreeId)
}
private resolveActiveTab(worktreeId?: string): ResolvedBrowserCommandTarget {
const tabs = this.getRegisteredTabs(worktreeId)
if (tabs.size === 0) {

View file

@ -113,6 +113,32 @@ export class CdpBridge {
return null
}
// Returns the id, URL and title for a page, or null when no page id can be
// resolved, the id is not registered, or its guest is missing/destroyed.
// When browserPageId is omitted the active page id is used; _worktreeId is
// accepted but not read here.
getPageInfo(
  _worktreeId?: string,
  browserPageId?: string
): { browserPageId: string; url: string; title: string } | null {
  // Why: OrcaRuntimeService pushes navigation/title updates after commands
  // through a bridge-agnostic contract. This bridge only routes one active tab
  // at a time, yet it must still answer the same metadata lookup.
  const pageId = browserPageId ?? this.getActivePageId()
  if (!pageId) {
    return null
  }

  const wcId = this.getRegisteredTabs().get(pageId)
  const guest = wcId == null ? undefined : webContents.fromId(wcId)
  if (!guest || guest.isDestroyed()) {
    return null
  }

  const url = guest.getURL()
  const title = guest.getTitle()
  return { browserPageId: pageId, url, title }
}
async snapshot(): Promise<BrowserSnapshotResult> {
return this.enqueueCommand(async () => {
const guest = this.getActiveGuest()
@ -131,6 +157,7 @@ export class CdpBridge {
state.navigationId = navId
return {
browserPageId: tabId,
snapshot: result.snapshot,
refs: result.refs,
url: guest.getURL(),
@ -939,12 +966,13 @@ export class CdpBridge {
const tabs: BrowserTabInfo[] = []
let index = 0
for (const [_tabId, wcId] of this.getRegisteredTabs()) {
for (const [tabId, wcId] of this.getRegisteredTabs()) {
const guest = webContents.fromId(wcId)
if (!guest || guest.isDestroyed()) {
continue
}
tabs.push({
browserPageId: tabId,
index,
url: guest.getURL(),
title: guest.getTitle(),
@ -970,13 +998,13 @@ export class CdpBridge {
)
}
const [_tabId, wcId] = liveEntries[index]
const [tabId, wcId] = liveEntries[index]
if (this.activeWebContentsId !== null) {
this.invalidateRefMap(this.activeWebContentsId)
}
this.activeWebContentsId = wcId
return { switched: index }
return { switched: index, browserPageId: tabId }
}
onTabClosed(webContentsId: number): void {

View file

@ -0,0 +1,81 @@
import { afterEach, describe, expect, it, vi } from 'vitest'
import { captureScreenshot } from './cdp-screenshot'
// Builds a stand-in for an Electron WebContents exposing only the members these
// tests touch: a live (not destroyed) guest with an attached debugger, plus
// unconfigured invalidate / capturePage / sendCommand mocks for each test to
// program as needed.
function createMockWebContents() {
  const debuggerMock = {
    isAttached: vi.fn(() => true),
    sendCommand: vi.fn()
  }

  return {
    isDestroyed: vi.fn(() => false),
    invalidate: vi.fn(),
    capturePage: vi.fn(),
    debugger: debuggerMock
  }
}
// Exercises captureScreenshot against a mocked WebContents: the direct
// Page.captureScreenshot path, the capturePage fallback after a stall, and the
// timeout error when the fallback yields nothing usable.
describe('captureScreenshot', () => {
// Two cases below install fake timers; always restore real timers afterwards so
// the fake clock does not leak into other tests.
afterEach(() => {
vi.useRealTimers()
})
it('invalidates the guest before forwarding Page.captureScreenshot', async () => {
const webContents = createMockWebContents()
// The debugger command resolves immediately with a screenshot payload.
webContents.debugger.sendCommand.mockResolvedValueOnce({ data: 'png-data' })
const onResult = vi.fn()
const onError = vi.fn()
captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
// Yield to the microtask queue once so the resolved mock can be delivered
// before the assertions run.
await Promise.resolve()
expect(webContents.invalidate).toHaveBeenCalledTimes(1)
expect(webContents.debugger.sendCommand).toHaveBeenCalledWith('Page.captureScreenshot', {
format: 'png'
})
expect(onResult).toHaveBeenCalledWith({ data: 'png-data' })
expect(onError).not.toHaveBeenCalled()
})
it('falls back to capturePage when Page.captureScreenshot stalls', async () => {
// Fake timers must be installed before captureScreenshot schedules its timeout.
vi.useFakeTimers()
const webContents = createMockWebContents()
// A promise that never settles simulates a stalled debugger command.
webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
// The fallback capture returns a non-empty image whose PNG bytes are known.
webContents.capturePage.mockResolvedValueOnce({
isEmpty: () => false,
toPNG: () => Buffer.from('fallback-png')
})
const onResult = vi.fn()
const onError = vi.fn()
captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
// Advance the fake clock by the 8000 ms timeout; the async variant also lets
// pending promise callbacks run.
await vi.advanceTimersByTimeAsync(8000)
expect(webContents.capturePage).toHaveBeenCalledTimes(1)
// The result is expected to be the fallback PNG bytes, base64-encoded.
expect(onResult).toHaveBeenCalledWith({
data: Buffer.from('fallback-png').toString('base64')
})
expect(onError).not.toHaveBeenCalled()
})
it('reports the original timeout when the fallback capture is unavailable', async () => {
vi.useFakeTimers()
const webContents = createMockWebContents()
// Same stalled debugger command as above.
webContents.debugger.sendCommand.mockImplementation(() => new Promise(() => {}))
// An empty image means the fallback has nothing to return.
webContents.capturePage.mockResolvedValueOnce({
isEmpty: () => true,
toPNG: () => Buffer.from('unused')
})
const onResult = vi.fn()
const onError = vi.fn()
captureScreenshot(webContents as never, { format: 'png' }, onResult, onError)
await vi.advanceTimersByTimeAsync(8000)
// With no usable fallback, only the timeout error callback should fire.
expect(onResult).not.toHaveBeenCalled()
expect(onError).toHaveBeenCalledWith(
'Screenshot timed out — the browser tab may not be visible or the window may not have focus.'
)
})
})

View file

@ -1,5 +1,22 @@
import type { WebContents } from 'electron'
// Converts a captured NativeImage into the `{ data: <base64> }` shape used for
// screenshot results.
// - Returns null for an empty image so the caller can treat the capture as
//   unavailable.
// - Encodes JPEG only when params.format is exactly 'jpeg'; every other value
//   (or no params) yields PNG.
// - JPEG quality comes from params.quality when it is a finite number, rounded
//   and clamped to 0..100; otherwise 90 is used.
function encodeNativeImageScreenshot(
  image: Electron.NativeImage,
  params: Record<string, unknown> | undefined
): { data: string } | null {
  if (image.isEmpty()) {
    return null
  }

  if (params?.format !== 'jpeg') {
    return { data: image.toPNG().toString('base64') }
  }

  const requested = params?.quality
  let jpegQuality = 90
  if (typeof requested === 'number' && Number.isFinite(requested)) {
    jpegQuality = Math.min(100, Math.max(0, Math.round(requested)))
  }
  return { data: image.toJPEG(jpegQuality).toString('base64') }
}
// Why: Electron's capturePage() is unreliable on webview guests — the compositor
// may not produce frames when the webview panel is inactive, unfocused, or in a
// split-pane layout. Instead, use the debugger's Page.captureScreenshot which
@ -40,12 +57,33 @@ export function captureScreenshot(
}
let settled = false
const timer = setTimeout(() => {
// Why: a compositor invalidate is cheap and can recover guest instances that
// are visible but have not produced a fresh frame since being reclaimed into
// the active browser tab.
try {
webContents.invalidate()
} catch {
// Some guest teardown paths reject repaint requests. Fall through to CDP.
}
const timer = setTimeout(async () => {
if (!settled) {
settled = true
onError(
'Screenshot timed out — the browser tab may not be visible or the window may not have focus.'
)
try {
const fallback = encodeNativeImageScreenshot(await webContents.capturePage(), params)
if (fallback) {
settled = true
onResult(fallback)
return
}
} catch {
// Fall through to the original timeout error below.
}
if (!settled) {
settled = true
onError(
'Screenshot timed out — the browser tab may not be visible or the window may not have focus.'
)
}
}
}, 8000)

View file

@ -764,4 +764,78 @@ describe('OrcaRuntimeService', () => {
}
])
})
// Exercises how runtime browser calls forward an explicit `page` id (and an
// optional `worktree` selector) to the agent browser bridge.
describe('browser page targeting', () => {
  // Canned bridge response shared by the snapshot cases below.
  const makeSnapshotPayload = () => ({
    browserPageId: 'page-1',
    snapshot: 'tree',
    refs: [],
    url: 'https://example.com',
    title: 'Example'
  })

  it('passes explicit page ids through without resolving the current worktree', async () => {
    // Reset call history so the final "not called" assertion only reflects
    // what happens from this point on.
    vi.mocked(listWorktrees).mockClear()
    const runtime = createRuntime()
    const snapshot = vi.fn().mockResolvedValue(makeSnapshotPayload())
    runtime.setAgentBrowserBridge({ snapshot } as never)

    const outcome = await runtime.browserSnapshot({ page: 'page-1' })

    expect(outcome.browserPageId).toBe('page-1')
    // With only a page id given, the bridge receives no worktree id.
    expect(snapshot).toHaveBeenCalledWith(undefined, 'page-1')
    expect(listWorktrees).not.toHaveBeenCalled()
  })

  it('resolves explicit worktree selectors when page ids are also provided', async () => {
    vi.mocked(listWorktrees).mockClear()
    const runtime = createRuntime()
    const snapshot = vi.fn().mockResolvedValue(makeSnapshotPayload())
    const getRegisteredTabs = vi.fn(() => new Map([['page-1', 1]]))
    runtime.setAgentBrowserBridge({ snapshot, getRegisteredTabs } as never)

    await runtime.browserSnapshot({
      worktree: 'branch:feature/foo',
      page: 'page-1'
    })

    // The selector is expected to reach the bridge as the resolved worktree id.
    expect(snapshot).toHaveBeenCalledWith(TEST_WORKTREE_ID, 'page-1')
  })

  it('routes tab switch and capture start by explicit page id', async () => {
    const runtime = createRuntime()
    const tabSwitch = vi.fn().mockResolvedValue({
      switched: 2,
      browserPageId: 'page-2'
    })
    const captureStart = vi.fn().mockResolvedValue({
      capturing: true
    })
    runtime.setAgentBrowserBridge({ tabSwitch, captureStart } as never)

    const switchOutcome = await runtime.browserTabSwitch({ page: 'page-2' })
    expect(switchOutcome).toEqual({
      switched: 2,
      browserPageId: 'page-2'
    })

    const captureOutcome = await runtime.browserCaptureStart({ page: 'page-2' })
    expect(captureOutcome).toEqual({
      capturing: true
    })

    // tabSwitch gets (index, worktreeId, pageId); neither index nor worktree
    // was supplied, so only the page id is populated.
    expect(tabSwitch).toHaveBeenCalledWith(undefined, undefined, 'page-2')
    expect(captureStart).toHaveBeenCalledWith(undefined, 'page-2')
  })
})
})

File diff suppressed because it is too large Load diff

View file

@ -707,8 +707,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.snapshot') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSnapshot({ worktree })
const result = await this.runtime.browserSnapshot(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -722,8 +721,10 @@ export class OrcaRuntimeRpcServer {
if (!element) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --element')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserClick({ element, worktree })
const result = await this.runtime.browserClick({
element,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -737,8 +738,7 @@ export class OrcaRuntimeRpcServer {
if (!url) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --url')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserGoto({ url, worktree })
const result = await this.runtime.browserGoto({ url, ...this.extractBrowserTarget(params) })
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -756,8 +756,11 @@ export class OrcaRuntimeRpcServer {
if (value === null) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --value')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserFill({ element, value, worktree })
const result = await this.runtime.browserFill({
element,
value,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -771,8 +774,10 @@ export class OrcaRuntimeRpcServer {
if (!input) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --input')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserType({ input, worktree })
const result = await this.runtime.browserType({
input,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -790,8 +795,11 @@ export class OrcaRuntimeRpcServer {
if (value === null) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --value')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSelect({ element, value, worktree })
const result = await this.runtime.browserSelect({
element,
value,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -811,8 +819,11 @@ export class OrcaRuntimeRpcServer {
}
const amount =
typeof params?.amount === 'number' && params.amount > 0 ? params.amount : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserScroll({ direction, amount, worktree })
const result = await this.runtime.browserScroll({
direction,
amount,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -822,8 +833,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.back') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserBack({ worktree })
const result = await this.runtime.browserBack(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -833,8 +843,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.reload') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserReload({ worktree })
const result = await this.runtime.browserReload(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -849,8 +858,10 @@ export class OrcaRuntimeRpcServer {
(params.format === 'png' || params.format === 'jpeg')
? params.format
: undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserScreenshot({ format, worktree })
const result = await this.runtime.browserScreenshot({
format,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -864,8 +875,10 @@ export class OrcaRuntimeRpcServer {
if (!expression) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --expression')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserEval({ expression, worktree })
const result = await this.runtime.browserEval({
expression,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -886,16 +899,20 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.tabSwitch') {
try {
const params = this.extractParams(request)
const index = typeof params?.index === 'number' ? params.index : null
if (index === null || !Number.isInteger(index) || index < 0) {
const index = typeof params?.index === 'number' ? params.index : undefined
const page =
typeof params?.page === 'string' && params.page.length > 0 ? params.page : undefined
if (page === undefined && (index === undefined || !Number.isInteger(index) || index < 0)) {
return this.errorResponse(
request.id,
'invalid_argument',
'Missing required --index (non-negative integer)'
'Missing required --index (non-negative integer) or --page'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserTabSwitch({ index, worktree })
const result = await this.runtime.browserTabSwitch({
index,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -909,8 +926,10 @@ export class OrcaRuntimeRpcServer {
if (!element) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --element')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserHover({ element, worktree })
const result = await this.runtime.browserHover({
element,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -929,8 +948,11 @@ export class OrcaRuntimeRpcServer {
'Missing required --from and --to element refs'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserDrag({ from, to, worktree })
const result = await this.runtime.browserDrag({
from,
to,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -949,8 +971,11 @@ export class OrcaRuntimeRpcServer {
'Missing required --element and --files'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserUpload({ element, files, worktree })
const result = await this.runtime.browserUpload({
element,
files,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -968,7 +993,6 @@ export class OrcaRuntimeRpcServer {
const load = typeof params?.load === 'string' ? params.load : undefined
const fn = typeof params?.fn === 'string' ? params.fn : undefined
const state = typeof params?.state === 'string' ? params.state : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserWait({
selector,
timeout,
@ -977,7 +1001,7 @@ export class OrcaRuntimeRpcServer {
load,
fn,
state,
worktree
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
@ -993,8 +1017,11 @@ export class OrcaRuntimeRpcServer {
if (!element) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --element')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserCheck({ element, checked, worktree })
const result = await this.runtime.browserCheck({
element,
checked,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1008,8 +1035,10 @@ export class OrcaRuntimeRpcServer {
if (!element) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --element')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserFocus({ element, worktree })
const result = await this.runtime.browserFocus({
element,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1023,8 +1052,10 @@ export class OrcaRuntimeRpcServer {
if (!element) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --element')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserClear({ element, worktree })
const result = await this.runtime.browserClear({
element,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1038,8 +1069,10 @@ export class OrcaRuntimeRpcServer {
if (!element) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --element')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSelectAll({ element, worktree })
const result = await this.runtime.browserSelectAll({
element,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1053,8 +1086,10 @@ export class OrcaRuntimeRpcServer {
if (!key) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --key')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserKeypress({ key, worktree })
const result = await this.runtime.browserKeypress({
key,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1064,8 +1099,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.pdf') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserPdf({ worktree })
const result = await this.runtime.browserPdf(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1076,8 +1110,10 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const format = params?.format === 'jpeg' ? ('jpeg' as const) : ('png' as const)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserFullScreenshot({ format, worktree })
const result = await this.runtime.browserFullScreenshot({
format,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1090,8 +1126,10 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const url = typeof params?.url === 'string' ? params.url : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserCookieGet({ url, worktree })
const result = await this.runtime.browserCookieGet({
url,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1106,7 +1144,6 @@ export class OrcaRuntimeRpcServer {
if (!name || value === null) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing name or value')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserCookieSet({
name,
value,
@ -1116,7 +1153,7 @@ export class OrcaRuntimeRpcServer {
httpOnly: typeof params?.httpOnly === 'boolean' ? params.httpOnly : undefined,
sameSite: typeof params?.sameSite === 'string' ? params.sameSite : undefined,
expires: typeof params?.expires === 'number' ? params.expires : undefined,
worktree
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
@ -1131,12 +1168,11 @@ export class OrcaRuntimeRpcServer {
if (!name) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing cookie name')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserCookieDelete({
name,
domain: typeof params?.domain === 'string' ? params.domain : undefined,
url: typeof params?.url === 'string' ? params.url : undefined,
worktree
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
@ -1158,14 +1194,13 @@ export class OrcaRuntimeRpcServer {
'Width and height must be positive numbers'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSetViewport({
width,
height,
deviceScaleFactor:
typeof params?.deviceScaleFactor === 'number' ? params.deviceScaleFactor : undefined,
mobile: typeof params?.mobile === 'boolean' ? params.mobile : undefined,
worktree
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
@ -1183,12 +1218,11 @@ export class OrcaRuntimeRpcServer {
if (latitude === null || longitude === null) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing latitude or longitude')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSetGeolocation({
latitude,
longitude,
accuracy: typeof params?.accuracy === 'number' ? params.accuracy : undefined,
worktree
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
@ -1202,8 +1236,10 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const patterns = Array.isArray(params?.patterns) ? (params.patterns as string[]) : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserInterceptEnable({ patterns, worktree })
const result = await this.runtime.browserInterceptEnable({
patterns,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1213,8 +1249,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.intercept.disable') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserInterceptDisable({ worktree })
const result = await this.runtime.browserInterceptDisable(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1224,8 +1259,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.intercept.list') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserInterceptList({ worktree })
const result = await this.runtime.browserInterceptList(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1237,8 +1271,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.capture.start') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserCaptureStart({ worktree })
const result = await this.runtime.browserCaptureStart(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1248,8 +1281,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.capture.stop') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserCaptureStop({ worktree })
const result = await this.runtime.browserCaptureStop(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1260,8 +1292,10 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const limit = typeof params?.limit === 'number' ? params.limit : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserConsoleLog({ limit, worktree })
const result = await this.runtime.browserConsoleLog({
limit,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1272,8 +1306,10 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const limit = typeof params?.limit === 'number' ? params.limit : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserNetworkLog({ limit, worktree })
const result = await this.runtime.browserNetworkLog({
limit,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1289,8 +1325,10 @@ export class OrcaRuntimeRpcServer {
if (!element) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --element')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserDblclick({ element, worktree })
const result = await this.runtime.browserDblclick({
element,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1300,8 +1338,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.forward') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserForward({ worktree })
const result = await this.runtime.browserForward(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1315,8 +1352,10 @@ export class OrcaRuntimeRpcServer {
if (!element) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --element')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserScrollIntoView({ element, worktree })
const result = await this.runtime.browserScrollIntoView({
element,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1331,8 +1370,11 @@ export class OrcaRuntimeRpcServer {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --what')
}
const selector = typeof params?.selector === 'string' ? params.selector : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserGet({ what, selector, worktree })
const result = await this.runtime.browserGet({
what,
selector,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1351,8 +1393,11 @@ export class OrcaRuntimeRpcServer {
'Missing required --what and --element'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserIs({ what, selector, worktree })
const result = await this.runtime.browserIs({
what,
selector,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1368,8 +1413,10 @@ export class OrcaRuntimeRpcServer {
if (!text) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --text')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserKeyboardInsertText({ text, worktree })
const result = await this.runtime.browserKeyboardInsertText({
text,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1390,8 +1437,11 @@ export class OrcaRuntimeRpcServer {
'Missing required x and y coordinates'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserMouseMove({ x, y, worktree })
const result = await this.runtime.browserMouseMove({
x,
y,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1402,8 +1452,10 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const button = typeof params?.button === 'string' ? params.button : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserMouseDown({ button, worktree })
const result = await this.runtime.browserMouseDown({
button,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1414,8 +1466,10 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const button = typeof params?.button === 'string' ? params.button : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserMouseUp({ button, worktree })
const result = await this.runtime.browserMouseUp({
button,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1430,8 +1484,11 @@ export class OrcaRuntimeRpcServer {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --dy')
}
const dx = typeof params?.dx === 'number' ? params.dx : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserMouseWheel({ dy, dx, worktree })
const result = await this.runtime.browserMouseWheel({
dy,
dx,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1454,8 +1511,13 @@ export class OrcaRuntimeRpcServer {
)
}
const text = typeof params?.text === 'string' ? params.text : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserFind({ locator, value, action, text, worktree })
const result = await this.runtime.browserFind({
locator,
value,
action,
text,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1471,8 +1533,10 @@ export class OrcaRuntimeRpcServer {
if (!name) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --name')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSetDevice({ name, worktree })
const result = await this.runtime.browserSetDevice({
name,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1483,8 +1547,10 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const state = typeof params?.state === 'string' ? params.state : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSetOffline({ state, worktree })
const result = await this.runtime.browserSetOffline({
state,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1502,8 +1568,10 @@ export class OrcaRuntimeRpcServer {
'Missing required --headers (JSON string)'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSetHeaders({ headers, worktree })
const result = await this.runtime.browserSetHeaders({
headers,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1522,8 +1590,11 @@ export class OrcaRuntimeRpcServer {
'Missing required --user and --pass'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSetCredentials({ user, pass, worktree })
const result = await this.runtime.browserSetCredentials({
user,
pass,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1536,8 +1607,11 @@ export class OrcaRuntimeRpcServer {
const colorScheme = typeof params?.colorScheme === 'string' ? params.colorScheme : undefined
const reducedMotion =
typeof params?.reducedMotion === 'string' ? params.reducedMotion : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserSetMedia({ colorScheme, reducedMotion, worktree })
const result = await this.runtime.browserSetMedia({
colorScheme,
reducedMotion,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1549,8 +1623,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.clipboardRead') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserClipboardRead({ worktree })
const result = await this.runtime.browserClipboardRead(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1564,8 +1637,10 @@ export class OrcaRuntimeRpcServer {
if (!text) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --text')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserClipboardWrite({ text, worktree })
const result = await this.runtime.browserClipboardWrite({
text,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1578,8 +1653,10 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const text = typeof params?.text === 'string' ? params.text : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserDialogAccept({ text, worktree })
const result = await this.runtime.browserDialogAccept({
text,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1589,8 +1666,7 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.dialogDismiss') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserDialogDismiss({ worktree })
const result = await this.runtime.browserDialogDismiss(this.extractBrowserTarget(params))
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1606,8 +1682,10 @@ export class OrcaRuntimeRpcServer {
if (!key) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --key')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserStorageLocalGet({ key, worktree })
const result = await this.runtime.browserStorageLocalGet({
key,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1626,8 +1704,11 @@ export class OrcaRuntimeRpcServer {
'Missing required --key and --value'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserStorageLocalSet({ key, value, worktree })
const result = await this.runtime.browserStorageLocalSet({
key,
value,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1637,8 +1718,9 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.storage.local.clear') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserStorageLocalClear({ worktree })
const result = await this.runtime.browserStorageLocalClear(
this.extractBrowserTarget(params)
)
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1652,8 +1734,10 @@ export class OrcaRuntimeRpcServer {
if (!key) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --key')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserStorageSessionGet({ key, worktree })
const result = await this.runtime.browserStorageSessionGet({
key,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1672,8 +1756,11 @@ export class OrcaRuntimeRpcServer {
'Missing required --key and --value'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserStorageSessionSet({ key, value, worktree })
const result = await this.runtime.browserStorageSessionSet({
key,
value,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1683,8 +1770,9 @@ export class OrcaRuntimeRpcServer {
if (request.method === 'browser.storage.session.clear') {
try {
const params = this.extractParams(request)
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserStorageSessionClear({ worktree })
const result = await this.runtime.browserStorageSessionClear(
this.extractBrowserTarget(params)
)
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1705,8 +1793,11 @@ export class OrcaRuntimeRpcServer {
'Missing required --selector and --path'
)
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserDownload({ selector, path, worktree })
const result = await this.runtime.browserDownload({
selector,
path,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1722,8 +1813,10 @@ export class OrcaRuntimeRpcServer {
if (!selector) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --selector')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserHighlight({ selector, worktree })
const result = await this.runtime.browserHighlight({
selector,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1739,8 +1832,10 @@ export class OrcaRuntimeRpcServer {
if (!command) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing required --command')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserExec({ command, worktree })
const result = await this.runtime.browserExec({
command,
...this.extractBrowserTarget(params)
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1763,8 +1858,13 @@ export class OrcaRuntimeRpcServer {
try {
const params = this.extractParams(request)
const index = typeof params?.index === 'number' ? params.index : undefined
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserTabClose({ index, worktree })
const page =
typeof params?.page === 'string' && params.page.length > 0 ? params.page : undefined
const result = await this.runtime.browserTabClose({
index,
page,
worktree: typeof params?.worktree === 'string' ? params.worktree : undefined
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
@ -1805,6 +1905,16 @@ export class OrcaRuntimeRpcServer {
: null
}
/**
 * Reads the optional browser-targeting fields (`worktree`, `page`) from raw
 * RPC params so every browser handler can spread the same shape into its
 * runtime call.
 *
 * @param params - Parsed request params, or `null` when none were supplied.
 * @returns An object that always carries both keys; a key is `undefined`
 *   when the corresponding param is missing or not a string.
 */
private extractBrowserTarget(params: Record<string, unknown> | null): {
  worktree?: string
  page?: string
} {
  const rawWorktree = params?.worktree
  const rawPage = params?.page

  const worktree = typeof rawWorktree === 'string' ? rawWorktree : undefined
  // Why: an empty page id is reported as "no page given" (undefined) rather
  // than forwarded as a literal empty-string id.
  const page = typeof rawPage === 'string' && rawPage.length > 0 ? rawPage : undefined

  return { worktree, page }
}
// Why: browser errors carry a structured .code property (BrowserError from
// cdp-bridge.ts) that maps directly to agent-facing error codes. We forward
// that code rather than relying on the message-matching pattern used by

View file

@ -163,6 +163,7 @@ export type BrowserSnapshotRef = {
}
export type BrowserSnapshotResult = {
browserPageId: string
snapshot: string
refs: BrowserSnapshotRef[]
url: string
@ -215,6 +216,7 @@ export type BrowserEvalResult = {
}
export type BrowserTabInfo = {
browserPageId: string
index: number
url: string
title: string
@ -227,6 +229,7 @@ export type BrowserTabListResult = {
/** Result payload describing a browser tab switch. */
export type BrowserTabSwitchResult = {
// NOTE(review): presumably the index of the tab that was switched to — confirm against the runtime.
switched: number
// NOTE(review): presumably the same stable page id that tab listings expose as `browserPageId` — confirm.
browserPageId: string
}
export type BrowserHoverResult = {