Fix browser wait timeout and focus behavior

This commit is contained in:
Jinwoo-H 2026-04-20 22:11:43 -04:00
parent 99c7f59968
commit 19c49e601f
7 changed files with 366 additions and 45 deletions

View file

@ -83,6 +83,32 @@ Last updated: 2026-04-20
Resolution: command responses are now sent back to the websocket that originated the request,
so late results from a closed client are dropped instead of leaking into a newer connection.
## Wait and emulation behavior
- `fixed` `src/cli/index.ts`, `src/cli/index.test.ts`
Issue: `orca wait --selector ...` used the generic RPC timeout budget and then surfaced a
misleading "Orca is not running" hint when the wait outlived that budget.
Resolution: browser waits now use an explicit RPC timeout budget keyed to the requested wait
duration, and generic runtime timeouts no longer claim the app is down.
- `fixed` `src/main/browser/agent-browser-bridge.ts`, `src/main/browser/agent-browser-bridge.test.ts`
Issue: selector waits relied on undocumented `agent-browser` behavior. `--state visible`
timed out even when the selector was already visible, and timed selector waits could still
bubble up as generic runtime timeouts when the underlying command never resolved itself.
Resolution: Orca now treats `--state visible` as the default selector-wait semantics instead
of forwarding an unsupported flag, and it enforces selector/text/url/function/load wait
timeouts at the bridge layer so missing conditions fail as `browser_timeout` with the
requested timeout in the message. Live verification against `orca-dev` confirmed both
`wait --selector h1 --state visible --timeout 2000` and
`wait --selector .definitely-not-present --timeout 1000`.
- `fixed` `src/main/browser/agent-browser-bridge.ts`, `src/main/browser/agent-browser-bridge.test.ts`
Issue: `orca viewport --mobile` never took effect because the bridge shelled out to
`agent-browser set viewport`, which does not support mobile emulation.
Resolution: viewport emulation now goes straight through CDP
`Emulation.setDeviceMetricsOverride`, so width, scale, and the `mobile` flag all stay aligned
with the CLI contract.
## Console behavior
- `not-a-bug` `src/main/browser/agent-browser-bridge.ts:1252`, `src/main/browser/cdp-ws-proxy.ts`
@ -92,6 +118,16 @@ Last updated: 2026-04-20
emitted inside each guest renderer context, so the command is page-scoped even though the output
includes Chromium/Electron-flavored warnings.
## Focus behavior
- `fixed` `src/main/browser/cdp-ws-proxy.ts`, `src/main/browser/cdp-ws-proxy.test.ts`
Issue: background browser automation could repeatedly steal app focus because the CDP proxy
auto-focused the guest for every `Runtime.evaluate` / `Runtime.callFunctionOn` request, and
commands like `wait --selector` poll through those methods.
Resolution: proxy auto-focus is now limited to `Input.insertText`. Generic eval/function calls
stay background-safe, so waits and read-only probes no longer keep dragging the browser window
to the foreground.
## Session lifecycle and process swaps
- `fixed` `src/main/browser/agent-browser-bridge.ts:264`, `src/main/browser/agent-browser-bridge.ts:1148`, `src/main/browser/agent-browser-bridge.ts:1169`, `src/main/browser/agent-browser-bridge.ts:1559`, `src/main/browser/agent-browser-bridge.test.ts`

View file

@ -43,6 +43,7 @@ import {
main,
normalizeWorktreeSelector
} from './index'
import { RuntimeClientError } from './runtime-client'
describe('COMMAND_SPECS collision check', () => {
it('has no duplicate command paths', () => {
@ -499,3 +500,134 @@ describe('orca cli browser page targeting', () => {
})
})
})
describe('orca cli browser waits and viewport flags', () => {
beforeEach(() => {
callMock.mockReset()
process.exitCode = undefined
})
afterEach(() => {
vi.restoreAllMocks()
})
it('gives selector waits an explicit RPC timeout budget', async () => {
callMock.mockResolvedValueOnce({
id: 'req_wait',
ok: true,
result: { ok: true },
_meta: {
runtimeId: 'runtime-1'
}
})
vi.spyOn(console, 'log').mockImplementation(() => {})
await main(
['wait', '--selector', '#ready', '--worktree', 'all', '--json'],
'/tmp/not-an-orca-worktree'
)
expect(callMock).toHaveBeenCalledWith(
'browser.wait',
{
selector: '#ready',
timeout: undefined,
text: undefined,
url: undefined,
load: undefined,
fn: undefined,
state: undefined,
worktree: undefined
},
{ timeoutMs: 60_000 }
)
})
it('extends selector wait RPC timeout when the user passes --timeout', async () => {
callMock.mockResolvedValueOnce({
id: 'req_wait',
ok: true,
result: { ok: true },
_meta: {
runtimeId: 'runtime-1'
}
})
vi.spyOn(console, 'log').mockImplementation(() => {})
await main(
['wait', '--selector', '#ready', '--timeout', '12000', '--worktree', 'all', '--json'],
'/tmp/not-an-orca-worktree'
)
expect(callMock).toHaveBeenCalledWith(
'browser.wait',
{
selector: '#ready',
timeout: 12000,
text: undefined,
url: undefined,
load: undefined,
fn: undefined,
state: undefined,
worktree: undefined
},
{ timeoutMs: 17000 }
)
})
it('does not tell users Orca is down for a generic runtime timeout', async () => {
callMock.mockRejectedValueOnce(
new RuntimeClientError(
'runtime_timeout',
'Timed out waiting for the Orca runtime to respond.'
)
)
const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {})
await main(['wait', '--selector', '#ready', '--worktree', 'all'], '/tmp/not-an-orca-worktree')
expect(errorSpy).toHaveBeenCalledWith('Timed out waiting for the Orca runtime to respond.')
})
it('passes the mobile viewport flag through to browser.viewport', async () => {
callMock.mockResolvedValueOnce({
id: 'req_viewport',
ok: true,
result: {
width: 375,
height: 812,
deviceScaleFactor: 2,
mobile: true
},
_meta: {
runtimeId: 'runtime-1'
}
})
vi.spyOn(console, 'log').mockImplementation(() => {})
await main(
[
'viewport',
'--width',
'375',
'--height',
'812',
'--scale',
'2',
'--mobile',
'--worktree',
'all',
'--json'
],
'/tmp/not-an-orca-worktree'
)
expect(callMock).toHaveBeenCalledWith('browser.viewport', {
width: 375,
height: 812,
deviceScaleFactor: 2,
mobile: true,
worktree: undefined
})
})
})

View file

@ -78,6 +78,7 @@ type BrowserCliTarget = {
}
const DEFAULT_TERMINAL_WAIT_RPC_TIMEOUT_MS = 5 * 60 * 1000
const DEFAULT_BROWSER_WAIT_RPC_TIMEOUT_MS = 60_000
const GLOBAL_FLAGS = ['help', 'json']
export const COMMAND_SPECS: CommandSpec[] = [
{
@ -1021,16 +1022,26 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
const fn = getOptionalStringFlag(parsed.flags, 'fn')
const state = getOptionalStringFlag(parsed.flags, 'state')
const target = await getBrowserCommandTarget(parsed.flags, cwd, client)
const result = await client.call<BrowserWaitResult>('browser.wait', {
selector,
timeout,
text,
url,
load,
fn,
state,
...target
})
const result = await client.call<BrowserWaitResult>(
'browser.wait',
{
selector,
timeout,
text,
url,
load,
fn,
state,
...target
},
{
// Why: selector/text/url waits can legitimately take longer than a
// normal RPC round-trip, even when Orca is healthy. Give browser.wait
// an explicit timeout budget so slow waits do not get mislabeled as
// "Orca is not running" by the generic client timeout path.
timeoutMs: timeout ? timeout + 5000 : DEFAULT_BROWSER_WAIT_RPC_TIMEOUT_MS
}
)
return printResult(result, json, (v) => JSON.stringify(v, null, 2))
}
@ -1955,10 +1966,7 @@ function formatCliStatus(status: CliStatusResult): string {
function formatCliError(error: unknown): string {
const message = error instanceof Error ? error.message : String(error)
if (
error instanceof RuntimeClientError &&
(error.code === 'runtime_unavailable' || error.code === 'runtime_timeout')
) {
if (error instanceof RuntimeClientError && error.code === 'runtime_unavailable') {
return `${message}\nOrca is not running. Run 'orca open' first.`
}
if (

View file

@ -830,17 +830,69 @@ describe('AgentBrowserBridge', () => {
// ── Viewport command arg building ──
it('builds viewport args with scale', async () => {
succeedWith({ width: 375, height: 812, mobile: true })
it('applies viewport emulation through CDP so mobile mode is preserved', async () => {
const wc = mockWebContents(100)
webContentsFromIdMock.mockReturnValue(wc)
await bridge.setViewport(375, 812, 2, true)
// Why: calls[0] is the stale-session 'close'; the actual command is the last call
expect(wc.debugger.sendCommand).toHaveBeenCalledWith('Emulation.setDeviceMetricsOverride', {
width: 375,
height: 812,
deviceScaleFactor: 2,
mobile: true
})
const viewportCall = execFileMock.mock.calls.find((call: unknown[]) =>
(call[1] as string[]).includes('viewport')
)
expect(viewportCall).toBeUndefined()
})
it('normalizes selector wait state=visible to the default supported semantics', async () => {
succeedWith({ selector: 'h1', waited: 'selector' })
await bridge.wait({ selector: 'h1', state: 'visible' })
const args = execFileMock.mock.calls.at(-1)![1] as string[]
expect(args).toContain('set')
expect(args).toContain('viewport')
expect(args).toContain('375')
expect(args).toContain('812')
expect(args).toContain('2')
expect(args).toContain('wait')
expect(args).toContain('h1')
expect(args).not.toContain('--state')
})
it('enforces conditional wait timeouts at the bridge layer', async () => {
succeedWith({ selector: '#ready', waited: 'selector' })
await bridge.wait({ selector: '#ready', timeout: 1200 })
const args = execFileMock.mock.calls.at(-1)![1] as string[]
const options = execFileMock.mock.calls.at(-1)![2] as { timeout?: number; env?: unknown }
expect(args).toContain('wait')
expect(args).toContain('#ready')
expect(options.timeout).toBe(2200)
expect(options.env).toBe(process.env)
})
it('returns browser_timeout for timed conditional waits without recycling the session', async () => {
const killedError = Object.assign(new Error('timeout'), { killed: true })
execFileMock.mockImplementation(
(_bin: string, args: string[], _opts: unknown, cb: Function) => {
if (args.includes('wait')) {
cb(killedError, '', '')
return
}
cb(null, JSON.stringify({ success: true, data: { snapshot: 'fresh' } }), '')
}
)
for (let i = 0; i < 3; i++) {
await expect(bridge.wait({ selector: '.missing', timeout: 1200 })).rejects.toThrow(
'Timed out waiting for browser condition after 1200ms.'
)
}
await bridge.snapshot()
expect(CdpWsProxyMock.instances).toHaveLength(1)
})
// ── Stderr passthrough on non-timeout errors ──

View file

@ -52,6 +52,7 @@ import type {
// agent-browser's own timeout fires and returns a proper error.
const EXEC_TIMEOUT_MS = 90_000
const CONSECUTIVE_TIMEOUT_LIMIT = 3
const WAIT_PROCESS_TIMEOUT_GRACE_MS = 1_000
type SessionState = {
proxy: CdpWsProxy
@ -78,6 +79,12 @@ type ResolvedBrowserCommandTarget = {
webContentsId: number
}
type AgentBrowserExecOptions = {
envOverrides?: NodeJS.ProcessEnv
timeoutMs?: number
timeoutError?: BrowserError
}
function agentBrowserNativeName(): string {
const ext = process.platform === 'win32' ? '.exe' : ''
return `agent-browser-${platform()}-${arch()}${ext}`
@ -982,9 +989,11 @@ export class AgentBrowserBridge {
): Promise<BrowserWaitResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['wait']
const hasCondition =
!!options?.selector || !!options?.text || !!options?.url || !!options?.load || !!options?.fn
if (options?.selector) {
args.push(options.selector)
} else if (options?.timeout != null) {
} else if (options?.timeout != null && !hasCondition) {
args.push(String(options.timeout))
}
if (options?.text) {
@ -999,10 +1008,28 @@ export class AgentBrowserBridge {
if (options?.fn) {
args.push('--fn', options.fn)
}
if (options?.state) {
args.push('--state', options.state)
const normalizedState = options?.state === 'visible' ? undefined : options?.state
if (normalizedState) {
args.push('--state', normalizedState)
}
return (await this.execAgentBrowser(sessionName, args)) as BrowserWaitResult
// Why: agent-browser's selector wait surface does not support `--state visible`
// or a documented per-command `--timeout`. Orca normalizes "visible" back
// to the default selector wait semantics and enforces the requested timeout
// at the bridge layer so missing selectors fail as browser_timeout instead
// of hanging until the generic runtime RPC timeout fires.
return (await this.execAgentBrowser(sessionName, args, {
timeoutMs:
options?.timeout != null && hasCondition
? options.timeout + WAIT_PROCESS_TIMEOUT_GRACE_MS
: undefined,
timeoutError:
options?.timeout != null && hasCondition
? new BrowserError(
'browser_timeout',
`Timed out waiting for browser condition after ${options.timeout}ms.`
)
: undefined
})) as BrowserWaitResult
})
}
@ -1149,17 +1176,37 @@ export class AgentBrowserBridge {
async setViewport(
width: number,
height: number,
scale?: number,
_mobile?: boolean,
scale = 1,
mobile = false,
worktreeId?: string,
browserPageId?: string
): Promise<BrowserViewportResult> {
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (sessionName) => {
const args = ['set', 'viewport', String(width), String(height)]
if (scale != null) {
args.push(String(scale))
return this.enqueueTargetedCommand(worktreeId, browserPageId, async (_sessionName, target) => {
const wc = this.getWebContents(target.webContentsId)
if (!wc) {
throw new BrowserError('browser_tab_not_found', 'Tab is no longer available')
}
const dbg = wc.debugger
if (!dbg.isAttached()) {
throw new BrowserError('browser_error', 'Debugger not attached')
}
// Why: agent-browser only supports width/height/scale for `set viewport`;
// it has no `mobile` flag. Orca's CLI exposes `--mobile`, so apply the
// emulation directly through CDP to keep the public CLI contract honest.
await dbg.sendCommand('Emulation.setDeviceMetricsOverride', {
width,
height,
deviceScaleFactor: scale,
mobile
})
return {
width,
height,
deviceScaleFactor: scale,
mobile
}
return (await this.execAgentBrowser(sessionName, args)) as BrowserViewportResult
})
}
@ -1575,7 +1622,11 @@ export class AgentBrowserBridge {
}
}
private async execAgentBrowser(sessionName: string, commandArgs: string[]): Promise<unknown> {
private async execAgentBrowser(
sessionName: string,
commandArgs: string[],
execOptions?: AgentBrowserExecOptions
): Promise<unknown> {
const session = this.sessions.get(sessionName)
if (!session) {
throw new BrowserError('browser_error', 'Session not found')
@ -1604,7 +1655,7 @@ export class AgentBrowserBridge {
args.push(...commandArgs, '--json')
const stdout = await this.runAgentBrowserRaw(sessionName, args)
const stdout = await this.runAgentBrowserRaw(sessionName, args, execOptions)
const translated = translateResult(stdout)
if (!translated.ok) {
@ -1645,7 +1696,11 @@ export class AgentBrowserBridge {
return translated.result
}
private runAgentBrowserRaw(sessionName: string, args: string[]): Promise<string> {
private runAgentBrowserRaw(
sessionName: string,
args: string[],
execOptions?: AgentBrowserExecOptions
): Promise<string> {
return new Promise<string>((resolve, reject) => {
const session = this.sessions.get(sessionName)
let child: ChildProcess | null = null
@ -1654,7 +1709,13 @@ export class AgentBrowserBridge {
args,
// Why: screenshots return large base64 strings that exceed Node's default
// 1MB maxBuffer, causing ENOBUFS and a timeout-like failure.
{ timeout: EXEC_TIMEOUT_MS, maxBuffer: 50 * 1024 * 1024 },
{
timeout: execOptions?.timeoutMs ?? EXEC_TIMEOUT_MS,
maxBuffer: 50 * 1024 * 1024,
env: execOptions?.envOverrides
? { ...process.env, ...execOptions.envOverrides }
: process.env
},
(error, stdout, stderr) => {
if (session && session.activeProcess === child) {
session.activeProcess = null
@ -1668,6 +1729,10 @@ export class AgentBrowserBridge {
const liveSession = this.sessions.get(sessionName)
if (error && (error as NodeJS.ErrnoException & { killed?: boolean }).killed) {
if (execOptions?.timeoutError) {
reject(execOptions.timeoutError)
return
}
if (liveSession) {
liveSession.consecutiveTimeouts++
if (liveSession.consecutiveTimeouts >= CONSECUTIVE_TIMEOUT_LIMIT) {

View file

@ -33,7 +33,10 @@ function createMockWebContents() {
return {
webContents: {
debugger: debuggerObj,
isDestroyed: () => false
isDestroyed: () => false,
focus: vi.fn(),
getTitle: vi.fn(() => 'Example'),
getURL: vi.fn(() => 'https://example.com')
},
listeners,
emit(event: string, ...args: unknown[]) {
@ -242,6 +245,32 @@ describe('CdpWsProxy', () => {
client.close()
})
it('does not focus the guest for Runtime.evaluate polling commands', async () => {
const client = await connect()
await sendAndReceive(client, {
id: 9,
method: 'Runtime.evaluate',
params: { expression: 'document.readyState' }
})
expect(mock.webContents.focus).not.toHaveBeenCalled()
client.close()
})
it('still focuses the guest for Input.insertText', async () => {
const client = await connect()
await sendAndReceive(client, {
id: 10,
method: 'Input.insertText',
params: { text: 'hello' }
})
expect(mock.webContents.focus).toHaveBeenCalledTimes(1)
client.close()
})
// ── Page.frameNavigated interception ──
// ── Cleanup ──

View file

@ -233,13 +233,12 @@ export class CdpWsProxy {
this.handleScreenshot(client, clientId, msg.params)
return
}
// Why: Input.insertText/Runtime.evaluate need native focus for clipboard APIs.
if (
(msg.method === 'Input.insertText' ||
msg.method === 'Runtime.evaluate' ||
msg.method === 'Runtime.callFunctionOn') &&
!this.webContents.isDestroyed()
) {
// Why: Input.insertText can still require native focus in Electron webviews.
// Do not auto-focus generic Runtime.evaluate/callFunctionOn traffic: wait
// polling and read-only JS probes use those methods heavily, and focusing on
// every eval steals the user's foreground window while background automation
// is running.
if (msg.method === 'Input.insertText' && !this.webContents.isDestroyed()) {
this.webContents.focus()
}
// Why: agent-browser waits for network idle to detect navigation completion.