remove intercept continue/block — design mismatch with agent-browser

agent-browser routes requests by URL pattern, not by individual request
ID. The RPC/CLI interface assumed per-request continue/block decisions,
which do not map cleanly onto pattern-level routing. Both commands were
removed from all layers (CLI, RPC, runtime, bridge, types), with TODO
comments for adding them back once agent-browser supports them.

Also updates SKILL.md to remove timezone/locale/permissions references
and document the intercept limitation.
This commit is contained in:
Jinwoo-H 2026-04-19 22:07:42 -04:00
parent 5ef2cfeb76
commit d7b84d4a85
8 changed files with 174 additions and 181 deletions

View file

@ -64,6 +64,7 @@ If no tabs are open in the current worktree, commands return `browser_no_tab`.
```bash
orca goto --url <url> [--json] # Navigate to URL, waits for page load
orca back [--json] # Go back in browser history
orca forward [--json] # Go forward in browser history
orca reload [--json] # Reload the current page
```
@ -80,13 +81,16 @@ orca pdf [--json] # Export page as PDF (base64)
```bash
orca click --element <ref> [--json] # Click an element by ref
orca dblclick --element <ref> [--json] # Double-click an element
orca fill --element <ref> --value <text> [--json] # Clear and fill an input
orca type --input <text> [--json] # Type at current focus (no element targeting)
orca select --element <ref> --value <value> [--json] # Select dropdown option
orca check --element <ref> [--json] # Check a checkbox
orca uncheck --element <ref> [--json] # Uncheck a checkbox
orca scroll --direction <up|down> [--amount <pixels>] [--json] # Scroll viewport
orca scrollintoview --element <ref> [--json] # Scroll element into view
orca hover --element <ref> [--json] # Hover over an element
orca focus --element <ref> [--json] # Focus an element
orca drag --from <ref> --to <ref> [--json] # Drag from one element to another
orca clear --element <ref> [--json] # Clear an input field
orca select-all --element <ref> [--json] # Select all text in an element
@ -103,6 +107,48 @@ orca tab create [--url <url>] [--json] # Open a new browser tab
orca tab close [--index <n>] [--json] # Close a browser tab
```
### Wait / Synchronization
Agents fail more often from bad waits than from bad selectors. Pick the right wait for the situation:
```bash
orca wait [--timeout <ms>] [--json] # Wait for timeout (default 1000ms)
orca wait --selector <css> [--state <visible|hidden>] [--timeout <ms>] [--json] # Wait for element
orca wait --text <string> [--timeout <ms>] [--json] # Wait for text to appear on page
orca wait --url <substring> [--timeout <ms>] [--json] # Wait for URL to contain substring
orca wait --load <networkidle|load|domcontentloaded> [--timeout <ms>] [--json] # Wait for load state
orca wait --fn <js-expression> [--timeout <ms>] [--json] # Wait for JS condition to be truthy
```
After any page-changing action, pick one:
- Wait for specific content: `orca wait --text "Dashboard" --json`
- Wait for URL change: `orca wait --url "/dashboard" --json`
- Wait for network idle (catch-all for SPA navigation): `orca wait --load networkidle --json`
- Wait for an element: `orca wait --selector ".results" --json`
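Avoid bare `orca wait --timeout 2000` except when debugging — a fixed sleep makes scripts slow and flaky. Condition waits (`--selector`, `--text`, `--url`, `--load`, `--fn`) default to a 30000ms timeout; a bare `orca wait` defaults to 1000ms.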
### Data Extraction
```bash
orca exec --command "get text @e1" [--json] # Get visible text of an element
orca exec --command "get html @e1" [--json] # Get innerHTML
orca exec --command "get value @e1" [--json] # Get input value
orca exec --command "get attr @e1 href" [--json] # Get element attribute
orca exec --command "get title" [--json] # Get page title
orca exec --command "get url" [--json] # Get current URL
orca exec --command "get count .item" [--json] # Count matching elements
```
### State Checks
```bash
orca exec --command "is visible @e1" [--json] # Check if element is visible
orca exec --command "is enabled @e1" [--json] # Check if element is enabled
orca exec --command "is checked @e1" [--json] # Check if checkbox is checked
```
### Page Inspection
```bash
@ -122,9 +168,6 @@ orca cookie delete --name <n> [--domain <d>] [--json] # Delete a cookie
```bash
orca viewport --width <w> --height <h> [--scale <n>] [--mobile] [--json]
orca geolocation --latitude <lat> --longitude <lng> [--accuracy <m>] [--json]
orca timezone --id <tzId> [--json] # e.g. --id America/New_York
orca locale --locale <loc> [--json] # e.g. --locale fr-FR
orca permissions --grant <list> [--origin <url>] [--json]
```
### Request Interception
@ -133,10 +176,11 @@ orca permissions --grant <list> [--origin <url>] [--json]
orca intercept enable [--patterns <list>] [--json] # Start intercepting requests
orca intercept disable [--json] # Stop intercepting
orca intercept list [--json] # List paused requests
orca intercept continue --id <id> [--json] # Allow a paused request
orca intercept block --id <id> [--reason <r>] [--json] # Block a paused request
```
> **Note:** Per-request `intercept continue` and `intercept block` are not yet supported,
> because agent-browser currently routes by URL pattern rather than by individual request ID.
> They will be added once agent-browser supports per-request interception decisions.
### Console / Network Capture
```bash
@ -146,6 +190,65 @@ orca console [--limit <n>] [--json] # Read captured console entries
orca network [--limit <n>] [--json] # Read captured network entries
```
### Mouse Control
```bash
orca exec --command "mouse move 100 200" [--json] # Move mouse to coordinates
orca exec --command "mouse down left" [--json] # Press mouse button
orca exec --command "mouse up left" [--json] # Release mouse button
orca exec --command "mouse wheel 100" [--json] # Scroll wheel
```
### Keyboard
```bash
orca exec --command "keyboard inserttext \"text\"" [--json] # Insert text bypassing key events
orca exec --command "keyboard type \"text\"" [--json] # Raw keystrokes
orca exec --command "keydown Shift" [--json] # Hold key down
orca exec --command "keyup Shift" [--json] # Release key
```
### Frames (Iframes)
Iframes are auto-inlined in snapshots — refs inside iframes work transparently. For scoped interaction:
```bash
orca exec --command "frame @e3" [--json] # Switch to iframe by ref
orca exec --command "frame \"#iframe\"" [--json] # Switch to iframe by CSS selector
orca exec --command "frame main" [--json] # Return to main frame
```
### Semantic Locators (alternative to refs)
When refs aren't available or you want to skip a snapshot:
```bash
orca exec --command "find role button click --name \"Submit\"" [--json]
orca exec --command "find text \"Sign In\" click" [--json]
orca exec --command "find label \"Email\" fill \"user@test.com\"" [--json]
orca exec --command "find placeholder \"Search\" type \"query\"" [--json]
orca exec --command "find testid \"submit-btn\" click" [--json]
```
### Dialogs
`alert` and `beforeunload` are auto-accepted. For `confirm` and `prompt`:
```bash
orca exec --command "dialog status" [--json] # Check for pending dialog
orca exec --command "dialog accept" [--json] # Accept
orca exec --command "dialog accept \"text\"" [--json] # Accept with prompt input
orca exec --command "dialog dismiss" [--json] # Dismiss/cancel
```
### Debugging
```bash
orca exec --command "highlight @e1" [--json] # Highlight element visually
orca exec --command "console" [--json] # View console messages
orca exec --command "errors" [--json] # View page errors
```
### Extended Commands (Passthrough)
```bash
@ -155,10 +258,11 @@ orca exec --command "<agent-browser command>" [--json]
The `exec` command provides access to agent-browser's full command surface. Useful for commands without typed Orca handlers:
```bash
orca exec --command "dblclick @e3" --json
orca exec --command "get text @e5" --json
orca exec --command "mouse move 100 200" --json
orca exec --command "help" --json # See all available commands
orca exec --command "set device \"iPhone 14\"" --json # Emulate device
orca exec --command "set offline on" --json # Toggle offline mode
orca exec --command "set media dark" --json # Emulate color scheme
orca exec --command "network requests" --json # View tracked network requests
orca exec --command "help" --json # See all available commands
```
**Important:** Do not use `orca exec --command "tab ..."` for tab management. Use `orca tab list/create/close/switch` instead — those operate at the Orca level and keep the UI synchronized.
@ -168,6 +272,13 @@ orca exec --command "help" --json # See all available commands
- **`fill`** targets a specific element by ref, clears its value first, then enters text. Use for form fields.
- **`type`** types at whatever currently has focus. Use for search boxes or after clicking into an input.
If neither works on a custom input component, try:
```bash
orca focus --element @e1 --json
orca exec --command "keyboard inserttext \"text\"" --json # bypasses key events
```
## Error Codes and Recovery
| Error Code | Meaning | Recovery |
@ -204,6 +315,46 @@ orca snapshot --json
# Output should show dashboard content, not the login form
```
## Troubleshooting
**"Ref not found" / `browser_stale_ref`**
Page changed since the snapshot. Run `orca snapshot --json` again, then use the new refs.
**Element exists but not in snapshot**
It may be off-screen or not yet rendered. Try:
```bash
orca scroll --direction down --amount 1000 --json
orca snapshot --json
# or wait for it:
orca wait --text "..." --json
orca snapshot --json
```
**Click does nothing / overlay swallows the click**
Modals or cookie banners may be blocking. Snapshot, find the dismiss button, click it, then re-snapshot.
**Fill/type doesn't work on a custom input**
Some components intercept key events. Use `keyboard inserttext`:
```bash
orca focus --element @e1 --json
orca exec --command "keyboard inserttext \"text\"" --json
```
**`browser_no_tab` error**
No browser tab is open in the current worktree. Open one with `orca tab create --url <url> --json`.
## Auto-Switch Worktree
Browser commands automatically activate the target worktree in the Orca UI when needed. If the agent issues a browser command targeting a worktree that isn't currently active (e.g., its webviews aren't mounted), Orca will switch to that worktree before executing the command.
This means agents don't need to manually activate a worktree before using browser commands — `tab create`, `goto`, `snapshot`, etc. will work regardless of which worktree the UI is currently showing.
## Tab Create Auto-Activation
When `orca tab create` opens a new tab, it is automatically set as the active tab for the worktree. Subsequent commands (`snapshot`, `click`, etc.) will target the newly created tab without needing an explicit `tab switch`.
## Agent Guidance
- Always use `--json` for machine-driven use.
@ -212,7 +363,10 @@ orca snapshot --json
- After switching tabs, re-snapshot.
- If you get `browser_stale_ref`, re-snapshot and retry with the new refs.
- Use `orca tab list` before `orca tab switch` to know which tabs exist.
- Use `orca wait` to synchronize after actions that trigger async updates (form submits, SPA navigation, modals) instead of arbitrary sleeps.
- Use `orca eval` as an escape hatch for interactions not covered by other commands.
- Use `orca exec --command "help"` to discover extended commands.
- Worktree scoping is automatic — you'll only see tabs from your worktree by default.
- Tab creation auto-activates the new tab — no need for `tab switch` after `tab create`.
- Browser commands auto-switch the active worktree if needed — no manual worktree activation required.
- For full IDE/worktree/terminal commands, see the `orca-cli` skill.

View file

@ -171,7 +171,9 @@ Why: terminal handles are runtime-scoped and may go stale after reloads. If Orca
`orca` also supports browser automation commands for driving the built-in Orca browser. The core loop is: snapshot the page to get element refs → interact using refs → re-snapshot to see the updated state.
Key commands: `orca snapshot`, `orca click --element @e3`, `orca fill --element @e5 --value "hello"`, `orca goto --url <url>`, `orca tab list`, `orca tab switch --index <n>`.
Key commands: `orca snapshot`, `orca click --element @e3`, `orca fill --element @e5 --value "hello"`, `orca goto --url <url>`, `orca tab list`, `orca tab switch --index <n>`, `orca wait --text "loaded"`.
Browser commands auto-switch the active worktree and auto-activate newly created tabs — no manual worktree/tab activation needed.
For the full browser command reference, error codes, and worked examples, see the `orca-browser` skill.

View file

@ -45,8 +45,6 @@ import type {
BrowserInterceptEnableResult,
BrowserInterceptDisableResult,
BrowserInterceptedRequest,
BrowserInterceptContinueResult,
BrowserInterceptBlockResult,
BrowserCaptureStartResult,
BrowserCaptureStopResult,
BrowserConsoleResult,
@ -454,19 +452,8 @@ export const COMMAND_SPECS: CommandSpec[] = [
usage: 'orca intercept list [--worktree <selector>] [--json]',
allowedFlags: [...GLOBAL_FLAGS, 'worktree']
},
{
path: ['intercept', 'continue'],
summary: 'Continue a paused request',
usage: 'orca intercept continue --id <requestId> [--worktree <selector>] [--json]',
allowedFlags: [...GLOBAL_FLAGS, 'id', 'worktree']
},
{
path: ['intercept', 'block'],
summary: 'Block (fail) a paused request',
usage:
'orca intercept block --id <requestId> [--reason <reason>] [--worktree <selector>] [--json]',
allowedFlags: [...GLOBAL_FLAGS, 'id', 'reason', 'worktree']
},
// TODO: add intercept continue/block once agent-browser supports per-request
// interception decisions (currently only supports URL-pattern-based route/unroute).
// ── Console/network capture ──
{
path: ['capture', 'start'],
@ -1285,32 +1272,6 @@ export async function main(argv = process.argv.slice(2), cwd = process.cwd()): P
})
}
if (matches(commandPath, ['intercept', 'continue'])) {
const requestId = getRequiredStringFlag(parsed.flags, 'id')
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
const result = await client.call<BrowserInterceptContinueResult>(
'browser.intercept.continue',
{ requestId, worktree }
)
return printResult(result, json, (v) => `Continued request ${v.continued}`)
}
if (matches(commandPath, ['intercept', 'block'])) {
const requestId = getRequiredStringFlag(parsed.flags, 'id')
const params: Record<string, unknown> = { requestId }
const reason = getOptionalStringFlag(parsed.flags, 'reason')
if (reason) {
params.reason = reason
}
const worktree = await getBrowserWorktreeSelector(parsed.flags, cwd, client)
params.worktree = worktree
const result = await client.call<BrowserInterceptBlockResult>(
'browser.intercept.block',
params
)
return printResult(result, json, (v) => `Blocked request ${v.blocked}`)
}
// ── Console/network capture ──
if (matches(commandPath, ['capture', 'start'])) {

View file

@ -39,8 +39,6 @@ import type {
BrowserGeolocationResult,
BrowserInterceptEnableResult,
BrowserInterceptDisableResult,
BrowserInterceptContinueResult,
BrowserInterceptBlockResult,
BrowserConsoleResult,
BrowserNetworkLogResult,
BrowserCaptureStartResult,
@ -972,36 +970,9 @@ export class AgentBrowserBridge {
})
}
/**
 * Queues an agent-browser `network unroute` command for the given worktree.
 *
 * @param _requestId Accepted for interface parity with the CLI/RPC layer but not used.
 * @param worktreeId Optional worktree passed through to the command queue.
 * @returns The agent-browser output, cast to `BrowserInterceptContinueResult`.
 */
async interceptContinue(
  _requestId: string,
  worktreeId?: string
): Promise<BrowserInterceptContinueResult> {
  // TODO: agent-browser doesn't support per-request continue — this removes all interception.
  // The CLI/RPC pass requestId but agent-browser only operates on URL patterns.
  return this.enqueueCommand(worktreeId, async (session) => {
    const output = await this.execAgentBrowser(session, ['network', 'unroute'])
    return output as BrowserInterceptContinueResult
  })
}
/**
 * Queues an agent-browser `network route <urlPattern> --abort` command for the given worktree.
 *
 * @param urlPattern Value forwarded verbatim as the route pattern argument.
 * @param _reason Accepted for interface parity but not used.
 * @param worktreeId Optional worktree passed through to the command queue.
 * @returns The agent-browser output, cast to `BrowserInterceptBlockResult`.
 */
async interceptBlock(
  urlPattern: string,
  _reason?: string,
  worktreeId?: string
): Promise<BrowserInterceptBlockResult> {
  // TODO: RPC passes requestId as urlPattern — agent-browser expects a URL glob, not a request ID.
  // This is a design mismatch that needs reworking at the CLI/RPC/bridge level.
  return this.enqueueCommand(worktreeId, async (session) => {
    const output = await this.execAgentBrowser(session, [
      'network',
      'route',
      urlPattern,
      '--abort'
    ])
    return output as BrowserInterceptBlockResult
  })
}
// TODO: Add interceptContinue/interceptBlock once agent-browser supports per-request
// interception decisions. Currently agent-browser only operates on URL pattern-level
// routing, not individual request IDs, so the RPC/CLI interface doesn't map cleanly.
// ── Capture commands ──

View file

@ -19,8 +19,6 @@ import type {
BrowserGeolocationResult,
BrowserGotoResult,
BrowserHoverResult,
BrowserInterceptBlockResult,
BrowserInterceptContinueResult,
BrowserInterceptDisableResult,
BrowserInterceptEnableResult,
BrowserInterceptedRequest,
@ -791,37 +789,9 @@ export class CdpBridge {
return { requests: [...state.pausedRequests.values()] }
}
/**
 * Resumes a paused request on the active guest via CDP `Fetch.continueRequest`,
 * then removes it from the tab's paused-request map.
 *
 * @param requestId ID of the paused request to continue.
 * @returns `{ continued: requestId }` once the CDP call has resolved.
 */
async interceptContinue(requestId: string): Promise<BrowserInterceptContinueResult> {
  const resume = async (): Promise<BrowserInterceptContinueResult> => {
    const activeGuest = this.getActiveGuest()
    const sendCdp = this.makeCdpSender(activeGuest)
    await this.ensureDebuggerAttached(activeGuest)
    const tabState = this.getOrCreateTabState(this.resolveTabId(activeGuest.id))
    await sendCdp('Fetch.continueRequest', { requestId })
    // Only forget the request after the CDP call succeeded.
    tabState.pausedRequests.delete(requestId)
    return { continued: requestId }
  }
  return this.enqueueCommand(resume)
}
/**
 * Fails a paused request on the active guest via CDP `Fetch.failRequest`,
 * then removes it from the tab's paused-request map.
 *
 * @param requestId ID of the paused request to fail.
 * @param reason Network-level error reason reported to the page. Forwarded as
 *   the CDP `errorReason` parameter; defaults to `'Failed'`.
 * @returns `{ blocked: requestId }` once the CDP call has resolved.
 */
async interceptBlock(requestId: string, reason = 'Failed'): Promise<BrowserInterceptBlockResult> {
  return this.enqueueCommand(async () => {
    const guest = this.getActiveGuest()
    const sender = this.makeCdpSender(guest)
    await this.ensureDebuggerAttached(guest)
    const tabId = this.resolveTabId(guest.id)
    const state = this.getOrCreateTabState(tabId)
    // CDP names this parameter `errorReason` (a Network.ErrorReason value); sending it
    // as `reason` leaves the required parameter missing and the call is rejected.
    // NOTE(review): assumes the sender forwards params to CDP unchanged — confirm.
    await sender('Fetch.failRequest', { requestId, errorReason: reason })
    // Only forget the request after the CDP call succeeded.
    state.pausedRequests.delete(requestId)
    return { blocked: requestId }
  })
}
// TODO: Add interceptContinue/interceptBlock once agent-browser supports per-request
// interception decisions. The CDP Fetch domain supports it, but the agent-browser CLI
// only operates on URL pattern-level routing, creating a design mismatch.
// ── Console/network capture ──

View file

@ -54,8 +54,6 @@ import type {
BrowserGeolocationResult,
BrowserInterceptEnableResult,
BrowserInterceptDisableResult,
BrowserInterceptContinueResult,
BrowserInterceptBlockResult,
BrowserCaptureStartResult,
BrowserCaptureStopResult,
BrowserConsoleResult,
@ -1472,27 +1470,6 @@ export class OrcaRuntimeService {
return this.requireAgentBrowserBridge().interceptList(worktreeId)
}
/**
 * Resolves the worktree selector, then forwards the continue request to the
 * agent-browser bridge.
 *
 * @param params.requestId Request ID passed through to the bridge.
 * @param params.worktree Optional worktree selector to resolve.
 * @returns Whatever the bridge's `interceptContinue` resolves to.
 */
async browserInterceptContinue(params: {
  requestId: string
  worktree?: string
}): Promise<BrowserInterceptContinueResult> {
  const resolvedWorktreeId = await this.resolveBrowserWorktreeId(params.worktree)
  const bridge = this.requireAgentBrowserBridge()
  return bridge.interceptContinue(params.requestId, resolvedWorktreeId)
}
/**
 * Resolves the worktree selector, then forwards the block request to the
 * agent-browser bridge.
 *
 * @param params.requestId Request ID passed to the bridge as its first argument.
 * @param params.reason Optional reason passed through to the bridge.
 * @param params.worktree Optional worktree selector to resolve.
 * @returns Whatever the bridge's `interceptBlock` resolves to.
 */
async browserInterceptBlock(params: {
  requestId: string
  reason?: string
  worktree?: string
}): Promise<BrowserInterceptBlockResult> {
  const resolvedWorktreeId = await this.resolveBrowserWorktreeId(params.worktree)
  const bridge = this.requireAgentBrowserBridge()
  return bridge.interceptBlock(params.requestId, params.reason, resolvedWorktreeId)
}
// ── Console/network capture ──
async browserCaptureStart(params: { worktree?: string }): Promise<BrowserCaptureStartResult> {

View file

@ -1232,40 +1232,6 @@ export class OrcaRuntimeRpcServer {
}
}
if (request.method === 'browser.intercept.continue') {
try {
const params = this.extractParams(request)
const requestId = typeof params?.requestId === 'string' ? params.requestId : null
if (!requestId) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing requestId')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserInterceptContinue({ requestId, worktree })
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
}
}
if (request.method === 'browser.intercept.block') {
try {
const params = this.extractParams(request)
const requestId = typeof params?.requestId === 'string' ? params.requestId : null
if (!requestId) {
return this.errorResponse(request.id, 'invalid_argument', 'Missing requestId')
}
const worktree = typeof params?.worktree === 'string' ? params.worktree : undefined
const result = await this.runtime.browserInterceptBlock({
requestId,
reason: typeof params?.reason === 'string' ? params.reason : undefined,
worktree
})
return this.successResponse(request.id, result)
} catch (error) {
return this.browserErrorResponse(request.id, error)
}
}
// ── Console/network capture ──
if (request.method === 'browser.capture.start') {

View file

@ -329,14 +329,6 @@ export type BrowserInterceptDisableResult = {
disabled: boolean
}
/**
 * Result of an intercept-continue command.
 * `continued` identifies the request that was continued; the CDP bridge sets it
 * to the request ID it was given.
 */
export type BrowserInterceptContinueResult = { continued: string }
/**
 * Result of an intercept-block command.
 * `blocked` identifies the request that was failed; the CDP bridge sets it
 * to the request ID it was given.
 */
export type BrowserInterceptBlockResult = { blocked: string }
// ── Console/network capture types ──
export type BrowserConsoleEntry = {