// Why: Cloudflare Turnstile and similar bot detectors probe multiple browser
// APIs beyond navigator.webdriver. This script runs via
// Page.addScriptToEvaluateOnNewDocument before any page JS to mask automation
// signals that CDP debugger attachment and Electron's webview expose.
//
// Why (idempotency): the guest policy setup, the CDP bridge and the CDP
// WebSocket proxy each register this same source with
// Page.addScriptToEvaluateOnNewDocument, so it can be evaluated more than once
// in a single document. A repeated evaluation must be a silent no-op: without
// the guard at the top, the second Object.defineProperty call throws
// "Cannot redefine property: webdriver" inside the page.
//
// NOTE: the value is shipped verbatim to the page, so it must stay plain
// JavaScript and must not contain backticks or template placeholders.
export const ANTI_DETECTION_SCRIPT = `(function() {
  // Why: an own 'webdriver' property on the navigator instance only exists
  // after this script has run (the native attribute lives on the prototype),
  // so it doubles as an "already applied" marker without adding any new,
  // fingerprintable global.
  if (Object.getOwnPropertyDescriptor(navigator, 'webdriver')) {
    return;
  }
  Object.defineProperty(navigator, 'webdriver', {
    get: () => false,
    configurable: true
  });
  // Why: Electron webviews expose an empty plugins array. Real Chrome always
  // has at least a few default plugins (PDF Viewer, etc.). An empty array is
  // a strong automation signal.
  if (navigator.plugins.length === 0) {
    Object.defineProperty(navigator, 'plugins', {
      get: () => [
        { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer' },
        { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai' },
        { name: 'Native Client', filename: 'internal-nacl-plugin' }
      ],
      configurable: true
    });
  }
  // Why: Electron webviews may not have the window.chrome object that real
  // Chrome exposes. Turnstile checks for its presence. The csi() and
  // loadTimes() stubs satisfy deeper probes that check for these Chrome-
  // specific APIs beyond just chrome.runtime.
  if (!window.chrome) {
    window.chrome = {};
  }
  if (!window.chrome.runtime) {
    window.chrome.runtime = {};
  }
  if (!window.chrome.csi) {
    window.chrome.csi = function() {
      return {
        startE: Date.now(),
        onloadT: Date.now(),
        pageT: performance.now(),
        tran: 15
      };
    };
  }
  if (!window.chrome.loadTimes) {
    window.chrome.loadTimes = function() {
      return {
        commitLoadTime: Date.now() / 1000,
        connectionInfo: 'h2',
        finishDocumentLoadTime: Date.now() / 1000,
        finishLoadTime: Date.now() / 1000,
        firstPaintAfterLoadTime: 0,
        firstPaintTime: Date.now() / 1000,
        navigationType: 'Other',
        npnNegotiatedProtocol: 'h2',
        requestTime: Date.now() / 1000 - 0.16,
        startLoadTime: Date.now() / 1000 - 0.3,
        wasAlternateProtocolAvailable: false,
        wasFetchedViaSpdy: true,
        wasNpnNegotiated: true
      };
    };
  }
  // Why: Electron's Permission API defaults to 'denied' for most permissions,
  // but real Chrome returns 'prompt' for ungranted permissions. Returning
  // 'denied' is a strong bot signal. Override the query result for common
  // permissions that Turnstile and similar detectors probe.
  // The typeof guard keeps a missing Permissions global from aborting the
  // remaining overrides below.
  if (typeof Permissions !== 'undefined' && typeof Permissions.prototype.query === 'function') {
    const promptPerms = new Set([
      'notifications', 'geolocation', 'camera', 'microphone',
      'midi', 'idle-detection', 'storage-access'
    ]);
    const origQuery = Permissions.prototype.query;
    Permissions.prototype.query = function(desc) {
      // Why: a missing descriptor must reach the native implementation so the
      // caller gets the native failure instead of a synchronous TypeError
      // raised from this wrapper.
      if (desc && promptPerms.has(desc.name)) {
        return Promise.resolve({ state: 'prompt', onchange: null });
      }
      return origQuery.call(this, desc);
    };
  }
  // Why: Electron may report Notification.permission as 'denied' by default
  // whereas real Chrome reports 'default' for sites that haven't been granted
  // or blocked. Turnstile cross-references this with the Permissions API.
  try {
    Object.defineProperty(Notification, 'permission', {
      get: () => 'default',
      configurable: true
    });
  } catch {}
  // Why: Electron webviews may have an empty languages array. Real Chrome
  // always has at least one entry. An empty array is an automation signal.
  if (!navigator.languages || navigator.languages.length === 0) {
    Object.defineProperty(navigator, 'languages', {
      get: () => ['en-US', 'en'],
      configurable: true
    });
  }
})()`
/**
 * Registers ANTI_DETECTION_SCRIPT on `guest` through the CDP debugger and
 * keeps it registered across debugger detaches.
 *
 * Why: Page.addScriptToEvaluateOnNewDocument is the only reliable way to run
 * JS before page scripts on every navigation. The previous approach —
 * executeJavaScript on did-start-navigation — ran on the OLD page context
 * during navigation, so overrides were never present when the new page's
 * Turnstile script executed.
 *
 * @param guest - the webview's WebContents to protect.
 * @returns a cleanup function that removes the detach listener, cancels any
 *   pending re-attach and prevents further re-attach attempts.
 */
private injectAntiDetection(guest: Electron.WebContents): () => void {
  let disposed = false
  // Why: at most one re-attach may be pending. Without this handle every
  // 'detach' event scheduled its own timer (registering the script once per
  // timer), and the cleanup function could not cancel a timer that was
  // already queued.
  let pendingReattach: ReturnType<typeof setTimeout> | null = null

  const attach = (): void => {
    pendingReattach = null
    if (disposed || guest.isDestroyed()) {
      return
    }
    try {
      if (!guest.debugger.isAttached()) {
        guest.debugger.attach('1.3')
      }
      void guest.debugger
        .sendCommand('Page.enable', {})
        .then(() =>
          guest.debugger.sendCommand('Page.addScriptToEvaluateOnNewDocument', {
            source: ANTI_DETECTION_SCRIPT
          })
        )
        .catch(() => {
          /* best-effort — the debugger may have detached mid-command */
        })
    } catch {
      /* best-effort — debugger may be unavailable */
    }
  }

  // Why: the CDP proxy and bridge detach the debugger when they stop,
  // which removes addScriptToEvaluateOnNewDocument injections. Re-attach
  // so manual browsing retains anti-detection overrides after agent
  // sessions end. The 500ms delay avoids racing with the proxy/bridge if
  // it is mid-restart (detach → re-attach).
  const onDetach = (): void => {
    if (disposed || guest.isDestroyed() || pendingReattach !== null) {
      return
    }
    pendingReattach = setTimeout(attach, 500)
  }

  try {
    attach()
    guest.debugger.on('detach', onDetach)
  } catch {
    /* best-effort */
  }

  return () => {
    disposed = true
    if (pendingReattach !== null) {
      clearTimeout(pendingReattach)
      pendingReattach = null
    }
    try {
      guest.debugger.off('detach', onDetach)
    } catch {
      /* guest may already be destroyed */
    }
  }
}
Inject overrides on every page load so manual + // browsing passes challenges even without the CDP debugger attached. + const disposeAntiDetection = this.injectAntiDetection(guest) + // Why: background throttling must be disabled so agent-driven screenshots // (Page.captureScreenshot via CDP proxy) can capture frames even when the // Orca window is not the focused foreground app. With throttling enabled, @@ -373,6 +441,14 @@ export class BrowserManager { }) const navigationGuard = (event: Electron.Event, url: string): void => { + // Why: blob: URLs are same-origin (inherit the creator's origin) and are + // used by Cloudflare Turnstile to load challenge resources inside iframes. + // Blocking them triggers error 600010 ("bot behavior detected"). Only + // allow blobs whose embedded origin is http(s) to maintain defense-in-depth + // against blob:null or other opaque-origin blobs. + if (url.startsWith('blob:https://') || url.startsWith('blob:http://')) { + return + } if (!normalizeBrowserNavigationUrl(url)) { // Why: `will-attach-webview` only validates the initial src. Main must // keep enforcing the same allowlist for later guest navigations too. @@ -405,6 +481,7 @@ export class BrowserManager { // guest surface is torn down, preventing the callbacks from preventing GC of // the underlying WebContents wrapper. 
this.policyCleanupByGuestId.set(guest.id, () => { + disposeAntiDetection() if (!guest.isDestroyed()) { guest.off('will-navigate', navigationGuard) guest.off('will-redirect', navigationGuard) diff --git a/src/main/browser/browser-session-registry.test.ts b/src/main/browser/browser-session-registry.test.ts index 1bb7b1b4..822ecd88 100644 --- a/src/main/browser/browser-session-registry.test.ts +++ b/src/main/browser/browser-session-registry.test.ts @@ -18,6 +18,7 @@ vi.mock('./browser-manager', () => ({ })) import { browserSessionRegistry } from './browser-session-registry' +import { setupClientHintsOverride } from './browser-session-ua' import { ORCA_BROWSER_PARTITION } from '../../shared/constants' describe('BrowserSessionRegistry', () => { @@ -153,7 +154,7 @@ describe('BrowserSessionRegistry', () => { const edgeUa = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.6890.3 Safari/537.36 Edg/147.0.3210.5' - browserSessionRegistry.setupClientHintsOverride(mockSess, edgeUa) + setupClientHintsOverride(mockSess, edgeUa) expect(onBeforeSendHeaders).toHaveBeenCalledWith( { urls: ['https://*/*'] }, @@ -178,7 +179,7 @@ describe('BrowserSessionRegistry', () => { const chromeUa = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.6890.3 Safari/537.36' - browserSessionRegistry.setupClientHintsOverride(mockSess, chromeUa) + setupClientHintsOverride(mockSess, chromeUa) const callback = vi.fn() const listener = onBeforeSendHeaders.mock.calls[0][1] @@ -192,10 +193,7 @@ describe('BrowserSessionRegistry', () => { const onBeforeSendHeaders = vi.fn() const mockSess = { webRequest: { onBeforeSendHeaders } } as never - browserSessionRegistry.setupClientHintsOverride( - mockSess, - 'Mozilla/5.0 (compatible; MSIE 10.0)' - ) + setupClientHintsOverride(mockSess, 'Mozilla/5.0 (compatible; MSIE 10.0)') expect(onBeforeSendHeaders).not.toHaveBeenCalled() }) @@ -203,10 +201,7 @@ 
describe('BrowserSessionRegistry', () => { it('leaves non-Client-Hints headers unchanged', () => { const onBeforeSendHeaders = vi.fn() const mockSess = { webRequest: { onBeforeSendHeaders } } as never - browserSessionRegistry.setupClientHintsOverride( - mockSess, - 'Mozilla/5.0 Chrome/147.0.0.0 Safari/537.36' - ) + setupClientHintsOverride(mockSess, 'Mozilla/5.0 Chrome/147.0.0.0 Safari/537.36') const callback = vi.fn() const listener = onBeforeSendHeaders.mock.calls[0][1] diff --git a/src/main/browser/browser-session-registry.ts b/src/main/browser/browser-session-registry.ts index ad8c07fa..10272c1c 100644 --- a/src/main/browser/browser-session-registry.ts +++ b/src/main/browser/browser-session-registry.ts @@ -1,4 +1,8 @@ -import { app, type Session, session } from 'electron' +/* eslint-disable max-lines -- Why: the registry is the single source of truth for + browser session profiles, partition allowlisting, cookie import staging, and + per-partition permission/download policies. Splitting further would scatter the + security boundary across modules. 
*/ +import { app, session } from 'electron' import { randomUUID } from 'node:crypto' import { copyFileSync, @@ -12,6 +16,7 @@ import { join } from 'node:path' import { ORCA_BROWSER_PARTITION } from '../../shared/constants' import type { BrowserSessionProfile, BrowserSessionProfileScope } from '../../shared/types' import { browserManager } from './browser-manager' +import { cleanElectronUserAgent, setupClientHintsOverride } from './browser-session-ua' type BrowserSessionMeta = { defaultSource: BrowserSessionProfile['source'] @@ -107,7 +112,18 @@ class BrowserSessionRegistry { if (meta.userAgent) { const sess = session.fromPartition(ORCA_BROWSER_PARTITION) sess.setUserAgent(meta.userAgent) - this.setupClientHintsOverride(sess, meta.userAgent) + setupClientHintsOverride(sess, meta.userAgent) + } else { + // Why: even without an imported session, the default Electron UA contains + // "Electron/X.X.X" and the app name which trip Cloudflare Turnstile. + try { + const sess = session.fromPartition(ORCA_BROWSER_PARTITION) + const cleanUA = cleanElectronUserAgent(sess.getUserAgent()) + sess.setUserAgent(cleanUA) + setupClientHintsOverride(sess, cleanUA) + } catch { + /* session not available yet (e.g. unit tests or pre-ready) */ + } } if (meta.defaultSource) { const current = this.profiles.get('default') @@ -120,46 +136,6 @@ class BrowserSessionRegistry { } } - // Why: Electron's actual Chromium version (e.g. 134) differs from the source - // browser's version (e.g. Edge 147). The sec-ch-ua Client Hints headers - // reveal the real version, creating a mismatch that Google's anti-fraud - // detection flags as CookieMismatch on accounts.google.com. Override Client - // Hints on outgoing requests to match the source browser's UA. 
- setupClientHintsOverride(sess: Session, ua: string): void { - const chromeMatch = ua.match(/Chrome\/([\d.]+)/) - if (!chromeMatch) { - return - } - const fullChromeVersion = chromeMatch[1] - const majorVersion = fullChromeVersion.split('.')[0] - - let brand = 'Google Chrome' - let brandFullVersion = fullChromeVersion - - const edgeMatch = ua.match(/Edg\/([\d.]+)/) - if (edgeMatch) { - brand = 'Microsoft Edge' - brandFullVersion = edgeMatch[1] - } - const brandMajor = brandFullVersion.split('.')[0] - - const secChUa = `"${brand}";v="${brandMajor}", "Chromium";v="${majorVersion}", "Not/A)Brand";v="24"` - const secChUaFull = `"${brand}";v="${brandFullVersion}", "Chromium";v="${fullChromeVersion}", "Not/A)Brand";v="24.0.0.0"` - - sess.webRequest.onBeforeSendHeaders({ urls: ['https://*/*'] }, (details, callback) => { - const headers = details.requestHeaders - for (const key of Object.keys(headers)) { - const lower = key.toLowerCase() - if (lower === 'sec-ch-ua') { - headers[key] = secChUa - } else if (lower === 'sec-ch-ua-full-version-list') { - headers[key] = secChUaFull - } - } - callback({ requestHeaders: headers }) - }) - } - // Why: the import writes cookies to a staging DB because CookieMonster holds // the live DB's data in memory and would overwrite our changes on its next // flush. This method MUST run before any session.fromPartition() call so @@ -373,6 +349,11 @@ class BrowserSessionRegistry { this.configuredPartitions.add(partition) const sess = session.fromPartition(partition) + if (typeof sess.getUserAgent === 'function') { + const cleanUA = cleanElectronUserAgent(sess.getUserAgent()) + sess.setUserAgent(cleanUA) + setupClientHintsOverride(sess, cleanUA) + } // Why: clipboard-read and clipboard-sanitized-write are required for agent-browser's // clipboard commands to work. Without these, navigator.clipboard.writeText/readText // throws NotAllowedError even when invoked via CDP with userGesture:true. 
import type { Session } from 'electron'

/**
 * Returns `ua` with the Electron-specific product tokens removed.
 *
 * Why: Electron's default UA includes "Electron/X.X.X" and the app name
 * (e.g. "orca/1.2.3"), which Cloudflare Turnstile and other bot detectors
 * flag as non-human traffic. Stripping those tokens makes the webview's UA
 * and sec-ch-ua Client Hints look like standard Chrome.
 *
 * @param ua - the user-agent string to clean.
 * @returns the cleaned string; input without those tokens is returned as-is.
 */
export function cleanElectronUserAgent(ua: string): string {
  // First pass: drop the " Electron/<version>" token together with the
  // whitespace in front of it.
  const electronToken = /\s+Electron\/\S+/
  const withoutElectron = ua.replace(electronToken, '')

  // Second pass: drop the single token sitting between a closing parenthesis
  // and "Chrome/". \S+ matches any non-whitespace token (e.g.
  // "orca/1.3.8-rc.0"), including pre-release semver strings that [\d.]+
  // would miss.
  const appTokenBeforeChrome = /(\)\s+)\S+\s+(Chrome\/)/
  return withoutElectron.replace(appTokenBeforeChrome, '$1$2')
}

// Why: Electron's actual Chromium version (e.g. 134) differs from the source
// browser's version (e.g. Edge 147). The sec-ch-ua Client Hints headers
// reveal the real version, creating a mismatch that Google's anti-fraud
// detection flags as CookieMismatch on accounts.google.com. Override Client
// Hints on outgoing requests to match the source browser's UA.
/**
 * Installs an onBeforeSendHeaders listener on `sess` (https URLs only) that
 * rewrites the sec-ch-ua and sec-ch-ua-full-version-list request headers so
 * they agree with the browser described by `ua`.
 *
 * @param sess - the session whose outgoing requests are rewritten.
 * @param ua - user-agent string the Client Hints are derived from. When it
 *   carries no "Chrome/<version>" token nothing is registered.
 */
export function setupClientHintsOverride(sess: Session, ua: string): void {
  const chromium = /Chrome\/([\d.]+)/.exec(ua)
  if (chromium === null) {
    return
  }
  const chromiumFull = chromium[1]

  // An "Edg/<version>" token switches the leading brand to Microsoft Edge and
  // uses Edge's own version for it; otherwise the brand is Google Chrome at
  // the Chromium version.
  const edge = /Edg\/([\d.]+)/.exec(ua)
  const brandName = edge ? 'Microsoft Edge' : 'Google Chrome'
  const brandFull = edge ? edge[1] : chromiumFull

  const majorOf = (version: string): string => version.split('.')[0]

  // Replacement values keyed by lower-cased header name.
  const replacements = new Map<string, string>([
    [
      'sec-ch-ua',
      `"${brandName}";v="${majorOf(brandFull)}", "Chromium";v="${majorOf(chromiumFull)}", "Not/A)Brand";v="24"`
    ],
    [
      'sec-ch-ua-full-version-list',
      `"${brandName}";v="${brandFull}", "Chromium";v="${chromiumFull}", "Not/A)Brand";v="24.0.0.0"`
    ]
  ])

  sess.webRequest.onBeforeSendHeaders({ urls: ['https://*/*'] }, (details, callback) => {
    // The header object is updated in place and handed back as-is; header
    // names are matched case-insensitively and their original casing is kept.
    const { requestHeaders } = details
    Object.keys(requestHeaders).forEach((headerName) => {
      const replacement = replacements.get(headerName.toLowerCase())
      if (replacement !== undefined) {
        requestHeaders[headerName] = replacement
      }
    })
    callback({ requestHeaders })
  })
}
ANTI_DETECTION_SCRIPT } from './anti-detection' export class BrowserError extends Error { constructor( @@ -1158,6 +1159,13 @@ export class CdpBridge { flatten: true }) + // Why: attaching the CDP debugger sets navigator.webdriver = true and + // exposes other automation signals that Cloudflare Turnstile checks. + // Override them on every new document load so challenges succeed. + await sender('Page.addScriptToEvaluateOnNewDocument', { + source: ANTI_DETECTION_SCRIPT + }) + // Why: remove any stale listeners from a previous attach cycle to prevent // listener accumulation. After a detach+reattach, the old handlers would // still fire alongside the new ones, causing duplicate log entries, diff --git a/src/main/browser/cdp-ws-proxy.ts b/src/main/browser/cdp-ws-proxy.ts index 9575ffc2..4a0ead7d 100644 --- a/src/main/browser/cdp-ws-proxy.ts +++ b/src/main/browser/cdp-ws-proxy.ts @@ -2,6 +2,7 @@ import { WebSocketServer, WebSocket } from 'ws' import { createServer, type Server, type IncomingMessage, type ServerResponse } from 'http' import type { WebContents } from 'electron' import { captureScreenshot } from './cdp-screenshot' +import { ANTI_DETECTION_SCRIPT } from './anti-detection' export class CdpWsProxy { private httpServer: Server | null = null @@ -96,9 +97,15 @@ export class CdpWsProxy { const url = req.url ?? '' if (url === '/json/version' || url === '/json/version/') { res.writeHead(200, { 'Content-Type': 'application/json' }) + // Why: agent-browser reads this endpoint to identify the browser. Returning + // "Orca/CdpWsProxy" leaks that this is an embedded automation surface, which + // could affect downstream detection heuristics. + // Why: process.versions.chrome contains the exact Chromium version + // bundled with Electron, producing a realistic version string. + const chromeVersion = process.versions.chrome ?? 
'134.0.0.0' res.end( JSON.stringify({ - Browser: 'Orca/CdpWsProxy', + Browser: `Chrome/${chromeVersion}`, 'Protocol-Version': '1.3', webSocketDebuggerUrl: `ws://127.0.0.1:${this.port}` }) @@ -134,6 +141,19 @@ export class CdpWsProxy { } } this.attached = true + + // Why: attaching the CDP debugger sets navigator.webdriver = true and + // exposes other automation signals that Cloudflare Turnstile checks. + // Inject before any page loads so challenges succeed. + try { + await this.webContents.debugger.sendCommand('Page.enable', {}) + await this.webContents.debugger.sendCommand('Page.addScriptToEvaluateOnNewDocument', { + source: ANTI_DETECTION_SCRIPT + }) + } catch { + /* best-effort — page domain may not be ready yet */ + } + this.debuggerMessageHandler = (_event: unknown, ...rest: unknown[]) => { const [method, params, sessionId] = rest as [ string, @@ -209,11 +229,14 @@ export class CdpWsProxy { return } if (msg.method === 'Browser.getVersion') { + // Why: returning "Orca/Electron" identifies this as an embedded automation + // surface to agent-browser. Use a generic Chrome product string instead. + const chromeVersion = process.versions.chrome ?? '134.0.0.0' this.sendResult( clientId, { protocolVersion: '1.3', - product: 'Orca/Electron', + product: `Chrome/${chromeVersion}`, userAgent: '', jsVersion: '' },