fix(browser): improve Cloudflare Turnstile compatibility (#885)

This commit is contained in:
Jinwoo Hong 2026-04-21 01:36:30 -04:00 committed by GitHub
parent a8c3780bb9
commit 55e935ff32
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 285 additions and 55 deletions

View file

@ -0,0 +1,88 @@
// Why: Cloudflare Turnstile and similar bot detectors probe multiple browser
// APIs beyond navigator.webdriver. This script runs via
// Page.addScriptToEvaluateOnNewDocument before any page JS to mask automation
// signals that CDP debugger attachment and Electron's webview expose.
//
// The value is the source text of a self-invoking function. It must stay free
// of backticks and template placeholders because it lives in a template
// literal, and it must be safe to evaluate more than once in the same
// document because several callers may register it on the same debugger.
export const ANTI_DETECTION_SCRIPT = `(function() {
// Why: the script may be registered more than once for the same document.
// Defining the property as configurable (and guarding the call) keeps a repeat
// run from throwing "Cannot redefine property" and aborting before the
// remaining overrides are evaluated.
try {
Object.defineProperty(navigator, 'webdriver', { get: () => false, configurable: true });
} catch {}
// Why: Electron webviews expose an empty plugins array. Real Chrome always
// has at least a few default plugins (PDF Viewer, etc.). An empty array is
// a strong automation signal.
if (navigator.plugins.length === 0) {
// Why: build the list once so repeated reads return the same object, as a
// real browser does.
const fakePlugins = [
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer' },
{ name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai' },
{ name: 'Native Client', filename: 'internal-nacl-plugin' }
];
Object.defineProperty(navigator, 'plugins', {
get: () => fakePlugins
});
}
// Why: Electron webviews may not have the window.chrome object that real
// Chrome exposes. Turnstile checks for its presence. The csi() and
// loadTimes() stubs satisfy deeper probes that check for these Chrome-
// specific APIs beyond just chrome.runtime.
if (!window.chrome) {
window.chrome = {};
}
if (!window.chrome.runtime) {
window.chrome.runtime = {};
}
if (!window.chrome.csi) {
window.chrome.csi = function() {
return {
startE: Date.now(),
onloadT: Date.now(),
pageT: performance.now(),
tran: 15
};
};
}
if (!window.chrome.loadTimes) {
window.chrome.loadTimes = function() {
return {
commitLoadTime: Date.now() / 1000,
connectionInfo: 'h2',
finishDocumentLoadTime: Date.now() / 1000,
finishLoadTime: Date.now() / 1000,
firstPaintAfterLoadTime: 0,
firstPaintTime: Date.now() / 1000,
navigationType: 'Other',
npnNegotiatedProtocol: 'h2',
requestTime: Date.now() / 1000 - 0.16,
startLoadTime: Date.now() / 1000 - 0.3,
wasAlternateProtocolAvailable: false,
wasFetchedViaSpdy: true,
wasNpnNegotiated: true
};
};
}
// Why: Electron's Permission API defaults to 'denied' for most permissions,
// but real Chrome returns 'prompt' for ungranted permissions. Returning
// 'denied' is a strong bot signal. Override the query result for common
// permissions that Turnstile and similar detectors probe.
// Why (guard): if the Permissions interface is missing, an unguarded
// reference would throw and skip every override below this point.
if (typeof Permissions !== 'undefined' && typeof Permissions.prototype.query === 'function') {
const promptPerms = new Set([
'notifications', 'geolocation', 'camera', 'microphone',
'midi', 'idle-detection', 'storage-access'
]);
const origQuery = Permissions.prototype.query;
Permissions.prototype.query = function(desc) {
// Why: a missing descriptor must reach the original implementation so the
// caller gets its normal error instead of a synchronous TypeError from
// reading desc.name here.
if (desc && promptPerms.has(desc.name)) {
return Promise.resolve({ state: 'prompt', onchange: null });
}
return origQuery.apply(this, arguments);
};
}
// Why: Electron may report Notification.permission as 'denied' by default
// whereas real Chrome reports 'default' for sites that haven't been granted
// or blocked. Turnstile cross-references this with the Permissions API.
try {
Object.defineProperty(Notification, 'permission', {
get: () => 'default'
});
} catch {}
// Why: Electron webviews may have an empty languages array. Real Chrome
// always has at least one entry. An empty array is an automation signal.
if (!navigator.languages || navigator.languages.length === 0) {
// Why: one frozen array so repeated reads return the same object.
const fakeLanguages = Object.freeze(['en-US', 'en']);
Object.defineProperty(navigator, 'languages', {
get: () => fakeLanguages
});
}
})()`

View file

@ -46,6 +46,7 @@ import type {
BrowserSessionProfileSource
} from '../../shared/types'
import { browserSessionRegistry } from './browser-session-registry'
import { setupClientHintsOverride } from './browser-session-ua'
// ---------------------------------------------------------------------------
// Browser detection
@ -1578,7 +1579,7 @@ export async function importCookiesFromBrowser(
const ua = getUserAgentForBrowser(browser.family)
if (ua) {
targetSession.setUserAgent(ua)
browserSessionRegistry.setupClientHintsOverride(targetSession, ua)
setupClientHintsOverride(targetSession, ua)
browserSessionRegistry.persistUserAgent(ua)
diag(` set UA for partition: ${ua.substring(0, 80)}...`)
}

View file

@ -33,6 +33,7 @@ import {
setupGuestContextMenu,
setupGuestShortcutForwarding
} from './browser-guest-ui'
import { ANTI_DETECTION_SCRIPT } from './anti-detection'
export type BrowserGuestRegistration = {
browserPageId?: string
@ -98,6 +99,66 @@ export class BrowserManager {
private readonly downloadsById = new Map<string, ActiveDownload>()
private readonly grabSessionController = new BrowserGrabSessionController()
// Why: registering the script through the CDP debugger with
// Page.addScriptToEvaluateOnNewDocument is the only reliable way to run JS
// ahead of page scripts on every navigation. The earlier approach —
// executeJavaScript on did-start-navigation — executed in the OLD page
// context, so the overrides were missing by the time the new page's
// Turnstile script ran.
//
// The returned function disposes the injection: it unregisters the detach
// listener and blocks any further re-attach attempts.
private injectAntiDetection(guest: Electron.WebContents): () => void {
  let stopped = false
  const isGone = (): boolean => stopped || guest.isDestroyed()

  // Attaches the debugger when needed and registers the script. Every failure
  // is ignored on purpose: the debugger may be unavailable (best-effort).
  const registerScript = (): void => {
    if (isGone()) {
      return
    }
    try {
      if (!guest.debugger.isAttached()) {
        guest.debugger.attach('1.3')
      }
      const registration = guest.debugger.sendCommand('Page.enable', {}).then(() =>
        guest.debugger.sendCommand('Page.addScriptToEvaluateOnNewDocument', {
          source: ANTI_DETECTION_SCRIPT
        })
      )
      void registration.catch(() => {})
    } catch {
      /* best-effort — debugger may be unavailable */
    }
  }

  // Why: the CDP proxy and bridge detach the debugger when they stop, which
  // drops addScriptToEvaluateOnNewDocument injections. Re-registering keeps
  // the overrides in place for manual browsing once an agent session ends.
  // The 500ms delay avoids racing with a proxy/bridge that is mid-restart
  // (detach → re-attach).
  const handleDetach = (): void => {
    if (isGone()) {
      return
    }
    setTimeout(registerScript, 500)
  }

  try {
    registerScript()
    guest.debugger.on('detach', handleDetach)
  } catch {
    /* best-effort */
  }

  return () => {
    stopped = true
    try {
      guest.debugger.off('detach', handleDetach)
    } catch {
      /* guest may already be destroyed */
    }
  }
}
// Looks up the browser tab id recorded for a guest webContents id. Returns
// null when tabIdByWebContentsId has no entry for that id.
private resolveBrowserTabIdForGuestWebContentsId(guestWebContentsId: number): string | null {
  const tabId = this.tabIdByWebContentsId.get(guestWebContentsId)
  return tabId ?? null
}
@ -333,6 +394,13 @@ export class BrowserManager {
return
}
this.policyAttachedGuestIds.add(guest.id)
// Why: Cloudflare Turnstile and similar bot detectors probe browser APIs
// (navigator.webdriver, plugins, window.chrome) that differ in Electron
// webviews vs real Chrome. Inject overrides on every page load so manual
// browsing passes challenges even without the CDP debugger attached.
const disposeAntiDetection = this.injectAntiDetection(guest)
// Why: background throttling must be disabled so agent-driven screenshots
// (Page.captureScreenshot via CDP proxy) can capture frames even when the
// Orca window is not the focused foreground app. With throttling enabled,
@ -373,6 +441,14 @@ export class BrowserManager {
})
const navigationGuard = (event: Electron.Event, url: string): void => {
// Why: blob: URLs are same-origin (inherit the creator's origin) and are
// used by Cloudflare Turnstile to load challenge resources inside iframes.
// Blocking them triggers error 600010 ("bot behavior detected"). Only
// allow blobs whose embedded origin is http(s) to maintain defense-in-depth
// against blob:null or other opaque-origin blobs.
if (url.startsWith('blob:https://') || url.startsWith('blob:http://')) {
return
}
if (!normalizeBrowserNavigationUrl(url)) {
// Why: `will-attach-webview` only validates the initial src. Main must
// keep enforcing the same allowlist for later guest navigations too.
@ -405,6 +481,7 @@ export class BrowserManager {
// guest surface is torn down, preventing the callbacks from preventing GC of
// the underlying WebContents wrapper.
this.policyCleanupByGuestId.set(guest.id, () => {
disposeAntiDetection()
if (!guest.isDestroyed()) {
guest.off('will-navigate', navigationGuard)
guest.off('will-redirect', navigationGuard)

View file

@ -18,6 +18,7 @@ vi.mock('./browser-manager', () => ({
}))
import { browserSessionRegistry } from './browser-session-registry'
import { setupClientHintsOverride } from './browser-session-ua'
import { ORCA_BROWSER_PARTITION } from '../../shared/constants'
describe('BrowserSessionRegistry', () => {
@ -153,7 +154,7 @@ describe('BrowserSessionRegistry', () => {
const edgeUa =
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.6890.3 Safari/537.36 Edg/147.0.3210.5'
browserSessionRegistry.setupClientHintsOverride(mockSess, edgeUa)
setupClientHintsOverride(mockSess, edgeUa)
expect(onBeforeSendHeaders).toHaveBeenCalledWith(
{ urls: ['https://*/*'] },
@ -178,7 +179,7 @@ describe('BrowserSessionRegistry', () => {
const chromeUa =
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.6890.3 Safari/537.36'
browserSessionRegistry.setupClientHintsOverride(mockSess, chromeUa)
setupClientHintsOverride(mockSess, chromeUa)
const callback = vi.fn()
const listener = onBeforeSendHeaders.mock.calls[0][1]
@ -192,10 +193,7 @@ describe('BrowserSessionRegistry', () => {
const onBeforeSendHeaders = vi.fn()
const mockSess = { webRequest: { onBeforeSendHeaders } } as never
browserSessionRegistry.setupClientHintsOverride(
mockSess,
'Mozilla/5.0 (compatible; MSIE 10.0)'
)
setupClientHintsOverride(mockSess, 'Mozilla/5.0 (compatible; MSIE 10.0)')
expect(onBeforeSendHeaders).not.toHaveBeenCalled()
})
@ -203,10 +201,7 @@ describe('BrowserSessionRegistry', () => {
it('leaves non-Client-Hints headers unchanged', () => {
const onBeforeSendHeaders = vi.fn()
const mockSess = { webRequest: { onBeforeSendHeaders } } as never
browserSessionRegistry.setupClientHintsOverride(
mockSess,
'Mozilla/5.0 Chrome/147.0.0.0 Safari/537.36'
)
setupClientHintsOverride(mockSess, 'Mozilla/5.0 Chrome/147.0.0.0 Safari/537.36')
const callback = vi.fn()
const listener = onBeforeSendHeaders.mock.calls[0][1]

View file

@ -1,4 +1,8 @@
import { app, type Session, session } from 'electron'
/* eslint-disable max-lines -- Why: the registry is the single source of truth for
browser session profiles, partition allowlisting, cookie import staging, and
per-partition permission/download policies. Splitting further would scatter the
security boundary across modules. */
import { app, session } from 'electron'
import { randomUUID } from 'node:crypto'
import {
copyFileSync,
@ -12,6 +16,7 @@ import { join } from 'node:path'
import { ORCA_BROWSER_PARTITION } from '../../shared/constants'
import type { BrowserSessionProfile, BrowserSessionProfileScope } from '../../shared/types'
import { browserManager } from './browser-manager'
import { cleanElectronUserAgent, setupClientHintsOverride } from './browser-session-ua'
type BrowserSessionMeta = {
defaultSource: BrowserSessionProfile['source']
@ -107,7 +112,18 @@ class BrowserSessionRegistry {
if (meta.userAgent) {
const sess = session.fromPartition(ORCA_BROWSER_PARTITION)
sess.setUserAgent(meta.userAgent)
this.setupClientHintsOverride(sess, meta.userAgent)
setupClientHintsOverride(sess, meta.userAgent)
} else {
// Why: even without an imported session, the default Electron UA contains
// "Electron/X.X.X" and the app name which trip Cloudflare Turnstile.
try {
const sess = session.fromPartition(ORCA_BROWSER_PARTITION)
const cleanUA = cleanElectronUserAgent(sess.getUserAgent())
sess.setUserAgent(cleanUA)
setupClientHintsOverride(sess, cleanUA)
} catch {
/* session not available yet (e.g. unit tests or pre-ready) */
}
}
if (meta.defaultSource) {
const current = this.profiles.get('default')
@ -120,46 +136,6 @@ class BrowserSessionRegistry {
}
}
// Why: the Chromium version bundled with Electron (e.g. 134) is not the
// version of the source browser (e.g. Edge 147). The sec-ch-ua Client Hints
// headers expose the real version, and that mismatch is what Google's
// anti-fraud detection flags as CookieMismatch on accounts.google.com.
// Rewrite Client Hints on outgoing requests so they agree with the source
// browser's UA. Does nothing when the UA carries no Chrome/<version> token.
setupClientHintsOverride(sess: Session, ua: string): void {
  const chromiumVersion = ua.match(/Chrome\/([\d.]+)/)?.[1]
  if (chromiumVersion === undefined) {
    return
  }
  const edgeVersion = ua.match(/Edg\/([\d.]+)/)?.[1]

  // An Edg/<version> token means the source browser is Edge; otherwise the
  // brand is Chrome and shares the Chromium version.
  const brand = edgeVersion === undefined ? 'Google Chrome' : 'Microsoft Edge'
  const brandVersion = edgeVersion ?? chromiumVersion
  const majorOf = (version: string): string => version.split('.')[0]

  const secChUa = `"${brand}";v="${majorOf(brandVersion)}", "Chromium";v="${majorOf(chromiumVersion)}", "Not/A)Brand";v="24"`
  const secChUaFull = `"${brand}";v="${brandVersion}", "Chromium";v="${chromiumVersion}", "Not/A)Brand";v="24.0.0.0"`

  // Keyed by lower-cased header name. A Map is used so lookups never fall
  // through to inherited object properties.
  const overrides = new Map<string, string>([
    ['sec-ch-ua', secChUa],
    ['sec-ch-ua-full-version-list', secChUaFull]
  ])

  sess.webRequest.onBeforeSendHeaders({ urls: ['https://*/*'] }, (details, callback) => {
    const headers = details.requestHeaders
    // Header names are matched case-insensitively and rewritten in place
    // under their original spelling; all other headers are left alone.
    for (const name of Object.keys(headers)) {
      const replacement = overrides.get(name.toLowerCase())
      if (replacement !== undefined) {
        headers[name] = replacement
      }
    }
    callback({ requestHeaders: headers })
  })
}
// Why: the import writes cookies to a staging DB because CookieMonster holds
// the live DB's data in memory and would overwrite our changes on its next
// flush. This method MUST run before any session.fromPartition() call so
@ -373,6 +349,11 @@ class BrowserSessionRegistry {
this.configuredPartitions.add(partition)
const sess = session.fromPartition(partition)
if (typeof sess.getUserAgent === 'function') {
const cleanUA = cleanElectronUserAgent(sess.getUserAgent())
sess.setUserAgent(cleanUA)
setupClientHintsOverride(sess, cleanUA)
}
// Why: clipboard-read and clipboard-sanitized-write are required for agent-browser's
// clipboard commands to work. Without these, navigator.clipboard.writeText/readText
// throws NotAllowedError even when invoked via CDP with userGesture:true.

View file

@ -0,0 +1,55 @@
import type { Session } from 'electron'
// Why: the default Electron UA carries an "Electron/X.X.X" token and the app
// name (e.g. "orca/1.2.3"). Cloudflare Turnstile and other bot detectors flag
// both as non-human traffic, so they are stripped to make the webview's UA
// and sec-ch-ua Client Hints look like standard Chrome. A UA without those
// tokens is returned unchanged.
export function cleanElectronUserAgent(ua: string): string {
  // Remove the Electron product token together with its leading whitespace.
  const withoutElectron = ua.replace(/\s+Electron\/\S+/, '')
  // Why: the app token sits between ") " and "Chrome/". \S+ matches any
  // non-whitespace token (e.g. "orca/1.3.8-rc.0"), including pre-release
  // semver strings that [\d.]+ would miss.
  return withoutElectron.replace(/(\)\s+)\S+\s+(Chrome\/)/, '$1$2')
}
// Why: Electron ships its own Chromium version (e.g. 134), which differs from
// the version of the source browser (e.g. Edge 147). The sec-ch-ua Client
// Hints headers reveal the real version, and Google's anti-fraud detection
// flags that mismatch as CookieMismatch on accounts.google.com. Override
// Client Hints on outgoing https requests so they match the source browser's
// UA. No listener is installed when the UA has no Chrome/<version> token.
export function setupClientHintsOverride(sess: Session, ua: string): void {
  const chromiumFull = /Chrome\/([\d.]+)/.exec(ua)?.[1]
  if (chromiumFull === undefined) {
    return
  }
  const chromiumMajor = chromiumFull.split('.')[0]

  // An Edg/<version> token switches the leading brand to Edge with its own
  // version; otherwise the brand is Chrome at the Chromium version.
  const edgeFull = /Edg\/([\d.]+)/.exec(ua)?.[1]
  const brandName = edgeFull === undefined ? 'Google Chrome' : 'Microsoft Edge'
  const brandFull = edgeFull === undefined ? chromiumFull : edgeFull
  const brandMajorVersion = brandFull.split('.')[0]

  const shortList = `"${brandName}";v="${brandMajorVersion}", "Chromium";v="${chromiumMajor}", "Not/A)Brand";v="24"`
  const fullList = `"${brandName}";v="${brandFull}", "Chromium";v="${chromiumFull}", "Not/A)Brand";v="24.0.0.0"`

  sess.webRequest.onBeforeSendHeaders({ urls: ['https://*/*'] }, (details, callback) => {
    const requestHeaders = details.requestHeaders
    // Header names are compared case-insensitively and rewritten in place
    // under their original spelling; every other header passes through.
    for (const headerName of Object.keys(requestHeaders)) {
      switch (headerName.toLowerCase()) {
        case 'sec-ch-ua':
          requestHeaders[headerName] = shortList
          break
        case 'sec-ch-ua-full-version-list':
          requestHeaders[headerName] = fullList
          break
        default:
          break
      }
    }
    callback({ requestHeaders })
  })
}

View file

@ -159,6 +159,8 @@ function createMockGuest(id: number, url: string, title: string) {
return {}
case 'Target.setAutoAttach':
return {}
case 'Page.addScriptToEvaluateOnNewDocument':
return { identifier: 'mock-script-id' }
case 'Runtime.enable':
return {}
default:

View file

@ -46,6 +46,7 @@ import {
type SnapshotResult
} from './snapshot-engine'
import type { BrowserManager } from './browser-manager'
import { ANTI_DETECTION_SCRIPT } from './anti-detection'
export class BrowserError extends Error {
constructor(
@ -1158,6 +1159,13 @@ export class CdpBridge {
flatten: true
})
// Why: attaching the CDP debugger sets navigator.webdriver = true and
// exposes other automation signals that Cloudflare Turnstile checks.
// Override them on every new document load so challenges succeed.
await sender('Page.addScriptToEvaluateOnNewDocument', {
source: ANTI_DETECTION_SCRIPT
})
// Why: remove any stale listeners from a previous attach cycle to prevent
// listener accumulation. After a detach+reattach, the old handlers would
// still fire alongside the new ones, causing duplicate log entries,

View file

@ -2,6 +2,7 @@ import { WebSocketServer, WebSocket } from 'ws'
import { createServer, type Server, type IncomingMessage, type ServerResponse } from 'http'
import type { WebContents } from 'electron'
import { captureScreenshot } from './cdp-screenshot'
import { ANTI_DETECTION_SCRIPT } from './anti-detection'
export class CdpWsProxy {
private httpServer: Server | null = null
@ -96,9 +97,15 @@ export class CdpWsProxy {
const url = req.url ?? ''
if (url === '/json/version' || url === '/json/version/') {
res.writeHead(200, { 'Content-Type': 'application/json' })
// Why: agent-browser reads this endpoint to identify the browser. Returning
// "Orca/CdpWsProxy" leaks that this is an embedded automation surface, which
// could affect downstream detection heuristics.
// Why: process.versions.chrome contains the exact Chromium version
// bundled with Electron, producing a realistic version string.
const chromeVersion = process.versions.chrome ?? '134.0.0.0'
res.end(
JSON.stringify({
Browser: 'Orca/CdpWsProxy',
Browser: `Chrome/${chromeVersion}`,
'Protocol-Version': '1.3',
webSocketDebuggerUrl: `ws://127.0.0.1:${this.port}`
})
@ -134,6 +141,19 @@ export class CdpWsProxy {
}
}
this.attached = true
// Why: attaching the CDP debugger sets navigator.webdriver = true and
// exposes other automation signals that Cloudflare Turnstile checks.
// Inject before any page loads so challenges succeed.
try {
await this.webContents.debugger.sendCommand('Page.enable', {})
await this.webContents.debugger.sendCommand('Page.addScriptToEvaluateOnNewDocument', {
source: ANTI_DETECTION_SCRIPT
})
} catch {
/* best-effort — page domain may not be ready yet */
}
this.debuggerMessageHandler = (_event: unknown, ...rest: unknown[]) => {
const [method, params, sessionId] = rest as [
string,
@ -209,11 +229,14 @@ export class CdpWsProxy {
return
}
if (msg.method === 'Browser.getVersion') {
// Why: returning "Orca/Electron" identifies this as an embedded automation
// surface to agent-browser. Use a generic Chrome product string instead.
const chromeVersion = process.versions.chrome ?? '134.0.0.0'
this.sendResult(
clientId,
{
protocolVersion: '1.3',
product: 'Orca/Electron',
product: `Chrome/${chromeVersion}`,
userAgent: '',
jsVersion: ''
},