diff --git a/.gitignore b/.gitignore index 78af482035..cff93124fc 100644 --- a/.gitignore +++ b/.gitignore @@ -140,5 +140,9 @@ pnpm-lock.yaml .turbo spaHtmlTemplates.ts +# Embedded CLI bundle (built at pack time) +apps/desktop/resources/bin/lobe-cli.js +apps/desktop/resources/cli-package.json + .superpowers/ docs/superpowers \ No newline at end of file diff --git a/apps/cli/src/daemon/manager.ts b/apps/cli/src/daemon/manager.ts index d4b1fcef05..e2f0a65bbe 100644 --- a/apps/cli/src/daemon/manager.ts +++ b/apps/cli/src/daemon/manager.ts @@ -160,7 +160,7 @@ export function spawnDaemon(args: string[]): number { // Re-run the same entry with --daemon-child (internal flag) const child = spawn(process.execPath, [...process.execArgv, ...args, '--daemon-child'], { detached: true, - env: { ...process.env, LOBEHUB_DAEMON: '1' }, + env: { ...process.env, ELECTRON_RUN_AS_NODE: '1', LOBEHUB_DAEMON: '1' }, stdio: ['ignore', logFd, logFd], }); diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index 004bf1cbe7..4d42b0566e 100644 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -1,3 +1,3 @@ import { createProgram } from './program'; -createProgram().parse(); +createProgram().parse(process.argv, { from: 'node' }); diff --git a/apps/cli/tsdown.config.ts b/apps/cli/tsdown.config.ts index 0ee73abdd3..caa64edf1d 100644 --- a/apps/cli/tsdown.config.ts +++ b/apps/cli/tsdown.config.ts @@ -9,6 +9,10 @@ export default defineConfig({ entry: ['src/index.ts'], fixedExtension: false, format: ['esm'], + minify: true, + outputOptions: { + codeSplitting: false, + }, platform: 'node', target: 'node18', }); diff --git a/apps/desktop/electron-builder.mjs b/apps/desktop/electron-builder.mjs index 51b87789cf..7914c8f74a 100644 --- a/apps/desktop/electron-builder.mjs +++ b/apps/desktop/electron-builder.mjs @@ -109,6 +109,26 @@ const config = { console.info('📦 Downloading agent-browser binary...'); execSync('node scripts/download-agent-browser.mjs', { stdio: 'inherit', cwd: __dirname 
}); + + // Build and copy CLI bundle for embedding + console.info('📦 Building CLI for embedding...'); + execSync('npm run build', { stdio: 'inherit', cwd: path.resolve(__dirname, '../cli') }); + const cliSrc = path.resolve(__dirname, '../cli/dist/index.js'); + const cliDest = path.resolve(__dirname, 'resources/bin/lobe-cli.js'); + await fs.copyFile(cliSrc, cliDest); + + // Write a minimal package.json next to the CLI bundle so that + // createRequire('../package.json') resolves correctly in the packaged app. + // The CLI script lives at Resources/bin/lobe-cli.js, so '../package.json' + // resolves to Resources/package.json. + const cliPkg = JSON.parse( + await fs.readFile(path.resolve(__dirname, '../cli/package.json'), 'utf8'), + ); + await fs.writeFile( + path.resolve(__dirname, 'resources/cli-package.json'), + JSON.stringify({ name: cliPkg.name, type: 'module', version: cliPkg.version }), + ); + console.info('✅ CLI bundle copied to resources/bin/lobe-cli.js'); }, /** * AfterPack hook for post-processing: @@ -296,7 +316,10 @@ const config = { releaseNotes: process.env.RELEASE_NOTES || undefined, }, - extraResources: [{ from: 'resources/bin', to: 'bin' }], + extraResources: [ + { from: 'resources/bin', to: 'bin' }, + { from: 'resources/cli-package.json', to: 'package.json' }, + ], win: { executableName: 'LobeHub', diff --git a/apps/desktop/electron.vite.config.ts b/apps/desktop/electron.vite.config.ts index 0c3514ae2c..8bb425862c 100644 --- a/apps/desktop/electron.vite.config.ts +++ b/apps/desktop/electron.vite.config.ts @@ -90,7 +90,6 @@ export default defineConfig({ outDir: 'dist/preload', sourcemap: isDev ? 
'inline' : false, }, - resolve: { alias: { '@': path.resolve(__dirname, 'src/main'), diff --git a/apps/desktop/package.json b/apps/desktop/package.json index b61d195f99..ee539f12de 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -11,6 +11,7 @@ "author": "LobeHub", "main": "./dist/main/index.js", "scripts": { + "build:cli": "cd ../cli && bun run build", "build:main": "cross-env NODE_OPTIONS=--max-old-space-size=8192 electron-vite build", "build:run-unpack": "electron .", "dev": "electron-vite dev", diff --git a/apps/desktop/src/main/controllers/CliCtr.ts b/apps/desktop/src/main/controllers/CliCtr.ts new file mode 100644 index 0000000000..f99a61ea4d --- /dev/null +++ b/apps/desktop/src/main/controllers/CliCtr.ts @@ -0,0 +1,58 @@ +import { exec } from 'node:child_process'; +import path from 'node:path'; +import process from 'node:process'; +import { promisify } from 'node:util'; + +import { getCliWrapperDir } from '@/modules/cliEmbedding'; +import { createLogger } from '@/utils/logger'; + +import { ControllerModule, IpcMethod } from './index'; +import RemoteServerConfigCtr from './RemoteServerConfigCtr'; + +const logger = createLogger('controllers:CliCtr'); + +function normalizeServerUrl(url: string): string { + return url.replace(/\/$/, ''); +} + +export default class CliCtr extends ControllerModule { + static override readonly groupName = 'cli'; + + @IpcMethod() + async runCliCommand(args: string): Promise<{ exitCode: number; stderr: string; stdout: string }> { + const execAsync = promisify(exec); + const wrapperDir = getCliWrapperDir(); + const cmd = process.platform === 'win32' ? 
'lobehub.cmd' : 'lobehub'; + const wrapperPath = path.join(wrapperDir, cmd); + + const env = { ...process.env }; + + const remoteCtr = this.app.getController(RemoteServerConfigCtr); + if (remoteCtr) { + const [token, serverUrl] = await Promise.all([ + remoteCtr.getAccessToken(), + remoteCtr.getRemoteServerUrl(), + ]); + + if (token && serverUrl) { + env.LOBEHUB_JWT = token; + env.LOBEHUB_SERVER = normalizeServerUrl(serverUrl); + logger.debug('Injected LOBEHUB_JWT / LOBEHUB_SERVER for CLI command'); + } + } + + try { + const { stdout, stderr } = await execAsync(`"${wrapperPath}" ${args}`, { + env, + timeout: 15_000, + }); + return { exitCode: 0, stderr, stdout }; + } catch (error: any) { + return { + exitCode: error.code ?? 1, + stderr: error.stderr ?? '', + stdout: error.stdout ?? String(error.message), + }; + } + } +} diff --git a/apps/desktop/src/main/controllers/ShellCommandCtr.ts b/apps/desktop/src/main/controllers/ShellCommandCtr.ts index 3283f31260..ddca49426a 100644 --- a/apps/desktop/src/main/controllers/ShellCommandCtr.ts +++ b/apps/desktop/src/main/controllers/ShellCommandCtr.ts @@ -10,17 +10,38 @@ import { runCommand, ShellProcessManager } from '@lobechat/local-file-shell'; import { createLogger } from '@/utils/logger'; +import CliCtr from './CliCtr'; import { ControllerModule, IpcMethod } from './index'; const logger = createLogger('controllers:ShellCommandCtr'); const processManager = new ShellProcessManager(); +/** Prefix for a simple `lh`/`lobe`/`lobehub` invocation (keyword + boundary, args via slice). 
*/ +const SIMPLE_LH_PREFIX = /^\s*(?:lh|lobe|lobehub)(?=\s|$)/; + export default class ShellCommandCtr extends ControllerModule { static override readonly groupName = 'shellCommand'; @IpcMethod() async handleRunCommand(params: RunCommandParams): Promise { + const prefixMatch = SIMPLE_LH_PREFIX.exec(params.command); + if (prefixMatch) { + const cliCtr = this.app.getController(CliCtr); + if (cliCtr) { + const args = params.command.slice(prefixMatch[0].length).trim(); + logger.debug('Routing lh command to CliCtr.runCliCommand:', args); + const result = await cliCtr.runCliCommand(args); + return { + exit_code: result.exitCode, + output: result.stdout + result.stderr, + stderr: result.stderr, + stdout: result.stdout, + success: result.exitCode === 0, + }; + } + } + return runCommand(params, { logger, processManager }); } diff --git a/apps/desktop/src/main/controllers/__tests__/ShellCommandCtr.test.ts b/apps/desktop/src/main/controllers/__tests__/ShellCommandCtr.test.ts index ec87f8c4fa..92b9754b22 100644 --- a/apps/desktop/src/main/controllers/__tests__/ShellCommandCtr.test.ts +++ b/apps/desktop/src/main/controllers/__tests__/ShellCommandCtr.test.ts @@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { App } from '@/core/App'; +import CliCtr from '../CliCtr'; import ShellCommandCtr from '../ShellCommandCtr'; const { ipcMainHandleMock } = vi.hoisted(() => ({ @@ -32,7 +33,17 @@ vi.mock('node:crypto', () => ({ randomUUID: vi.fn(() => 'test-uuid-123'), })); -const mockApp = {} as unknown as App; +vi.mock('../CliCtr', () => ({ + default: class CliCtr {}, +})); + +const mockCliCtr = { + runCliCommand: vi.fn().mockResolvedValue({ exitCode: 0, stderr: '', stdout: 'cli output\n' }), +}; + +const mockApp = { + getController: vi.fn((c: unknown) => (c === CliCtr ? 
mockCliCtr : undefined)), +} as unknown as App; describe('ShellCommandCtr (thin wrapper)', () => { let ctr: ShellCommandCtr; @@ -118,6 +129,28 @@ describe('ShellCommandCtr (thin wrapper)', () => { expect(mockChildProcess.kill).toHaveBeenCalled(); }); + it('should route lh commands to CliCtr.runCliCommand', async () => { + const result = await ctr.handleRunCommand({ + command: 'lh status --json', + description: 'lh status', + }); + + expect(mockCliCtr.runCliCommand).toHaveBeenCalledWith('status --json'); + expect(result.success).toBe(true); + expect(result.stdout).toContain('cli output'); + expect(mockSpawn).not.toHaveBeenCalled(); + }); + + it('should route lobehub commands to CliCtr.runCliCommand', async () => { + const result = await ctr.handleRunCommand({ + command: 'lobehub search test', + description: 'lobehub search', + }); + + expect(mockCliCtr.runCliCommand).toHaveBeenCalledWith('search test'); + expect(result.success).toBe(true); + }); + it('should return error for non-existent shell_id', async () => { const result = await ctr.handleGetCommandOutput({ shell_id: 'non-existent', diff --git a/apps/desktop/src/main/controllers/registry.ts b/apps/desktop/src/main/controllers/registry.ts index a0cecaf0d4..11690efc74 100644 --- a/apps/desktop/src/main/controllers/registry.ts +++ b/apps/desktop/src/main/controllers/registry.ts @@ -2,6 +2,7 @@ import type { CreateServicesResult, IpcServiceConstructor, MergeIpcService } fro import AuthCtr from './AuthCtr'; import BrowserWindowsCtr from './BrowserWindowsCtr'; +import CliCtr from './CliCtr'; import DevtoolsCtr from './DevtoolsCtr'; import GatewayConnectionCtr from './GatewayConnectionCtr'; import LocalFileCtr from './LocalFileCtr'; @@ -23,6 +24,7 @@ import UploadFileCtr from './UploadFileCtr'; export const controllerIpcConstructors = [ AuthCtr, BrowserWindowsCtr, + CliCtr, DevtoolsCtr, GatewayConnectionCtr, LocalFileCtr, diff --git a/apps/desktop/src/main/core/App.ts b/apps/desktop/src/main/core/App.ts index 
460b92a842..e52634d10b 100644 --- a/apps/desktop/src/main/core/App.ts +++ b/apps/desktop/src/main/core/App.ts @@ -13,6 +13,7 @@ import { isDev } from '@/const/env'; import { ELECTRON_BE_PROTOCOL_SCHEME } from '@/const/protocol'; import type { IControlModule } from '@/controllers'; import AuthCtr from '@/controllers/AuthCtr'; +import { generateCliWrapper, getCliWrapperDir } from '@/modules/cliEmbedding'; import { astSearchDetectors, browserAutomationDetectors, @@ -89,9 +90,9 @@ export class App { logger.info('----------------------------------------------'); logger.info('Starting LobeHub...'); - // Append bundled binaries directory to PATH for fallback tool resolution + // Append bundled binaries and CLI wrapper directories to PATH for tool resolution const pathSep = process.platform === 'win32' ? ';' : ':'; - process.env.PATH = `${process.env.PATH}${pathSep}${binDir}`; + process.env.PATH = `${process.env.PATH}${pathSep}${binDir}${pathSep}${getCliWrapperDir()}`; logger.debug('Initializing App'); // Initialize store manager @@ -226,6 +227,11 @@ export class App { // Initialize app await this.makeAppReady(); + // Generate CLI wrapper for terminal usage + generateCliWrapper().catch((error) => { + logger.warn('Failed to generate CLI wrapper:', error); + }); + // Initialize i18n. 
Note: app.getLocale() must be called after app.whenReady() to get the correct value await this.i18n.init(); this.menuManager.initialize(); diff --git a/apps/desktop/src/main/modules/cliEmbedding/generateCliWrapper.ts b/apps/desktop/src/main/modules/cliEmbedding/generateCliWrapper.ts new file mode 100644 index 0000000000..f3cac83474 --- /dev/null +++ b/apps/desktop/src/main/modules/cliEmbedding/generateCliWrapper.ts @@ -0,0 +1,97 @@ +import { chmod, mkdir, rename, symlink, unlink, writeFile } from 'node:fs/promises'; +import path from 'node:path'; + +import { app } from 'electron'; + +import { createLogger } from '@/utils/logger'; + +const logger = createLogger('modules:cliEmbedding'); + +/** + * Resolve the correct Electron binary path per platform. + * - AppImage: use APPIMAGE env var (the actual .AppImage file) + * - Others: app.getPath('exe') + */ +function resolveElectronBinary(): string { + if (process.platform === 'linux' && process.env.APPIMAGE) { + return process.env.APPIMAGE; + } + return app.getPath('exe'); +} + +/** + * Resolve the CLI script path inside packaged resources. + */ +function resolveCliScript(): string { + if (app.isPackaged) { + return path.join(process.resourcesPath, 'bin', 'lobe-cli.js'); + } + // Dev mode: app.getAppPath() points to apps/desktop/, go up to apps/cli/ + return path.join(app.getAppPath(), '..', 'cli', 'dist', 'index.js'); +} + +/** + * Get the user-writable bin directory for CLI wrapper. + */ +export function getCliWrapperDir(): string { + return path.join(app.getPath('userData'), 'bin'); +} + +/** + * Generate shell wrapper scripts that invoke the embedded CLI + * using Electron's Node.js runtime via ELECTRON_RUN_AS_NODE=1. + * + * Called on every app launch to keep paths up-to-date after auto-updates. 
+ */ +export async function generateCliWrapper(): Promise { + const electronBin = resolveElectronBinary(); + const cliScript = resolveCliScript(); + const wrapperDir = getCliWrapperDir(); + + await mkdir(wrapperDir, { recursive: true }); + + if (process.platform === 'win32') { + const content = [ + '@echo off', + 'set ELECTRON_RUN_AS_NODE=1', + `"${electronBin}" "${cliScript}" %*`, + ].join('\r\n'); + + const cmdPath = path.join(wrapperDir, 'lobehub.cmd'); + await atomicWrite(cmdPath, content); + + // Create short aliases: lh.cmd, lobe.cmd (copies on Windows, symlinks unreliable) + for (const alias of ['lh.cmd', 'lobe.cmd']) { + await atomicWrite(path.join(wrapperDir, alias), content); + } + + logger.info(`CLI wrapper generated: ${cmdPath}`); + } else { + const content = [ + '#!/bin/sh', + `ELECTRON_RUN_AS_NODE=1 exec "${electronBin}" "${cliScript}" "$@"`, + ].join('\n'); + + const wrapperPath = path.join(wrapperDir, 'lobehub'); + await atomicWrite(wrapperPath, content); + await chmod(wrapperPath, 0o755); + + // Create short aliases: lh, lobe → lobehub + for (const alias of ['lh', 'lobe']) { + const linkPath = path.join(wrapperDir, alias); + await unlink(linkPath).catch(() => {}); + await symlink('lobehub', linkPath); + } + + logger.info(`CLI wrapper generated: ${wrapperPath}`); + } +} + +/** + * Atomic write: write to temp file then rename to avoid partial reads. 
+ */ +async function atomicWrite(filePath: string, content: string): Promise { + const tmpPath = `${filePath}.tmp.${process.pid}`; + await writeFile(tmpPath, content, 'utf8'); + await rename(tmpPath, filePath); +} diff --git a/apps/desktop/src/main/modules/cliEmbedding/index.ts b/apps/desktop/src/main/modules/cliEmbedding/index.ts new file mode 100644 index 0000000000..bb91404ec4 --- /dev/null +++ b/apps/desktop/src/main/modules/cliEmbedding/index.ts @@ -0,0 +1 @@ +export { generateCliWrapper, getCliWrapperDir } from './generateCliWrapper'; diff --git a/apps/desktop/src/main/modules/toolDetectors/runtimeEnvironmentDetectors.ts b/apps/desktop/src/main/modules/toolDetectors/runtimeEnvironmentDetectors.ts index 1724e50dc0..e0fce4be79 100644 --- a/apps/desktop/src/main/modules/toolDetectors/runtimeEnvironmentDetectors.ts +++ b/apps/desktop/src/main/modules/toolDetectors/runtimeEnvironmentDetectors.ts @@ -63,11 +63,82 @@ export const pythonDetector: IToolDetector = { priority: 3, }; +/** + * Bun runtime detector + */ +export const bunDetector: IToolDetector = createCommandDetector('bun', { + description: 'Bun - fast JavaScript runtime and package manager', + priority: 4, +}); + +/** + * Bunx package runner detector + */ +export const bunxDetector: IToolDetector = createCommandDetector('bunx', { + description: 'bunx - Bun package runner for executing npm packages', + priority: 5, +}); + +/** + * pnpm package manager detector + */ +export const pnpmDetector: IToolDetector = createCommandDetector('pnpm', { + description: 'pnpm - fast, disk space efficient package manager', + priority: 6, +}); + +/** + * uv Python package manager detector + */ +export const uvDetector: IToolDetector = createCommandDetector('uv', { + description: 'uv - extremely fast Python package manager', + priority: 7, +}); + +/** + * LobeHub CLI detector + * Tries lobehub, lobe, lh in order; validates via --help output containing "LobeHub" + */ +export const lobehubDetector: IToolDetector = { + 
description: 'LobeHub CLI - manage and connect to LobeHub services', + async detect(): Promise { + const commands = ['lobehub', 'lobe', 'lh']; + const whichCmd = platform() === 'win32' ? 'where' : 'which'; + + for (const cmd of commands) { + try { + const { stdout: pathOut } = await execPromise(`${whichCmd} ${cmd}`, { timeout: 3000 }); + const toolPath = pathOut.trim().split('\n')[0]; + + // Validate it's actually LobeHub CLI by checking help output + const { stdout: helpOut } = await execPromise(`${cmd} --help`, { timeout: 3000 }); + if (!helpOut.includes('LobeHub')) continue; + + const { stdout: versionOut } = await execPromise(`${cmd} --version`, { timeout: 3000 }); + const version = versionOut.trim().split('\n')[0]; + + return { available: true, path: toolPath, version }; + } catch { + continue; + } + } + + return { available: false }; + }, + name: 'lobehub', + priority: 0, +}; + /** * All runtime environment detectors */ export const runtimeEnvironmentDetectors: IToolDetector[] = [ + lobehubDetector, nodeDetector, npmDetector, pythonDetector, + bunDetector, + bunxDetector, + pnpmDetector, + uvDetector, ]; diff --git a/locales/ar/setting.json b/locales/ar/setting.json index 76e23babaa..090a122982 100644 --- a/locales/ar/setting.json +++ b/locales/ar/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "إصدار إطار Electron", "settingSystemTools.appEnvironment.node.desc": "إصدار Node.js المدمج", "settingSystemTools.appEnvironment.title": "بيئة التطبيق", - "settingSystemTools.autoSelectDesc": "سيتم اختيار أفضل أداة متاحة تلقائيًا", "settingSystemTools.category.browserAutomation": "أتمتة المتصفح", "settingSystemTools.category.browserAutomation.desc": "أدوات لأتمتة المتصفح بدون واجهة والتفاعل مع الويب", "settingSystemTools.category.contentSearch": "البحث في المحتوى", diff --git a/locales/bg-BG/setting.json b/locales/bg-BG/setting.json index b5176f330b..078767dab2 100644 --- a/locales/bg-BG/setting.json +++ b/locales/bg-BG/setting.json 
@@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Версия на рамката Electron", "settingSystemTools.appEnvironment.node.desc": "Вградена версия на Node.js", "settingSystemTools.appEnvironment.title": "Среда на приложението", - "settingSystemTools.autoSelectDesc": "Най-добрият наличен инструмент ще бъде избран автоматично", "settingSystemTools.category.browserAutomation": "Автоматизация на браузъра", "settingSystemTools.category.browserAutomation.desc": "Инструменти за автоматизация на браузъра без графичен интерфейс и уеб взаимодействие", "settingSystemTools.category.contentSearch": "Търсене в съдържание", diff --git a/locales/de-DE/setting.json b/locales/de-DE/setting.json index 738ab5bd3b..ffac3e1e3e 100644 --- a/locales/de-DE/setting.json +++ b/locales/de-DE/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Electron-Framework-Version", "settingSystemTools.appEnvironment.node.desc": "Eingebettete Node.js-Version", "settingSystemTools.appEnvironment.title": "App-Umgebung", - "settingSystemTools.autoSelectDesc": "Das beste verfügbare Tool wird automatisch ausgewählt", "settingSystemTools.category.browserAutomation": "Browser-Automatisierung", "settingSystemTools.category.browserAutomation.desc": "Werkzeuge für headless Browser-Automatisierung und Web-Interaktion", "settingSystemTools.category.contentSearch": "Inhaltssuche", diff --git a/locales/en-US/setting.json b/locales/en-US/setting.json index 59e00bf047..d2af11ef04 100644 --- a/locales/en-US/setting.json +++ b/locales/en-US/setting.json @@ -656,8 +656,7 @@ "settingSystemTools.appEnvironment.desc": "Built-in runtime versions in the desktop app", "settingSystemTools.appEnvironment.electron.desc": "Electron framework version", "settingSystemTools.appEnvironment.node.desc": "Embedded Node.js version", - "settingSystemTools.appEnvironment.title": "App Environment", - "settingSystemTools.autoSelectDesc": "The best available tool will be automatically selected", 
+ "settingSystemTools.appEnvironment.title": "Built-in App Tools", "settingSystemTools.category.browserAutomation": "Browser Automation", "settingSystemTools.category.browserAutomation.desc": "Tools for headless browser automation and web interaction", "settingSystemTools.category.contentSearch": "Content Search", @@ -674,14 +673,19 @@ "settingSystemTools.title": "System Tools", "settingSystemTools.tools.ag.desc": "The Silver Searcher - fast code searching tool", "settingSystemTools.tools.agentBrowser.desc": "Agent-browser - headless browser automation CLI for AI agents", + "settingSystemTools.tools.bun.desc": "Bun - fast JavaScript runtime and package manager", + "settingSystemTools.tools.bunx.desc": "bunx - Bun package runner for executing npm packages", "settingSystemTools.tools.fd.desc": "fd - fast and user-friendly alternative to find", "settingSystemTools.tools.find.desc": "Unix find - standard file search command", "settingSystemTools.tools.grep.desc": "GNU grep - standard text search tool", + "settingSystemTools.tools.lobehub.desc": "LobeHub CLI - manage and connect to LobeHub services", "settingSystemTools.tools.mdfind.desc": "macOS Spotlight search (fast indexed search)", "settingSystemTools.tools.node.desc": "Node.js - JavaScript runtime for executing JS/TS", "settingSystemTools.tools.npm.desc": "npm - Node.js package manager for installing dependencies", + "settingSystemTools.tools.pnpm.desc": "pnpm - fast, disk space efficient package manager", "settingSystemTools.tools.python.desc": "Python - programming language runtime", "settingSystemTools.tools.rg.desc": "ripgrep - extremely fast text search tool", + "settingSystemTools.tools.uv.desc": "uv - extremely fast Python package manager", "settingTTS.openai.sttModel": "OpenAI Speech-to-Text Model", "settingTTS.openai.title": "OpenAI", "settingTTS.openai.ttsModel": "OpenAI Text-to-Speech Model", diff --git a/locales/es-ES/setting.json b/locales/es-ES/setting.json index 5d9a1363f1..7bf39d6ad0 100644 --- 
a/locales/es-ES/setting.json +++ b/locales/es-ES/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Versión del framework Electron", "settingSystemTools.appEnvironment.node.desc": "Versión de Node.js integrada", "settingSystemTools.appEnvironment.title": "Entorno de la aplicación", - "settingSystemTools.autoSelectDesc": "La mejor herramienta disponible se seleccionará automáticamente", "settingSystemTools.category.browserAutomation": "Automatización del Navegador", "settingSystemTools.category.browserAutomation.desc": "Herramientas para la automatización de navegadores sin cabeza e interacción web", "settingSystemTools.category.contentSearch": "Búsqueda de contenido", diff --git a/locales/fa-IR/setting.json b/locales/fa-IR/setting.json index c3d40bbe36..5b1d82121a 100644 --- a/locales/fa-IR/setting.json +++ b/locales/fa-IR/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "نسخهٔ چارچوب Electron", "settingSystemTools.appEnvironment.node.desc": "نسخهٔ Node.js تعبیه‌شده", "settingSystemTools.appEnvironment.title": "محیط برنامه", - "settingSystemTools.autoSelectDesc": "بهترین ابزار موجود به‌صورت خودکار انتخاب خواهد شد", "settingSystemTools.category.browserAutomation": "اتوماسیون مرورگر", "settingSystemTools.category.browserAutomation.desc": "ابزارهایی برای اتوماسیون مرورگر بدون رابط کاربری و تعامل وب", "settingSystemTools.category.contentSearch": "جستجوی محتوا", diff --git a/locales/fr-FR/setting.json b/locales/fr-FR/setting.json index fa70bf9207..68889b251c 100644 --- a/locales/fr-FR/setting.json +++ b/locales/fr-FR/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Version du framework Electron", "settingSystemTools.appEnvironment.node.desc": "Version de Node.js intégrée", "settingSystemTools.appEnvironment.title": "Environnement de l'application", - "settingSystemTools.autoSelectDesc": "L'outil le plus performant sera sélectionné automatiquement", 
"settingSystemTools.category.browserAutomation": "Automatisation du navigateur", "settingSystemTools.category.browserAutomation.desc": "Outils pour l'automatisation de navigateur sans interface et l'interaction web", "settingSystemTools.category.contentSearch": "Recherche de contenu", diff --git a/locales/it-IT/setting.json b/locales/it-IT/setting.json index 90ea95a98a..5ef6785091 100644 --- a/locales/it-IT/setting.json +++ b/locales/it-IT/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Versione del framework Electron", "settingSystemTools.appEnvironment.node.desc": "Versione di Node.js integrata", "settingSystemTools.appEnvironment.title": "Ambiente app", - "settingSystemTools.autoSelectDesc": "Lo strumento migliore disponibile verrà selezionato automaticamente", "settingSystemTools.category.browserAutomation": "Automazione del browser", "settingSystemTools.category.browserAutomation.desc": "Strumenti per l'automazione del browser senza interfaccia grafica e l'interazione web", "settingSystemTools.category.contentSearch": "Ricerca Contenuti", diff --git a/locales/ja-JP/setting.json b/locales/ja-JP/setting.json index 88507f06a0..ba36ec33b0 100644 --- a/locales/ja-JP/setting.json +++ b/locales/ja-JP/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Electron フレームワークのバージョン", "settingSystemTools.appEnvironment.node.desc": "同梱 Node.js のバージョン", "settingSystemTools.appEnvironment.title": "アプリ環境", - "settingSystemTools.autoSelectDesc": "最適な利用可能ツールが自動的に選択されます", "settingSystemTools.category.browserAutomation": "ブラウザー自動化", "settingSystemTools.category.browserAutomation.desc": "ヘッドレスブラウザーの自動化とウェブ操作のためのツール", "settingSystemTools.category.contentSearch": "コンテンツ検索", diff --git a/locales/ko-KR/setting.json b/locales/ko-KR/setting.json index e1cc706bb0..55f2868fb0 100644 --- a/locales/ko-KR/setting.json +++ b/locales/ko-KR/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": 
"Electron 프레임워크 버전", "settingSystemTools.appEnvironment.node.desc": "내장 Node.js 버전", "settingSystemTools.appEnvironment.title": "앱 환경", - "settingSystemTools.autoSelectDesc": "가장 적합한 도구가 자동으로 선택됩니다", "settingSystemTools.category.browserAutomation": "브라우저 자동화", "settingSystemTools.category.browserAutomation.desc": "헤드리스 브라우저 자동화 및 웹 상호작용을 위한 도구", "settingSystemTools.category.contentSearch": "콘텐츠 검색", diff --git a/locales/nl-NL/setting.json b/locales/nl-NL/setting.json index 81b60e4739..2cd5ec7a0a 100644 --- a/locales/nl-NL/setting.json +++ b/locales/nl-NL/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Electron-frameworkversie", "settingSystemTools.appEnvironment.node.desc": "Ingesloten Node.js-versie", "settingSystemTools.appEnvironment.title": "App-omgeving", - "settingSystemTools.autoSelectDesc": "Het best beschikbare hulpmiddel wordt automatisch geselecteerd", "settingSystemTools.category.browserAutomation": "Browserautomatisering", "settingSystemTools.category.browserAutomation.desc": "Tools voor headless browserautomatisering en webinteractie", "settingSystemTools.category.contentSearch": "Zoeken in Inhoud", diff --git a/locales/pl-PL/setting.json b/locales/pl-PL/setting.json index 7374e9d4f6..0bae16fb3d 100644 --- a/locales/pl-PL/setting.json +++ b/locales/pl-PL/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Wersja frameworka Electron", "settingSystemTools.appEnvironment.node.desc": "Wersja wbudowanego Node.js", "settingSystemTools.appEnvironment.title": "Środowisko aplikacji", - "settingSystemTools.autoSelectDesc": "Najlepsze dostępne narzędzie zostanie wybrane automatycznie", "settingSystemTools.category.browserAutomation": "Automatyzacja przeglądarki", "settingSystemTools.category.browserAutomation.desc": "Narzędzia do automatyzacji przeglądarki bez interfejsu graficznego i interakcji z siecią", "settingSystemTools.category.contentSearch": "Wyszukiwanie treści", diff --git 
a/locales/pt-BR/setting.json b/locales/pt-BR/setting.json index 338985dda1..5a5ebee669 100644 --- a/locales/pt-BR/setting.json +++ b/locales/pt-BR/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Versão do framework Electron", "settingSystemTools.appEnvironment.node.desc": "Versão do Node.js integrada", "settingSystemTools.appEnvironment.title": "Ambiente do aplicativo", - "settingSystemTools.autoSelectDesc": "A melhor ferramenta disponível será selecionada automaticamente", "settingSystemTools.category.browserAutomation": "Automação de Navegador", "settingSystemTools.category.browserAutomation.desc": "Ferramentas para automação de navegador sem interface gráfica e interação com a web", "settingSystemTools.category.contentSearch": "Busca de Conteúdo", diff --git a/locales/ru-RU/setting.json b/locales/ru-RU/setting.json index c1990563fe..b20bb61927 100644 --- a/locales/ru-RU/setting.json +++ b/locales/ru-RU/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Версия фреймворка Electron", "settingSystemTools.appEnvironment.node.desc": "Версия встроенного Node.js", "settingSystemTools.appEnvironment.title": "Среда приложения", - "settingSystemTools.autoSelectDesc": "Лучший доступный инструмент будет выбран автоматически", "settingSystemTools.category.browserAutomation": "Автоматизация браузера", "settingSystemTools.category.browserAutomation.desc": "Инструменты для автоматизации безголового браузера и взаимодействия с вебом", "settingSystemTools.category.contentSearch": "Поиск по содержимому", diff --git a/locales/tr-TR/setting.json b/locales/tr-TR/setting.json index c5797521d2..9201f6977e 100644 --- a/locales/tr-TR/setting.json +++ b/locales/tr-TR/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Electron framework sürümü", "settingSystemTools.appEnvironment.node.desc": "Gömülü Node.js sürümü", "settingSystemTools.appEnvironment.title": "Uygulama ortamı", - 
"settingSystemTools.autoSelectDesc": "En iyi mevcut araç otomatik olarak seçilecektir", "settingSystemTools.category.browserAutomation": "Tarayıcı Otomasyonu", "settingSystemTools.category.browserAutomation.desc": "Başsız tarayıcı otomasyonu ve web etkileşimi için araçlar", "settingSystemTools.category.contentSearch": "İçerik Arama", diff --git a/locales/vi-VN/setting.json b/locales/vi-VN/setting.json index c8203483dd..6b25df3ee7 100644 --- a/locales/vi-VN/setting.json +++ b/locales/vi-VN/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Phiên bản framework Electron", "settingSystemTools.appEnvironment.node.desc": "Phiên bản Node.js nhúng", "settingSystemTools.appEnvironment.title": "Môi trường ứng dụng", - "settingSystemTools.autoSelectDesc": "Công cụ tốt nhất sẽ được tự động chọn", "settingSystemTools.category.browserAutomation": "Tự động hóa trình duyệt", "settingSystemTools.category.browserAutomation.desc": "Công cụ cho tự động hóa trình duyệt không giao diện và tương tác web", "settingSystemTools.category.contentSearch": "Tìm kiếm nội dung", diff --git a/locales/zh-CN/setting.json b/locales/zh-CN/setting.json index 5ad97d202c..06a1c32e56 100644 --- a/locales/zh-CN/setting.json +++ b/locales/zh-CN/setting.json @@ -656,8 +656,7 @@ "settingSystemTools.appEnvironment.desc": "桌面应用内置的运行时版本", "settingSystemTools.appEnvironment.electron.desc": "Electron 框架版本", "settingSystemTools.appEnvironment.node.desc": "内嵌 Node.js 版本", - "settingSystemTools.appEnvironment.title": "应用环境", - "settingSystemTools.autoSelectDesc": "系统会自动选择最优的可用工具", + "settingSystemTools.appEnvironment.title": "内建应用工具", "settingSystemTools.category.browserAutomation": "浏览器自动化", "settingSystemTools.category.browserAutomation.desc": "用于无头浏览器自动化和网页交互的工具", "settingSystemTools.category.contentSearch": "内容搜索", @@ -674,14 +673,19 @@ "settingSystemTools.title": "系统工具", "settingSystemTools.tools.ag.desc": "The Silver Searcher - 快速代码搜索工具", 
"settingSystemTools.tools.agentBrowser.desc": "Agent-browser - 面向AI代理的无头浏览器自动化命令行工具", + "settingSystemTools.tools.bun.desc": "Bun - 快速的 JavaScript 运行时和包管理器", + "settingSystemTools.tools.bunx.desc": "bunx - Bun 包执行器,用于运行 npm 包", "settingSystemTools.tools.fd.desc": "fd - 快速且用户友好的 find 替代品", "settingSystemTools.tools.find.desc": "Unix find - 标准文件搜索命令", "settingSystemTools.tools.grep.desc": "GNU grep - 标准文本搜索工具", + "settingSystemTools.tools.lobehub.desc": "LobeHub CLI - 管理和连接 LobeHub 服务", "settingSystemTools.tools.mdfind.desc": "macOS 聚焦搜索(快速索引搜索)", "settingSystemTools.tools.node.desc": "Node.js - 执行 JavaScript/TypeScript 的运行时", "settingSystemTools.tools.npm.desc": "npm - Node.js 包管理器,用于安装依赖", + "settingSystemTools.tools.pnpm.desc": "pnpm - 快速、节省磁盘空间的包管理器", "settingSystemTools.tools.python.desc": "Python - 编程语言运行时", "settingSystemTools.tools.rg.desc": "ripgrep - 极快的文本搜索工具", + "settingSystemTools.tools.uv.desc": "uv - 极快的 Python 包管理器", "settingTTS.openai.sttModel": "OpenAI 语音识别模型", "settingTTS.openai.title": "OpenAI", "settingTTS.openai.ttsModel": "OpenAI 语音合成模型", diff --git a/locales/zh-TW/setting.json b/locales/zh-TW/setting.json index a8f5fa0a79..edf86b093a 100644 --- a/locales/zh-TW/setting.json +++ b/locales/zh-TW/setting.json @@ -657,7 +657,6 @@ "settingSystemTools.appEnvironment.electron.desc": "Electron 框架版本", "settingSystemTools.appEnvironment.node.desc": "內嵌 Node.js 版本", "settingSystemTools.appEnvironment.title": "應用環境", - "settingSystemTools.autoSelectDesc": "將自動選擇最佳可用工具", "settingSystemTools.category.browserAutomation": "瀏覽器自動化", "settingSystemTools.category.browserAutomation.desc": "用於無頭瀏覽器自動化和網頁交互的工具", "settingSystemTools.category.contentSearch": "內容搜尋", diff --git a/packages/builtin-skills/src/agent-browser/content.ts b/packages/builtin-skills/src/agent-browser/content.ts index 2fd6a0c7db..a70459ba32 100644 --- a/packages/builtin-skills/src/agent-browser/content.ts +++ b/packages/builtin-skills/src/agent-browser/content.ts @@ -1,158 +1,819 @@ /** - * @see 
https://github.com/vercel-labs/agent-browser/blob/main/skills/agent-browser/SKILL.md + * Synced from https://github.com/vercel-labs/agent-browser/blob/main/skills/agent-browser/SKILL.md */ export const systemPrompt = ` -You can automate websites and Electron desktop apps with the agent-browser CLI. Use the \`execScript\` tool to run local shell commands. +# Browser Automation with agent-browser -# Prerequisites +The CLI uses Chrome/Chromium via CDP directly. **LobeHub desktop** bundles \`agent-browser\` in native mode. Otherwise install via \`npm i -g agent-browser\`, \`brew install agent-browser\`, or \`cargo install agent-browser\`. Run \`agent-browser install\` to download Chrome. Existing Chrome, Brave, Playwright, and Puppeteer installations are detected automatically. Run \`agent-browser upgrade\` to update to the latest version. -The \`agent-browser\` CLI is bundled with the desktop app (v0.20.1) and runs in native mode by default. It automatically detects system Chrome/Chromium. If no browser is found, install Google Chrome. +## Core Workflow -# Core Workflow (Snapshot-Ref Pattern) +Every browser automation follows this pattern: -Use this 4-step loop for almost all tasks: - -1. Navigate: \`agent-browser open <url>\` -2. Snapshot: \`agent-browser snapshot -i\` (returns refs like \`@e1\`, \`@e2\`) -3. Interact: \`click\`, \`fill\`, \`select\`, etc. with refs -4. Re-snapshot after page changes - -Refs are ephemeral. After navigation, form submit, modal open, or dynamic updates, old refs are invalid. Re-snapshot before the next interaction. - -# Command Chaining - -You can chain commands with \`&&\` in one shell call. The daemon preserves browser state across chained commands. +1. **Navigate**: \`agent-browser open <url>\` +2. **Snapshot**: \`agent-browser snapshot -i\` (get element refs like \`@e1\`, \`@e2\`) +3. **Interact**: Use refs to click, fill, select +4. 
**Re-snapshot**: After navigation or DOM changes, get fresh refs \`\`\`bash -agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser snapshot -i -\`\`\` - -Chain only when you do not need to inspect intermediate output. If you must parse snapshot output to discover refs, run snapshot separately. - -# Essential Commands - -## Navigation -- \`agent-browser open \` -- \`agent-browser close\` -- \`agent-browser back\` -- \`agent-browser forward\` -- \`agent-browser reload\` - -## Snapshot and Capture -- \`agent-browser snapshot -i\` (recommended) -- \`agent-browser snapshot -i -C\` (include cursor-interactive elements) -- \`agent-browser screenshot\` -- \`agent-browser screenshot --annotate\` -- \`agent-browser screenshot --full\` -- \`agent-browser pdf output.pdf\` - -## Interaction -- \`agent-browser click @e1\` -- \`agent-browser fill @e2 "text"\` -- \`agent-browser type @e2 "text"\` -- \`agent-browser select @e3 "option"\` -- \`agent-browser check @e4\` -- \`agent-browser press Enter\` -- \`agent-browser scroll down 500\` - -## Retrieval -- \`agent-browser get text @e1\` -- \`agent-browser get url\` -- \`agent-browser get title\` - -## Wait -- \`agent-browser wait @e1\` -- \`agent-browser wait --load networkidle\` -- \`agent-browser wait --url "**/dashboard"\` -- \`agent-browser wait 2000\` - -## Diff and Verification -- \`agent-browser diff snapshot\` -- \`agent-browser diff screenshot --baseline before.png\` -- \`agent-browser diff url \` - -## Session and State -- \`agent-browser --session open \` -- \`agent-browser session list\` -- \`agent-browser state save auth.json\` -- \`agent-browser state load auth.json\` - -## Chrome or Electron Connection - -To control an existing Chrome or Electron app, it must be launched with remote debugging enabled. 
If the app is already running, quit it first, then relaunch with the flag: - -**macOS (Chrome):** -\`\`\`bash -open -a "Google Chrome" --args --remote-debugging-port=9222 -\`\`\` - -**macOS (Electron app, e.g. Slack):** -\`\`\`bash -open -a "Slack" --args --remote-debugging-port=9222 -\`\`\` - -Then connect and control: -- \`agent-browser --auto-connect snapshot -i\` -- \`agent-browser --cdp 9222 snapshot -i\` -- \`agent-browser connect 9222\` - -# Common Patterns - -## Form Submission -\`\`\`bash -agent-browser open https://example.com/signup +agent-browser open https://example.com/form agent-browser snapshot -i -agent-browser fill @e1 "Jane Doe" -agent-browser fill @e2 "jane@example.com" +# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit" + +agent-browser fill @e1 "user@example.com" +agent-browser fill @e2 "password123" agent-browser click @e3 -agent-browser wait --load networkidle -agent-browser snapshot -i +agent-browser wait 2000 +agent-browser snapshot -i # Check result \`\`\` -## Data Extraction +## Command Chaining + +Commands can be chained with \`&&\` in a single shell invocation. The browser persists between commands via a background daemon, so chaining is safe and more efficient than separate calls. + \`\`\`bash -agent-browser open https://example.com/products -agent-browser wait --load networkidle -agent-browser snapshot -i -agent-browser get text @e5 +# Chain open + snapshot in one call (open already waits for page load) +agent-browser open https://example.com && agent-browser snapshot -i + +# Chain multiple interactions +agent-browser fill @e1 "user@example.com" && agent-browser fill @e2 "password123" && agent-browser click @e3 + +# Navigate and capture +agent-browser open https://example.com && agent-browser screenshot \`\`\` -## Annotated Screenshot for Vision Tasks +**When to chain:** Use \`&&\` when you don't need to read the output of an intermediate command before proceeding (e.g., open + wait + screenshot). 
Run commands separately when you need to parse the output first (e.g., snapshot to discover refs, then interact using those refs). + +## Handling Authentication + +When automating a site that requires login, choose the approach that fits: + +**Option 1: Import auth from the user's browser (fastest for one-off tasks)** + +\`\`\`bash +# Connect to the user's running Chrome (they're already logged in) +agent-browser --auto-connect state save ./auth.json +# Use that auth state +agent-browser --state ./auth.json open https://app.example.com/dashboard +\`\`\` + +State files contain session tokens in plaintext -- add to \`.gitignore\` and delete when no longer needed. Set \`AGENT_BROWSER_ENCRYPTION_KEY\` for encryption at rest. + +**Option 2: Chrome profile reuse (zero setup)** + +\`\`\`bash +# List available Chrome profiles +agent-browser profiles + +# Reuse the user's existing Chrome login state +agent-browser --profile Default open https://gmail.com +\`\`\` + +**Option 3: Persistent profile (for recurring tasks)** + +\`\`\`bash +# First run: login manually or via automation +agent-browser --profile ~/.myapp open https://app.example.com/login +# ... fill credentials, submit ... + +# All future runs: already authenticated +agent-browser --profile ~/.myapp open https://app.example.com/dashboard +\`\`\` + +**Option 4: Session name (auto-save/restore cookies + localStorage)** + +\`\`\`bash +agent-browser --session-name myapp open https://app.example.com/login +# ... login flow ... 
+agent-browser close # State auto-saved + +# Next time: state auto-restored +agent-browser --session-name myapp open https://app.example.com/dashboard +\`\`\` + +**Option 5: Auth vault (credentials stored encrypted, login by name)** + +\`\`\`bash +echo "$PASSWORD" | agent-browser auth save myapp --url https://app.example.com/login --username user --password-stdin +agent-browser auth login myapp +\`\`\` + +\`auth login\` navigates with \`load\` and then waits for login form selectors to appear before filling/clicking, which is more reliable on delayed SPA login screens. + +**Option 6: State file (manual save/load)** + +\`\`\`bash +# After logging in: +agent-browser state save ./auth.json +# In a future session: +agent-browser state load ./auth.json +agent-browser open https://app.example.com/dashboard +\`\`\` + +For OAuth, 2FA, cookie-based auth, and token refresh patterns, see the upstream \`references/authentication.md\` at https://github.com/vercel-labs/agent-browser/tree/main/skills/agent-browser/references. + +## Essential Commands + +\`\`\`bash +# Batch: ALWAYS use batch for 2+ sequential commands. Commands run in order. 
+agent-browser batch "open https://example.com" "snapshot -i" +agent-browser batch "open https://example.com" "screenshot" +agent-browser batch "click @e1" "wait 1000" "screenshot" + +# Navigation +agent-browser open # Navigate (aliases: goto, navigate) +agent-browser close # Close browser +agent-browser close --all # Close all active sessions + +# Snapshot +agent-browser snapshot -i # Interactive elements with refs (recommended) +agent-browser snapshot -i --urls # Include href URLs for links +agent-browser snapshot -s "#selector" # Scope to CSS selector + +# Interaction (use @refs from snapshot) +agent-browser click @e1 # Click element +agent-browser click @e1 --new-tab # Click and open in new tab +agent-browser fill @e2 "text" # Clear and type text +agent-browser type @e2 "text" # Type without clearing +agent-browser select @e1 "option" # Select dropdown option +agent-browser check @e1 # Check checkbox +agent-browser press Enter # Press key +agent-browser keyboard type "text" # Type at current focus (no selector) +agent-browser keyboard inserttext "text" # Insert without key events +agent-browser scroll down 500 # Scroll page +agent-browser scroll down 500 --selector "div.content" # Scroll within a specific container + +# Get information +agent-browser get text @e1 # Get element text +agent-browser get url # Get current URL +agent-browser get title # Get page title +agent-browser get cdp-url # Get CDP WebSocket URL + +# Wait +agent-browser wait @e1 # Wait for element +agent-browser wait 2000 # Wait milliseconds +agent-browser wait --url "**/page" # Wait for URL pattern +agent-browser wait --text "Welcome" # Wait for text to appear (substring match) +agent-browser wait --load networkidle # Wait for network idle (caution: see Pitfalls) +agent-browser wait --fn "!document.body.innerText.includes('Loading...')" # Wait for text to disappear +agent-browser wait "#spinner" --state hidden # Wait for element to disappear + +# Downloads +agent-browser download @e1 
./file.pdf # Click element to trigger download +agent-browser wait --download ./output.zip # Wait for any download to complete +agent-browser --download-path ./downloads open # Set default download directory + +# Tab management +agent-browser tab list # List all open tabs +agent-browser tab new # Open a blank new tab +agent-browser tab new https://example.com # Open URL in a new tab +agent-browser tab 2 # Switch to tab by index (0-based) +agent-browser tab close # Close the current tab +agent-browser tab close 2 # Close tab by index + +# Network +agent-browser network requests # Inspect tracked requests +agent-browser network requests --type xhr,fetch # Filter by resource type +agent-browser network requests --method POST # Filter by HTTP method +agent-browser network requests --status 2xx # Filter by status (200, 2xx, 400-499) +agent-browser network request # View full request/response detail +agent-browser network route "**/api/*" --abort # Block matching requests +agent-browser network har start # Start HAR recording +agent-browser network har stop ./capture.har # Stop and save HAR file + +# Viewport & Device Emulation +agent-browser set viewport 1920 1080 # Set viewport size (default: 1280x720) +agent-browser set viewport 1920 1080 2 # 2x retina (same CSS size, higher res screenshots) +agent-browser set device "iPhone 14" # Emulate device (viewport + user agent) + +# Capture +agent-browser screenshot # Screenshot to temp dir +agent-browser screenshot --full # Full page screenshot +agent-browser screenshot --annotate # Annotated screenshot with numbered element labels +agent-browser screenshot --screenshot-dir ./shots # Save to custom directory +agent-browser screenshot --screenshot-format jpeg --screenshot-quality 80 +agent-browser pdf output.pdf # Save as PDF + +# Live preview / streaming +agent-browser stream enable # Start runtime WebSocket streaming on an auto-selected port +agent-browser stream enable --port 9223 # Bind a specific localhost port 
+agent-browser stream status # Inspect enabled state, port, connection, and screencasting +agent-browser stream disable # Stop runtime streaming and remove the .stream metadata file + +# Clipboard +agent-browser clipboard read # Read text from clipboard +agent-browser clipboard write "Hello, World!" # Write text to clipboard +agent-browser clipboard copy # Copy current selection +agent-browser clipboard paste # Paste from clipboard + +# Dialogs (alert, confirm, prompt, beforeunload) +# By default, alert and beforeunload dialogs are auto-accepted so they never block the agent. +# confirm and prompt dialogs still require explicit handling. +# Use --no-auto-dialog (or AGENT_BROWSER_NO_AUTO_DIALOG=1) to disable automatic handling. +agent-browser dialog accept # Accept dialog +agent-browser dialog accept "my input" # Accept prompt dialog with text +agent-browser dialog dismiss # Dismiss/cancel dialog +agent-browser dialog status # Check if a dialog is currently open + +# Diff (compare page states) +agent-browser diff snapshot # Compare current vs last snapshot +agent-browser diff snapshot --baseline before.txt # Compare current vs saved file +agent-browser diff screenshot --baseline before.png # Visual pixel diff +agent-browser diff url # Compare two pages +agent-browser diff url --wait-until networkidle # Custom wait strategy +agent-browser diff url --selector "#main" # Scope to element + +# Chat (AI natural language control) +agent-browser chat "open google.com and search for cats" # Single-shot instruction +agent-browser chat # Interactive REPL mode +agent-browser -q chat "summarize this page" # Quiet (text only, no tool calls) +agent-browser -v chat "fill in the login form" # Verbose (show command output) +agent-browser --model openai/gpt-4o chat "take a screenshot" # Override model +\`\`\` + +## Streaming + +Every session automatically starts a WebSocket stream server on an OS-assigned port. 
Use \`agent-browser stream status\` to see the bound port and connection state. Use \`stream disable\` to tear it down, and \`stream enable --port <port>\` to re-enable on a specific port. + +## Batch Execution + +ALWAYS use \`batch\` when running 2+ commands in sequence. Batch executes commands in order, so dependent commands (like navigate then screenshot) work correctly. Each quoted argument is a separate command. + +\`\`\`bash +# Navigate and take a snapshot +agent-browser batch "open https://example.com" "snapshot -i" + +# Navigate, snapshot, and screenshot in one call +agent-browser batch "open https://example.com" "snapshot -i" "screenshot" + +# Click, wait, then screenshot +agent-browser batch "click @e1" "wait 1000" "screenshot" + +# With --bail to stop on first error +agent-browser batch --bail "open https://example.com" "click @e1" "screenshot" +\`\`\` + +Only use a single command (not batch) when you need to read the output before deciding the next command. For example, you must run \`snapshot -i\` as a single command when you need to read the refs to decide what to click. After reading the snapshot, batch the remaining steps. + +Stdin mode is also supported for programmatic use: + +\`\`\`bash +echo '[["open","https://example.com"],["screenshot"]]' | agent-browser batch --json +agent-browser batch --bail < commands.json +\`\`\` + +## Efficiency Strategies + +These patterns minimize tool calls and token usage. + +**Use \`--urls\` to avoid re-navigation.** When you need to visit links from a page, use \`snapshot -i --urls\` to get all href URLs upfront. Then \`open\` each URL directly instead of clicking refs and navigating back. + +**Snapshot once, act many times.** Never re-snapshot the same page. Extract all needed info (refs, URLs, text) from a single snapshot, then batch the remaining actions. + +**Multi-page workflow (e.g. "visit N sites and screenshot each"):** + +\`\`\`bash +# 1. 
Get all URLs in one call +agent-browser batch "open https://news.ycombinator.com" "snapshot -i --urls" +# Read output to extract URLs, then visit each directly: +# 2. One batch per target site +agent-browser batch "open https://github.com/example/repo" "screenshot" +agent-browser batch "open https://example.com/article" "screenshot" +agent-browser batch "open https://other.com/page" "screenshot" +\`\`\` + +This approach uses 4 tool calls instead of 14+. Never go back to the listing page between visits. + +## Common Patterns + +### Form Submission + +\`\`\`bash +# Navigate and get the form structure +agent-browser batch "open https://example.com/signup" "snapshot -i" +# Read the snapshot output to identify form refs, then fill and submit +agent-browser batch "fill @e1 \\"Jane Doe\\"" "fill @e2 \\"jane@example.com\\"" "select @e3 \\"California\\"" "check @e4" "click @e5" "wait 2000" +\`\`\` + +### Authentication with Auth Vault (Recommended) + +\`\`\`bash +# Save credentials once (encrypted with AGENT_BROWSER_ENCRYPTION_KEY) +# Recommended: pipe password via stdin to avoid shell history exposure +echo "pass" | agent-browser auth save github --url https://github.com/login --username user --password-stdin + +# Login using saved profile (LLM never sees password) +agent-browser auth login github + +# List/show/delete profiles +agent-browser auth list +agent-browser auth show github +agent-browser auth delete github +\`\`\` + +\`auth login\` waits for username/password/submit selectors before interacting, with a timeout tied to the default action timeout. 
+ +### Authentication with State Persistence + +\`\`\`bash +# Login once and save state +agent-browser batch "open https://app.example.com/login" "snapshot -i" +# Read snapshot to find form refs, then fill and submit +agent-browser batch "fill @e1 \\"$USERNAME\\"" "fill @e2 \\"$PASSWORD\\"" "click @e3" "wait --url **/dashboard" "state save auth.json" + +# Reuse in future sessions +agent-browser batch "state load auth.json" "open https://app.example.com/dashboard" +\`\`\` + +### Session Persistence + +\`\`\`bash +# Auto-save/restore cookies and localStorage across browser restarts +agent-browser --session-name myapp open https://app.example.com/login +# ... login flow ... +agent-browser close # State auto-saved to ~/.agent-browser/sessions/ + +# Next time, state is auto-loaded +agent-browser --session-name myapp open https://app.example.com/dashboard + +# Encrypt state at rest +export AGENT_BROWSER_ENCRYPTION_KEY=$(openssl rand -hex 32) +agent-browser --session-name secure open https://app.example.com + +# Manage saved states +agent-browser state list +agent-browser state show myapp-default.json +agent-browser state clear myapp +agent-browser state clean --older-than 7 +\`\`\` + +### Working with Iframes + +Iframe content is automatically inlined in snapshots. Refs inside iframes carry frame context, so you can interact with them directly. 
+ +\`\`\`bash +agent-browser batch "open https://example.com/checkout" "snapshot -i" +# @e1 [heading] "Checkout" +# @e2 [Iframe] "payment-frame" +# @e3 [input] "Card number" +# @e4 [input] "Expiry" +# @e5 [button] "Pay" + +# Interact directly — no frame switch needed +agent-browser batch "fill @e3 \\"4111111111111111\\"" "fill @e4 \\"12/28\\"" "click @e5" + +# To scope a snapshot to one iframe: +agent-browser batch "frame @e2" "snapshot -i" +agent-browser frame main # Return to main frame +\`\`\` + +### Data Extraction + +\`\`\`bash +agent-browser batch "open https://example.com/products" "snapshot -i" +# Read snapshot to find element refs, then extract +agent-browser get text @e5 # Get specific element text + +# JSON output for parsing +agent-browser snapshot -i --json +agent-browser get text @e1 --json +\`\`\` + +### Parallel Sessions + +\`\`\`bash +agent-browser --session site1 open https://site-a.com +agent-browser --session site2 open https://site-b.com + +agent-browser --session site1 snapshot -i +agent-browser --session site2 snapshot -i + +agent-browser session list +\`\`\` + +### Connect to Existing Chrome + +\`\`\`bash +# Auto-discover running Chrome with remote debugging enabled +agent-browser --auto-connect open https://example.com +agent-browser --auto-connect snapshot + +# Or with explicit CDP port +agent-browser --cdp 9222 snapshot +\`\`\` + +Auto-connect discovers Chrome via \`DevToolsActivePort\`, common debugging ports (9222, 9229), and falls back to a direct WebSocket connection if HTTP-based CDP discovery fails. 
+ +### Color Scheme (Dark Mode) + +\`\`\`bash +# Persistent dark mode via flag (applies to all pages and new tabs) +agent-browser --color-scheme dark open https://example.com + +# Or via environment variable +AGENT_BROWSER_COLOR_SCHEME=dark agent-browser open https://example.com + +# Or set during session (persists for subsequent commands) +agent-browser set media dark +\`\`\` + +### Viewport & Responsive Testing + +\`\`\`bash +# Set a custom viewport size (default is 1280x720) +agent-browser set viewport 1920 1080 +agent-browser screenshot desktop.png + +# Test mobile-width layout +agent-browser set viewport 375 812 +agent-browser screenshot mobile.png + +# Retina/HiDPI: same CSS layout at 2x pixel density +# Screenshots stay at logical viewport size, but content renders at higher DPI +agent-browser set viewport 1920 1080 2 +agent-browser screenshot retina.png + +# Device emulation (sets viewport + user agent in one step) +agent-browser set device "iPhone 14" +agent-browser screenshot device.png +\`\`\` + +The \`scale\` parameter (3rd argument) sets \`window.devicePixelRatio\` without changing CSS layout. Use it when testing retina rendering or capturing higher-resolution screenshots. + +### Visual Browser (Debugging) + +\`\`\`bash +agent-browser --headed open https://example.com +agent-browser highlight @e1 # Highlight element +agent-browser inspect # Open Chrome DevTools for the active page +agent-browser record start demo.webm # Record session +agent-browser profiler start # Start Chrome DevTools profiling +agent-browser profiler stop trace.json # Stop and save profile (path optional) +\`\`\` + +Use \`AGENT_BROWSER_HEADED=1\` to enable headed mode via environment variable. Browser extensions work in both headed and headless mode. 
+ +### Local Files (PDFs, HTML) + +\`\`\`bash +# Open local files with file:// URLs +agent-browser --allow-file-access open file:///path/to/document.pdf +agent-browser --allow-file-access open file:///path/to/page.html +agent-browser screenshot output.png +\`\`\` + +### iOS Simulator (Mobile Safari) + +\`\`\`bash +# List available iOS simulators +agent-browser device list + +# Launch Safari on a specific device +agent-browser -p ios --device "iPhone 16 Pro" open https://example.com + +# Same workflow as desktop - snapshot, interact, re-snapshot +agent-browser -p ios snapshot -i +agent-browser -p ios tap @e1 # Tap (alias for click) +agent-browser -p ios fill @e2 "text" +agent-browser -p ios swipe up # Mobile-specific gesture + +# Take screenshot +agent-browser -p ios screenshot mobile.png + +# Close session (shuts down simulator) +agent-browser -p ios close +\`\`\` + +**Requirements:** macOS with Xcode, Appium (\`npm install -g appium && appium driver install xcuitest\`) + +**Real devices:** Works with physical iOS devices if pre-configured. Use \`--device \"<UDID>\"\` where UDID is from \`xcrun xctrace list devices\`. + +## Security + +All security features are opt-in. By default, agent-browser imposes no restrictions on navigation, actions, or output. + +### Content Boundaries (Recommended for AI Agents) + +Enable \`--content-boundaries\` to wrap page-sourced output in markers that help LLMs distinguish tool output from untrusted page content: + +\`\`\`bash +export AGENT_BROWSER_CONTENT_BOUNDARIES=1 +agent-browser snapshot +# Output: +# --- AGENT_BROWSER_PAGE_CONTENT nonce=<hex> origin=https://example.com --- +# [accessibility tree] +# --- END_AGENT_BROWSER_PAGE_CONTENT nonce=<hex> --- +\`\`\` + +### Domain Allowlist + +Restrict navigation to trusted domains. Wildcards like \`*.example.com\` also match the bare domain \`example.com\`. Sub-resource requests, WebSocket, and EventSource connections to non-allowed domains are also blocked. 
Include CDN domains your target pages depend on: + +\`\`\`bash +export AGENT_BROWSER_ALLOWED_DOMAINS="example.com,*.example.com" +agent-browser open https://example.com # OK +agent-browser open https://malicious.com # Blocked +\`\`\` + +### Action Policy + +Use a policy file to gate destructive actions: + +\`\`\`bash +export AGENT_BROWSER_ACTION_POLICY=./policy.json +\`\`\` + +Example \`policy.json\`: + +\`\`\`json +{ "default": "deny", "allow": ["navigate", "snapshot", "click", "scroll", "wait", "get"] } +\`\`\` + +Auth vault operations (\`auth login\`, etc.) bypass action policy but domain allowlist still applies. + +### Output Limits + +Prevent context flooding from large pages: + +\`\`\`bash +export AGENT_BROWSER_MAX_OUTPUT=50000 +\`\`\` + +## Diffing (Verifying Changes) + +Use \`diff snapshot\` after performing an action to verify it had the intended effect. This compares the current accessibility tree against the last snapshot taken in the session. + +\`\`\`bash +# Typical workflow: snapshot -> action -> diff +agent-browser snapshot -i # Take baseline snapshot +agent-browser click @e2 # Perform action +agent-browser diff snapshot # See what changed (auto-compares to last snapshot) +\`\`\` + +For visual regression testing or monitoring: + +\`\`\`bash +# Save a baseline screenshot, then compare later +agent-browser screenshot baseline.png +# ... time passes or changes are made ... +agent-browser diff screenshot --baseline baseline.png + +# Compare staging vs production +agent-browser diff url https://staging.example.com https://prod.example.com --screenshot +\`\`\` + +\`diff snapshot\` output uses \`+\` for additions and \`-\` for removals, similar to git diff. \`diff screenshot\` produces a diff image with changed pixels highlighted in red, plus a mismatch percentage. + +## Timeouts and Slow Pages + +The default timeout is 25 seconds. This can be overridden with the \`AGENT_BROWSER_DEFAULT_TIMEOUT\` environment variable (value in milliseconds). 
+ +**Important:** \`open\` already waits for the page \`load\` event before returning. In most cases, no additional wait is needed before taking a snapshot or screenshot. Only add an explicit wait when content loads asynchronously after the initial page load. + +\`\`\`bash +# Wait for a specific element to appear (preferred for dynamic content) +agent-browser wait "#content" +agent-browser wait @e1 + +# Wait a fixed duration (good default for slow SPAs) +agent-browser wait 2000 + +# Wait for a specific URL pattern (useful after redirects) +agent-browser wait --url "**/dashboard" + +# Wait for text to appear on the page +agent-browser wait --text "Results loaded" + +# Wait for a JavaScript condition +agent-browser wait --fn "document.querySelectorAll('.item').length > 0" +\`\`\` + +**Avoid \`wait --load networkidle\`** unless you are certain the site has no persistent network activity. Ad-heavy sites, sites with analytics/tracking, and sites with websockets will cause \`networkidle\` to hang indefinitely. Prefer \`wait 2000\` or \`wait <selector>\` instead. + +## JavaScript Dialogs (alert / confirm / prompt) + +When a page opens a JavaScript dialog (\`alert()\`, \`confirm()\`, or \`prompt()\`), it blocks all other browser commands (snapshot, screenshot, click, etc.) until the dialog is dismissed. If commands start timing out unexpectedly, check for a pending dialog: + +\`\`\`bash +# Check if a dialog is blocking +agent-browser dialog status + +# Accept the dialog (dismiss the alert / click OK) +agent-browser dialog accept + +# Accept a prompt dialog with input text +agent-browser dialog accept "my input" + +# Dismiss the dialog (click Cancel) +agent-browser dialog dismiss +\`\`\` + +When a dialog is pending, all command responses include a \`warning\` field indicating the dialog type and message. In \`--json\` mode this appears as a \`"warning"\` key in the response object. 
+ +## Session Management and Cleanup + +When running multiple agents or automations concurrently, always use named sessions to avoid conflicts: + +\`\`\`bash +# Each agent gets its own isolated session +agent-browser --session agent1 open site-a.com +agent-browser --session agent2 open site-b.com + +# Check active sessions +agent-browser session list +\`\`\` + +Always close your browser session when done to avoid leaked processes: + +\`\`\`bash +agent-browser close # Close default session +agent-browser --session agent1 close # Close specific session +agent-browser close --all # Close all active sessions +\`\`\` + +If a previous session was not closed properly, the daemon may still be running. Use \`agent-browser close\` to clean it up, or \`agent-browser close --all\` to shut down every session at once. + +To auto-shutdown the daemon after a period of inactivity (useful for ephemeral/CI environments): + +\`\`\`bash +AGENT_BROWSER_IDLE_TIMEOUT_MS=60000 agent-browser open example.com +\`\`\` + +## Ref Lifecycle (Important) + +Refs (\`@e1\`, \`@e2\`, etc.) are invalidated when the page changes. Always re-snapshot after: + +- Clicking links or buttons that navigate +- Form submissions +- Dynamic content loading (dropdowns, modals) + +\`\`\`bash +agent-browser click @e5 # Navigates to new page +agent-browser snapshot -i # MUST re-snapshot +agent-browser click @e1 # Use new refs +\`\`\` + +## Annotated Screenshots (Vision Mode) + +Use \`--annotate\` to take a screenshot with numbered labels overlaid on interactive elements. Each label \`[N]\` maps to ref \`@eN\`. This also caches refs, so you can interact with elements immediately without a separate snapshot. 
+ \`\`\`bash agent-browser screenshot --annotate -agent-browser click @e2 +# Output includes the image path and a legend: +# [1] @e1 button "Submit" +# [2] @e2 link "Home" +# [3] @e3 textbox "Email" +agent-browser click @e2 # Click using ref from annotated screenshot \`\`\` -## Authentication (Auth Vault) +Use annotated screenshots when: + +- The page has unlabeled icon buttons or visual-only elements +- You need to verify visual layout or styling +- Canvas or chart elements are present (invisible to text snapshots) +- You need spatial reasoning about element positions + +## Semantic Locators (Alternative to Refs) + +When refs are unavailable or unreliable, use semantic locators: + \`\`\`bash -echo "pass" | agent-browser auth save github --url https://github.com/login --username user --password-stdin -agent-browser auth login github +agent-browser find text "Sign In" click +agent-browser find label "Email" fill "user@test.com" +agent-browser find role button click --name "Submit" +agent-browser find placeholder "Search" type "query" +agent-browser find testid "submit-btn" click \`\`\` -# Security Controls (Opt-In) +## JavaScript Evaluation (eval) -- Content boundaries: \`AGENT_BROWSER_CONTENT_BOUNDARIES=1\` -- Domain allowlist: \`AGENT_BROWSER_ALLOWED_DOMAINS="example.com,*.example.com"\` -- Action policy: \`AGENT_BROWSER_ACTION_POLICY=./policy.json\` -- Output limits: \`AGENT_BROWSER_MAX_OUTPUT=50000\` - -Use allowlists and policies when tasks involve unknown pages or potentially destructive actions. - -# JavaScript Evaluation Notes - -For complex JavaScript, use stdin mode to avoid shell quoting issues: +Use \`eval\` to run JavaScript in the browser context. **Shell quoting can corrupt complex expressions** -- use \`--stdin\` or \`-b\` to avoid issues. 
\`\`\`bash +# Simple expressions work with regular quoting +agent-browser eval 'document.title' +agent-browser eval 'document.querySelectorAll("img").length' + +# Complex JS: use --stdin with heredoc (RECOMMENDED) agent-browser eval --stdin <<'EVALEOF' -JSON.stringify(Array.from(document.querySelectorAll("a")).map((a) => a.href)) +JSON.stringify( + Array.from(document.querySelectorAll("img")) + .filter(i => !i.alt) + .map(i => ({ src: i.src.split("/").pop(), width: i.width })) +) EVALEOF + +# Alternative: base64 encoding (avoids all shell escaping issues) +agent-browser eval -b "$(echo -n 'Array.from(document.querySelectorAll("a")).map(a => a.href)' | base64)" \`\`\` +**Why this matters:** When the shell processes your command, inner double quotes, \`!\` characters (history expansion), backticks, and \`$()\` can all corrupt the JavaScript before it reaches agent-browser. The \`--stdin\` and \`-b\` flags bypass shell interpretation entirely. + +**Rules of thumb:** + +- Single-line, no nested quotes -> regular \`eval 'expression'\` with single quotes is fine +- Nested quotes, arrow functions, template literals, or multiline -> use \`eval --stdin <<'EVALEOF'\` +- Programmatic/generated scripts -> use \`eval -b\` with base64 + +## Configuration File + +Create \`agent-browser.json\` in the project root for persistent settings: + +\`\`\`json +{ + "headed": true, + "proxy": "http://localhost:8080", + "profile": "./browser-data" +} +\`\`\` + +Priority (lowest to highest): \`~/.agent-browser/config.json\` < \`./agent-browser.json\` < env vars < CLI flags. Use \`--config <path>\` or \`AGENT_BROWSER_CONFIG\` env var for a custom config file (exits with error if missing/invalid). All CLI options map to camelCase keys (e.g., \`--executable-path\` -> \`"executablePath"\`). Boolean flags accept \`true\`/\`false\` values (e.g., \`--headed false\` overrides config). Extensions from user and project configs are merged, not replaced. 
+ +## Deep-Dive Documentation + +Extended references (commands, snapshot-refs, sessions, authentication, video, profiling, proxy): https://github.com/vercel-labs/agent-browser/tree/main/skills/agent-browser/references + +## Cloud Providers + +Use \`-p <provider>\` (or \`AGENT_BROWSER_PROVIDER\`) to run against a cloud browser instead of launching a local Chrome instance. Supported providers: \`agentcore\`, \`browserbase\`, \`browserless\`, \`browseruse\`, \`kernel\`. + +### AgentCore (AWS Bedrock) + +\`\`\`bash +# Credentials auto-resolved from env vars or AWS CLI (SSO, IAM roles, etc.) +agent-browser -p agentcore open https://example.com + +# With persistent browser profile +AGENTCORE_PROFILE_ID=my-profile agent-browser -p agentcore open https://example.com + +# With explicit region +AGENTCORE_REGION=eu-west-1 agent-browser -p agentcore open https://example.com +\`\`\` + +Set \`AWS_PROFILE\` to select a named AWS profile. + +## Browser Engine Selection + +Use \`--engine\` to choose a local browser engine. The default is \`chrome\`. + +\`\`\`bash +# Use Lightpanda (fast headless browser, requires separate install) +agent-browser --engine lightpanda open example.com + +# Via environment variable +export AGENT_BROWSER_ENGINE=lightpanda +agent-browser open example.com + +# With custom binary path +agent-browser --engine lightpanda --executable-path /path/to/lightpanda open example.com +\`\`\` + +Supported engines: +- \`chrome\` (default) -- Chrome/Chromium via CDP +- \`lightpanda\` -- Lightpanda headless browser via CDP (10x faster, 10x less memory than Chrome) + +Lightpanda does not support \`--extension\`, \`--profile\`, \`--state\`, or \`--allow-file-access\`. Install Lightpanda from https://lightpanda.io/docs/open-source/installation. + +## Observability Dashboard + +The dashboard is a standalone background server that shows live browser viewports, command activity, and console output for all sessions. 
+ +\`\`\`bash +# Start the dashboard server (background, port 4848) +agent-browser dashboard start + +# All sessions are automatically visible in the dashboard +agent-browser open example.com + +# Stop the dashboard +agent-browser dashboard stop +\`\`\` + +The dashboard runs independently of browser sessions on port 4848 (configurable with \`--port\`). All sessions automatically stream to the dashboard. Sessions can also be created from the dashboard UI with local engines or cloud providers. + +### Dashboard AI Chat + +The dashboard has an optional AI chat tab powered by the Vercel AI Gateway. Enable it by setting: + +\`\`\`bash +export AI_GATEWAY_API_KEY=gw_your_key_here +export AI_GATEWAY_MODEL=anthropic/claude-sonnet-4.6 # optional default +export AI_GATEWAY_URL=https://ai-gateway.vercel.sh # optional default +\`\`\` + +The Chat tab is always visible in the dashboard. Set \`AI_GATEWAY_API_KEY\` to enable AI responses. + +## Ready-to-Use Templates + +Example scripts in the upstream repo: https://github.com/vercel-labs/agent-browser/tree/main/skills/agent-browser/templates + + # Execution Rules in This Runtime - Run all agent-browser commands via \`execScript\` with \`runInClient: true\` because it is a local CLI. - Prefer \`--json\` output when structured parsing is needed. -- Always close sessions when done: \`agent-browser close\` (or named session close). -- If a task stalls, use explicit wait commands instead of blind retries. +- Always close sessions when done: \`agent-browser close\`, \`agent-browser close --all\`, or \`agent-browser --session <name> close\`. +- If a task stalls, use explicit \`wait\` commands instead of blind retries. +- Run \`snapshot -i\` alone when you must read refs from output; then use \`agent-browser batch\` or \`&&\` for the remaining steps (see **Batch Execution** above). 
`; diff --git a/packages/builtin-tool-local-system/src/client/Intervention/OutOfScopeWarning.tsx b/packages/builtin-tool-local-system/src/client/Intervention/OutOfScopeWarning.tsx index 61adf98a14..79731803d1 100644 --- a/packages/builtin-tool-local-system/src/client/Intervention/OutOfScopeWarning.tsx +++ b/packages/builtin-tool-local-system/src/client/Intervention/OutOfScopeWarning.tsx @@ -22,7 +22,6 @@ interface OutOfScopeWarningProps { const OutOfScopeWarning = memo(({ paths }) => { const { t } = useTranslation('tool'); - // Get working directory from topic or agent store const topicWorkingDir = useChatStore(topicSelectors.currentTopicWorkingDirectory); const agentWorkingDir = useAgentStore(agentSelectors.currentAgentWorkingDirectory); const workingDirectory = topicWorkingDir || agentWorkingDir; diff --git a/packages/electron-client-ipc/src/types/localSystem.ts b/packages/electron-client-ipc/src/types/localSystem.ts index 49a2740b62..06bcc7b7a4 100644 --- a/packages/electron-client-ipc/src/types/localSystem.ts +++ b/packages/electron-client-ipc/src/types/localSystem.ts @@ -184,6 +184,8 @@ export interface RunCommandParams { command: string; cwd?: string; description?: string; + /** Merged into the child process environment (after `process.env`). 
*/ + env?: Record; run_in_background?: boolean; timeout?: number; } diff --git a/packages/local-file-shell/src/shell/__tests__/runner.test.ts b/packages/local-file-shell/src/shell/__tests__/runner.test.ts index a68c684637..a4dd7c14f3 100644 --- a/packages/local-file-shell/src/shell/__tests__/runner.test.ts +++ b/packages/local-file-shell/src/shell/__tests__/runner.test.ts @@ -74,6 +74,19 @@ describe('runCommand', () => { expect(result.success).toBe(true); expect(result.stdout).toContain('/tmp'); }); + + it('should merge env into child process environment', async () => { + const result = await runCommand( + { + command: 'node -e "console.log(process.env.LOB_TEST_ENV_MERGE)"', + env: { LOB_TEST_ENV_MERGE: 'from-runner' }, + }, + { processManager }, + ); + + expect(result.success).toBe(true); + expect(result.stdout).toContain('from-runner'); + }); }); describe('background mode', () => { diff --git a/packages/local-file-shell/src/shell/runner.ts b/packages/local-file-shell/src/shell/runner.ts index 5ac1356761..4e7b3f0db8 100644 --- a/packages/local-file-shell/src/shell/runner.ts +++ b/packages/local-file-shell/src/shell/runner.ts @@ -15,7 +15,14 @@ export interface RunCommandOptions { } export async function runCommand( - { command, cwd, description, run_in_background, timeout = 120_000 }: RunCommandParams, + { + command, + cwd, + description, + env: extraEnv, + run_in_background, + timeout = 120_000, + }: RunCommandParams, { processManager, logger }: RunCommandOptions, ): Promise { const logPrefix = `[runCommand: ${description || command.slice(0, 50)}]`; @@ -23,13 +30,14 @@ export async function runCommand( const effectiveTimeout = Math.min(Math.max(timeout, 1000), 600_000); const shellConfig = getShellConfig(command); + const childEnv = extraEnv ? 
{ ...process.env, ...extraEnv } : process.env; try { if (run_in_background) { const shellId = randomUUID(); const childProcess = spawn(shellConfig.cmd, shellConfig.args, { cwd, - env: process.env, + env: childEnv, shell: false, }); @@ -61,7 +69,7 @@ export async function runCommand( return new Promise((resolve) => { const childProcess = spawn(shellConfig.cmd, shellConfig.args, { cwd, - env: process.env, + env: childEnv, shell: false, }); diff --git a/packages/local-file-shell/src/types.ts b/packages/local-file-shell/src/types.ts index 685eac85b0..0d22587202 100644 --- a/packages/local-file-shell/src/types.ts +++ b/packages/local-file-shell/src/types.ts @@ -4,6 +4,7 @@ export interface RunCommandParams { command: string; cwd?: string; description?: string; + env?: Record; run_in_background?: boolean; timeout?: number; } diff --git a/packages/local-file-shell/vitest.config.mts b/packages/local-file-shell/vitest.config.mts new file mode 100644 index 0000000000..4ac6027d57 --- /dev/null +++ b/packages/local-file-shell/vitest.config.mts @@ -0,0 +1,7 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + environment: 'node', + }, +}); diff --git a/src/features/ChatInput/RuntimeConfig/index.tsx b/src/features/ChatInput/RuntimeConfig/index.tsx index ff0beea146..49266265cc 100644 --- a/src/features/ChatInput/RuntimeConfig/index.tsx +++ b/src/features/ChatInput/RuntimeConfig/index.tsx @@ -108,7 +108,6 @@ const RuntimeConfig = memo(() => { chatConfigByIdSelectors.getRuntimeModeById(agentId)(s), ]); - // Get working directory const topicWorkingDirectory = useChatStore(topicSelectors.currentTopicWorkingDirectory); const agentWorkingDirectory = useAgentStore((s) => agentId ? 
agentByIdSelectors.getAgentWorkingDirectoryById(agentId)(s) : undefined, diff --git a/src/helpers/parserPlaceholder/index.test.ts b/src/helpers/parserPlaceholder/index.test.ts index b96d88d7d2..6716cd6684 100644 --- a/src/helpers/parserPlaceholder/index.test.ts +++ b/src/helpers/parserPlaceholder/index.test.ts @@ -635,9 +635,9 @@ describe('VARIABLE_GENERATORS', () => { expect(VARIABLE_GENERATORS.userDataPath()).toBe(''); }); - it('should return default message for working directory when not specified', () => { + it('should return empty string for working directory when not on desktop', () => { const result = VARIABLE_GENERATORS.workingDirectory(); - expect(result).toBe('(not specified, use user Desktop directory as default)'); + expect(result).toBe(''); }); }); diff --git a/src/helpers/parserPlaceholder/index.ts b/src/helpers/parserPlaceholder/index.ts index 6e079b2968..539da8cd3d 100644 --- a/src/helpers/parserPlaceholder/index.ts +++ b/src/helpers/parserPlaceholder/index.ts @@ -1,3 +1,4 @@ +import { isDesktop } from '@lobechat/const'; import { uuid } from '@lobechat/utils'; import { template } from 'es-toolkit/compat'; @@ -153,16 +154,16 @@ export const VARIABLE_GENERATORS = { videosPath: () => globalAgentContextManager.getContext().videosPath ?? '', userDataPath: () => globalAgentContextManager.getContext().userDataPath ?? '', /** - * Working directory: Topic-level setting takes priority over Agent-level setting + * Working directory: topic-level override takes priority over agent-level value */ workingDirectory: () => { - // First check topic-level working directory + if (!isDesktop) return ''; + const topicWorkingDir = topicSelectors.currentTopicWorkingDirectory(useChatStore.getState()); if (topicWorkingDir) return topicWorkingDir; - // Fallback to agent-level working directory const agentWorkingDir = agentSelectors.currentAgentWorkingDirectory(useAgentStore.getState()); - return agentWorkingDir ?? 
'(not specified, use user Desktop directory as default)'; + return agentWorkingDir ?? '(not specified, use user Home directory as default)'; }, } as Record string>; diff --git a/src/locales/default/setting.ts b/src/locales/default/setting.ts index f9ca3f6ab7..7e6d90858a 100644 --- a/src/locales/default/setting.ts +++ b/src/locales/default/setting.ts @@ -738,8 +738,7 @@ export default { 'settingSystemTools.appEnvironment.desc': 'Built-in runtime versions in the desktop app', 'settingSystemTools.appEnvironment.electron.desc': 'Electron framework version', 'settingSystemTools.appEnvironment.node.desc': 'Embedded Node.js version', - 'settingSystemTools.appEnvironment.title': 'App Environment', - 'settingSystemTools.autoSelectDesc': 'The best available tool will be automatically selected', + 'settingSystemTools.appEnvironment.title': 'Built-in App Tools', 'settingSystemTools.category.browserAutomation': 'Browser Automation', 'settingSystemTools.category.browserAutomation.desc': 'Tools for headless browser automation and web interaction', @@ -763,10 +762,15 @@ export default { 'settingSystemTools.tools.find.desc': 'Unix find - standard file search command', 'settingSystemTools.tools.grep.desc': 'GNU grep - standard text search tool', 'settingSystemTools.tools.mdfind.desc': 'macOS Spotlight search (fast indexed search)', + 'settingSystemTools.tools.lobehub.desc': 'LobeHub CLI - manage and connect to LobeHub services', + 'settingSystemTools.tools.bun.desc': 'Bun - fast JavaScript runtime and package manager', + 'settingSystemTools.tools.bunx.desc': 'bunx - Bun package runner for executing npm packages', 'settingSystemTools.tools.node.desc': 'Node.js - JavaScript runtime for executing JS/TS', 'settingSystemTools.tools.npm.desc': 'npm - Node.js package manager for installing dependencies', + 'settingSystemTools.tools.pnpm.desc': 'pnpm - fast, disk space efficient package manager', 'settingSystemTools.tools.python.desc': 'Python - programming language runtime', 
'settingSystemTools.tools.rg.desc': 'ripgrep - extremely fast text search tool', + 'settingSystemTools.tools.uv.desc': 'uv - extremely fast Python package manager', 'settingTTS.openai.sttModel': 'OpenAI Speech-to-Text Model', 'settingTTS.openai.title': 'OpenAI', 'settingTTS.openai.ttsModel': 'OpenAI Text-to-Speech Model', diff --git a/src/routes/(main)/settings/system-tools/features/CliTestSection.tsx b/src/routes/(main)/settings/system-tools/features/CliTestSection.tsx new file mode 100644 index 0000000000..c313f6c5bf --- /dev/null +++ b/src/routes/(main)/settings/system-tools/features/CliTestSection.tsx @@ -0,0 +1,103 @@ +'use client'; + +import { Button, Flexbox, Input, Text } from '@lobehub/ui'; +import { memo, useCallback, useState } from 'react'; + +import { electronSystemService } from '@/services/electron/system'; + +interface CommandResult { + args: string; + exitCode: number; + stderr: string; + stdout: string; +} + +const CliTestSection = memo(() => { + const [results, setResults] = useState([]); + const [running, setRunning] = useState(false); + const [customCmd, setCustomCmd] = useState(''); + + const runCommand = useCallback(async (args: string) => { + setRunning(true); + try { + const result = await electronSystemService.runCliCommand(args); + setResults((prev) => [...prev, { args, ...result }]); + } catch (error: any) { + setResults((prev) => [...prev, { args, exitCode: -1, stderr: String(error), stdout: '' }]); + } finally { + setRunning(false); + } + }, []); + + const presetCommands = ['--version', '--help', 'status']; + + return ( + + CLI Embedded Test + + + {presetCommands.map((cmd) => ( + + ))} + + + + setCustomCmd(e.target.value)} + onPressEnter={() => customCmd && runCommand(customCmd)} + /> + + + + {results.map((r, i) => ( + + + $ lobehub {r.args} (exit: {r.exitCode}) + + {r.stdout && ( +
+              {r.stdout}
+            
+ )} + {r.stderr && ( +
+              {r.stderr}
+            
+ )} +
+ ))} +
+ ); +}); + +export default CliTestSection; diff --git a/src/routes/(main)/settings/system-tools/features/ToolDetectorSection.tsx b/src/routes/(main)/settings/system-tools/features/ToolDetectorSection.tsx index 73a52adf12..e939e5a0b1 100644 --- a/src/routes/(main)/settings/system-tools/features/ToolDetectorSection.tsx +++ b/src/routes/(main)/settings/system-tools/features/ToolDetectorSection.tsx @@ -19,9 +19,14 @@ const TOOL_CATEGORIES = { descKey: 'settingSystemTools.category.runtimeEnvironment.desc', titleKey: 'settingSystemTools.category.runtimeEnvironment', tools: [ + { descKey: 'settingSystemTools.tools.lobehub.desc', name: 'lobehub' }, { descKey: 'settingSystemTools.tools.node.desc', name: 'node' }, { descKey: 'settingSystemTools.tools.python.desc', name: 'python' }, { descKey: 'settingSystemTools.tools.npm.desc', name: 'npm' }, + { descKey: 'settingSystemTools.tools.bun.desc', name: 'bun' }, + { descKey: 'settingSystemTools.tools.bunx.desc', name: 'bunx' }, + { descKey: 'settingSystemTools.tools.pnpm.desc', name: 'pnpm' }, + { descKey: 'settingSystemTools.tools.uv.desc', name: 'uv' }, ], }, @@ -171,7 +176,6 @@ const ToolDetectorSection = memo(() => { justify="flex-end" style={{ marginBlockStart: 8 }} > - {t('settingSystemTools.autoSelectDesc')}