mirror of
https://github.com/lobehub/lobehub
synced 2026-04-21 09:37:28 +00:00
✨ feat(desktop): embed CLI in app and PATH install (#13669)
* ✨ feat(desktop): embed CLI in app and PATH install Made-with: Cursor * ✨ feat(desktop): add CLI command execution feature and UI integration - Implemented `runCliCommand` method in `ElectronSystemService` to execute CLI commands. - Added `CliTestSection` component for testing CLI commands within the app. - Updated `SystemCtr` to include CLI command execution functionality. - Enhanced `generateCliWrapper` to create short aliases for CLI commands. - Integrated CLI testing UI in the system tools settings page. Signed-off-by: Innei <tukon479@gmail.com> * ✨ feat: enhance working directory handling for desktop - Updated working directory logic to prioritize topic-level settings over agent-level. - Introduced local storage management for agent working directories. - Modified tests to reflect changes in working directory behavior. - Added checks to ensure working directory retrieval is only performed on desktop environments. Signed-off-by: Innei <tukon479@gmail.com> * ✨ feat(desktop): implement CLI command routing and cleanup - Introduced `CliCtr` for executing CLI commands, enhancing the desktop application with CLI capabilities. - Updated `ShellCommandCtr` to route specific commands to `CliCtr`, improving command handling. - Removed legacy CLI path installation methods from `SystemCtr` and related services. - Cleaned up localization files by removing obsolete entries related to CLI path installation. Signed-off-by: Innei <tukon479@gmail.com> * 🚸 settings(system-tools): show CLI embedded test only in dev mode Made-with: Cursor --------- Signed-off-by: Innei <tukon479@gmail.com>
This commit is contained in:
parent
eebf9cb056
commit
e65e2c3628
53 changed files with 1379 additions and 168 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -140,5 +140,9 @@ pnpm-lock.yaml
|
|||
.turbo
|
||||
spaHtmlTemplates.ts
|
||||
|
||||
# Embedded CLI bundle (built at pack time)
|
||||
apps/desktop/resources/bin/lobe-cli.js
|
||||
apps/desktop/resources/cli-package.json
|
||||
|
||||
.superpowers/
|
||||
docs/superpowers
|
||||
|
|
@ -160,7 +160,7 @@ export function spawnDaemon(args: string[]): number {
|
|||
// Re-run the same entry with --daemon-child (internal flag)
|
||||
const child = spawn(process.execPath, [...process.execArgv, ...args, '--daemon-child'], {
|
||||
detached: true,
|
||||
env: { ...process.env, LOBEHUB_DAEMON: '1' },
|
||||
env: { ...process.env, ELECTRON_RUN_AS_NODE: '1', LOBEHUB_DAEMON: '1' },
|
||||
stdio: ['ignore', logFd, logFd],
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
import { createProgram } from './program';
|
||||
|
||||
createProgram().parse();
|
||||
createProgram().parse(process.argv, { from: 'node' });
|
||||
|
|
|
|||
|
|
@ -9,6 +9,10 @@ export default defineConfig({
|
|||
entry: ['src/index.ts'],
|
||||
fixedExtension: false,
|
||||
format: ['esm'],
|
||||
minify: true,
|
||||
outputOptions: {
|
||||
codeSplitting: false,
|
||||
},
|
||||
platform: 'node',
|
||||
target: 'node18',
|
||||
});
|
||||
|
|
|
|||
|
|
@ -109,6 +109,26 @@ const config = {
|
|||
|
||||
console.info('📦 Downloading agent-browser binary...');
|
||||
execSync('node scripts/download-agent-browser.mjs', { stdio: 'inherit', cwd: __dirname });
|
||||
|
||||
// Build and copy CLI bundle for embedding
|
||||
console.info('📦 Building CLI for embedding...');
|
||||
execSync('npm run build', { stdio: 'inherit', cwd: path.resolve(__dirname, '../cli') });
|
||||
const cliSrc = path.resolve(__dirname, '../cli/dist/index.js');
|
||||
const cliDest = path.resolve(__dirname, 'resources/bin/lobe-cli.js');
|
||||
await fs.copyFile(cliSrc, cliDest);
|
||||
|
||||
// Write a minimal package.json next to the CLI bundle so that
|
||||
// createRequire('../package.json') resolves correctly in the packaged app.
|
||||
// The CLI script lives at Resources/bin/lobe-cli.js, so '../package.json'
|
||||
// resolves to Resources/package.json.
|
||||
const cliPkg = JSON.parse(
|
||||
await fs.readFile(path.resolve(__dirname, '../cli/package.json'), 'utf8'),
|
||||
);
|
||||
await fs.writeFile(
|
||||
path.resolve(__dirname, 'resources/cli-package.json'),
|
||||
JSON.stringify({ name: cliPkg.name, type: 'module', version: cliPkg.version }),
|
||||
);
|
||||
console.info('✅ CLI bundle copied to resources/bin/lobe-cli.js');
|
||||
},
|
||||
/**
|
||||
* AfterPack hook for post-processing:
|
||||
|
|
@ -296,7 +316,10 @@ const config = {
|
|||
releaseNotes: process.env.RELEASE_NOTES || undefined,
|
||||
},
|
||||
|
||||
extraResources: [{ from: 'resources/bin', to: 'bin' }],
|
||||
extraResources: [
|
||||
{ from: 'resources/bin', to: 'bin' },
|
||||
{ from: 'resources/cli-package.json', to: 'package.json' },
|
||||
],
|
||||
|
||||
win: {
|
||||
executableName: 'LobeHub',
|
||||
|
|
|
|||
|
|
@ -90,7 +90,6 @@ export default defineConfig({
|
|||
outDir: 'dist/preload',
|
||||
sourcemap: isDev ? 'inline' : false,
|
||||
},
|
||||
|
||||
resolve: {
|
||||
alias: {
|
||||
'@': path.resolve(__dirname, 'src/main'),
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
"author": "LobeHub",
|
||||
"main": "./dist/main/index.js",
|
||||
"scripts": {
|
||||
"build:cli": "cd ../cli && bun run build",
|
||||
"build:main": "cross-env NODE_OPTIONS=--max-old-space-size=8192 electron-vite build",
|
||||
"build:run-unpack": "electron .",
|
||||
"dev": "electron-vite dev",
|
||||
|
|
|
|||
58
apps/desktop/src/main/controllers/CliCtr.ts
Normal file
58
apps/desktop/src/main/controllers/CliCtr.ts
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import { exec } from 'node:child_process';
|
||||
import path from 'node:path';
|
||||
import process from 'node:process';
|
||||
import { promisify } from 'node:util';
|
||||
|
||||
import { getCliWrapperDir } from '@/modules/cliEmbedding';
|
||||
import { createLogger } from '@/utils/logger';
|
||||
|
||||
import { ControllerModule, IpcMethod } from './index';
|
||||
import RemoteServerConfigCtr from './RemoteServerConfigCtr';
|
||||
|
||||
const logger = createLogger('controllers:CliCtr');
|
||||
|
||||
/**
 * Strip every trailing `/` from a server URL so path segments can be appended
 * without producing a double slash.
 *
 * @param url - Server URL as configured; may or may not end with one or more slashes.
 * @returns The URL with all trailing slashes removed (an input of only slashes yields `''`).
 */
function normalizeServerUrl(url: string): string {
  let end = url.length;
  // Linear scan from the end; avoids regex backtracking on slash-heavy input.
  while (end > 0 && url.charAt(end - 1) === '/') end -= 1;
  return url.slice(0, end);
}
|
||||
|
||||
/**
 * IPC controller that runs the embedded LobeHub CLI through the generated
 * wrapper script located in the CLI wrapper directory.
 */
export default class CliCtr extends ControllerModule {
  static override readonly groupName = 'cli';

  /**
   * Run the `lobehub` wrapper with the given argument string and capture its output.
   *
   * When the remote-server controller yields both an access token and a server URL,
   * they are passed to the child process as `LOBEHUB_JWT` and `LOBEHUB_SERVER`.
   *
   * @param args - Raw argument string appended verbatim after the quoted wrapper path.
   * @returns Exit code plus captured stdout/stderr. A failing child process is reported
   *   through the result (non-zero `exitCode`) rather than thrown.
   */
  @IpcMethod()
  async runCliCommand(args: string): Promise<{ exitCode: number; stderr: string; stdout: string }> {
    const execAsync = promisify(exec);
    const cmd = process.platform === 'win32' ? 'lobehub.cmd' : 'lobehub';
    const wrapperPath = path.join(getCliWrapperDir(), cmd);

    // Copy so the injected credentials never leak into the app's own environment.
    const env = { ...process.env };

    const remoteCtr = this.app.getController(RemoteServerConfigCtr);
    if (remoteCtr) {
      const [token, serverUrl] = await Promise.all([
        remoteCtr.getAccessToken(),
        remoteCtr.getRemoteServerUrl(),
      ]);

      // Only inject when both are present; a token without a server (or vice versa) is not passed on.
      if (token && serverUrl) {
        env.LOBEHUB_JWT = token;
        env.LOBEHUB_SERVER = normalizeServerUrl(serverUrl);
        logger.debug('Injected LOBEHUB_JWT / LOBEHUB_SERVER for CLI command');
      }
    }

    try {
      // NOTE(review): `args` is interpolated into a shell command line without escaping, so shell
      // metacharacters in it are interpreted by the shell. Confirm every caller is already trusted
      // to run arbitrary shell commands before exposing this to less-trusted input.
      const { stdout, stderr } = await execAsync(`"${wrapperPath}" ${args}`, {
        env,
        timeout: 15_000,
      });
      return { exitCode: 0, stderr, stdout };
    } catch (error: unknown) {
      const failure = (typeof error === 'object' && error !== null ? error : {}) as {
        code?: unknown;
        message?: unknown;
        stderr?: string;
        stdout?: string;
      };

      return {
        // `code` is the numeric exit status for an ordinary non-zero exit, but it can be null
        // (child killed, e.g. by the timeout) or a string error name; fall back to 1 so the
        // declared `number` contract always holds.
        exitCode: typeof failure.code === 'number' ? failure.code : 1,
        stderr: failure.stderr ?? '',
        stdout: failure.stdout ?? String(failure.message),
      };
    }
  }
}
|
||||
|
|
@ -10,17 +10,38 @@ import { runCommand, ShellProcessManager } from '@lobechat/local-file-shell';
|
|||
|
||||
import { createLogger } from '@/utils/logger';
|
||||
|
||||
import CliCtr from './CliCtr';
|
||||
import { ControllerModule, IpcMethod } from './index';
|
||||
|
||||
const logger = createLogger('controllers:ShellCommandCtr');
|
||||
|
||||
const processManager = new ShellProcessManager();
|
||||
|
||||
/** Matches a leading `lh`/`lobe`/`lobehub` keyword that is followed by whitespace or end of input; the CLI arguments are whatever remains after slicing the matched prefix off the command. */
|
||||
const SIMPLE_LH_PREFIX = /^\s*(?:lh|lobe|lobehub)(?=\s|$)/;
|
||||
|
||||
export default class ShellCommandCtr extends ControllerModule {
|
||||
static override readonly groupName = 'shellCommand';
|
||||
|
||||
/**
 * Execute a shell command on behalf of the renderer.
 *
 * Commands that start with `lh`, `lobe` or `lobehub` are forwarded to
 * `CliCtr.runCliCommand` (when that controller is available) and its result is
 * mapped onto the `RunCommandResult` shape; everything else goes to `runCommand`.
 */
@IpcMethod()
async handleRunCommand(params: RunCommandParams): Promise<RunCommandResult> {
  const matched = SIMPLE_LH_PREFIX.exec(params.command);
  // Only look the controller up when the command actually targets the CLI.
  const cli = matched ? this.app.getController(CliCtr) : undefined;

  if (!matched || !cli) {
    return runCommand(params, { logger, processManager });
  }

  // Everything after the matched keyword is handed to the CLI as its arguments.
  const cliArgs = params.command.slice(matched[0].length).trim();
  logger.debug('Routing lh command to CliCtr.runCliCommand:', cliArgs);

  const { exitCode, stderr, stdout } = await cli.runCliCommand(cliArgs);

  return {
    exit_code: exitCode,
    output: stdout + stderr,
    stderr,
    stdout,
    success: exitCode === 0,
  };
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
|
|||
|
||||
import type { App } from '@/core/App';
|
||||
|
||||
import CliCtr from '../CliCtr';
|
||||
import ShellCommandCtr from '../ShellCommandCtr';
|
||||
|
||||
const { ipcMainHandleMock } = vi.hoisted(() => ({
|
||||
|
|
@ -32,7 +33,17 @@ vi.mock('node:crypto', () => ({
|
|||
randomUUID: vi.fn(() => 'test-uuid-123'),
|
||||
}));
|
||||
|
||||
const mockApp = {} as unknown as App;
|
||||
vi.mock('../CliCtr', () => ({
|
||||
default: class CliCtr {},
|
||||
}));
|
||||
|
||||
const mockCliCtr = {
|
||||
runCliCommand: vi.fn().mockResolvedValue({ exitCode: 0, stderr: '', stdout: 'cli output\n' }),
|
||||
};
|
||||
|
||||
const mockApp = {
|
||||
getController: vi.fn((c: unknown) => (c === CliCtr ? mockCliCtr : undefined)),
|
||||
} as unknown as App;
|
||||
|
||||
describe('ShellCommandCtr (thin wrapper)', () => {
|
||||
let ctr: ShellCommandCtr;
|
||||
|
|
@ -118,6 +129,28 @@ describe('ShellCommandCtr (thin wrapper)', () => {
|
|||
expect(mockChildProcess.kill).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should route lh commands to CliCtr.runCliCommand', async () => {
|
||||
const result = await ctr.handleRunCommand({
|
||||
command: 'lh status --json',
|
||||
description: 'lh status',
|
||||
});
|
||||
|
||||
expect(mockCliCtr.runCliCommand).toHaveBeenCalledWith('status --json');
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.stdout).toContain('cli output');
|
||||
expect(mockSpawn).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should route lobehub commands to CliCtr.runCliCommand', async () => {
|
||||
const result = await ctr.handleRunCommand({
|
||||
command: 'lobehub search test',
|
||||
description: 'lobehub search',
|
||||
});
|
||||
|
||||
expect(mockCliCtr.runCliCommand).toHaveBeenCalledWith('search test');
|
||||
expect(result.success).toBe(true);
|
||||
});
|
||||
|
||||
it('should return error for non-existent shell_id', async () => {
|
||||
const result = await ctr.handleGetCommandOutput({
|
||||
shell_id: 'non-existent',
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import type { CreateServicesResult, IpcServiceConstructor, MergeIpcService } fro
|
|||
|
||||
import AuthCtr from './AuthCtr';
|
||||
import BrowserWindowsCtr from './BrowserWindowsCtr';
|
||||
import CliCtr from './CliCtr';
|
||||
import DevtoolsCtr from './DevtoolsCtr';
|
||||
import GatewayConnectionCtr from './GatewayConnectionCtr';
|
||||
import LocalFileCtr from './LocalFileCtr';
|
||||
|
|
@ -23,6 +24,7 @@ import UploadFileCtr from './UploadFileCtr';
|
|||
export const controllerIpcConstructors = [
|
||||
AuthCtr,
|
||||
BrowserWindowsCtr,
|
||||
CliCtr,
|
||||
DevtoolsCtr,
|
||||
GatewayConnectionCtr,
|
||||
LocalFileCtr,
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import { isDev } from '@/const/env';
|
|||
import { ELECTRON_BE_PROTOCOL_SCHEME } from '@/const/protocol';
|
||||
import type { IControlModule } from '@/controllers';
|
||||
import AuthCtr from '@/controllers/AuthCtr';
|
||||
import { generateCliWrapper, getCliWrapperDir } from '@/modules/cliEmbedding';
|
||||
import {
|
||||
astSearchDetectors,
|
||||
browserAutomationDetectors,
|
||||
|
|
@ -89,9 +90,9 @@ export class App {
|
|||
logger.info('----------------------------------------------');
|
||||
logger.info('Starting LobeHub...');
|
||||
|
||||
// Append bundled binaries directory to PATH for fallback tool resolution
|
||||
// Append bundled binaries and CLI wrapper directories to PATH for tool resolution
|
||||
const pathSep = process.platform === 'win32' ? ';' : ':';
|
||||
process.env.PATH = `${process.env.PATH}${pathSep}${binDir}`;
|
||||
process.env.PATH = `${process.env.PATH}${pathSep}${binDir}${pathSep}${getCliWrapperDir()}`;
|
||||
|
||||
logger.debug('Initializing App');
|
||||
// Initialize store manager
|
||||
|
|
@ -226,6 +227,11 @@ export class App {
|
|||
// Initialize app
|
||||
await this.makeAppReady();
|
||||
|
||||
// Generate CLI wrapper for terminal usage
|
||||
generateCliWrapper().catch((error) => {
|
||||
logger.warn('Failed to generate CLI wrapper:', error);
|
||||
});
|
||||
|
||||
// Initialize i18n. Note: app.getLocale() must be called after app.whenReady() to get the correct value
|
||||
await this.i18n.init();
|
||||
this.menuManager.initialize();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,97 @@
|
|||
import { chmod, mkdir, rename, symlink, unlink, writeFile } from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
import { app } from 'electron';
|
||||
|
||||
import { createLogger } from '@/utils/logger';
|
||||
|
||||
const logger = createLogger('modules:cliEmbedding');
|
||||
|
||||
/**
 * Pick the executable the wrapper script should launch.
 *
 * On Linux, when the `APPIMAGE` environment variable is set, that path (the
 * .AppImage file itself) is used; in every other case it is `app.getPath('exe')`.
 */
function resolveElectronBinary(): string {
  const { APPIMAGE: appImagePath } = process.env;
  return process.platform === 'linux' && appImagePath ? appImagePath : app.getPath('exe');
}
|
||||
|
||||
/**
 * Locate the CLI entry script.
 *
 * Packaged builds read it from `<resources>/bin/lobe-cli.js`; otherwise (dev mode)
 * it is resolved relative to the app path as `../cli/dist/index.js`.
 */
function resolveCliScript(): string {
  const segments = app.isPackaged
    ? [process.resourcesPath, 'bin', 'lobe-cli.js']
    : // Dev mode: app.getAppPath() points to apps/desktop/, go up to apps/cli/
      [app.getAppPath(), '..', 'cli', 'dist', 'index.js'];

  return path.join(...segments);
}
|
||||
|
||||
/**
 * Directory that holds the generated CLI wrapper scripts: the `bin` folder
 * inside Electron's `userData` path.
 */
export function getCliWrapperDir(): string {
  const userDataDir = app.getPath('userData');
  return path.join(userDataDir, 'bin');
}
|
||||
|
||||
/**
 * Generate shell wrapper scripts that invoke the embedded CLI
 * using Electron's Node.js runtime via ELECTRON_RUN_AS_NODE=1.
 *
 * Called on every app launch to keep paths up-to-date after auto-updates.
 *
 * - Windows: writes `lobehub.cmd` plus `lh.cmd` / `lobe.cmd` copies.
 * - Other platforms: writes an executable `lobehub` sh script plus `lh` / `lobe` symlinks to it.
 *
 * Rejects if the directory or any wrapper file cannot be created.
 */
export async function generateCliWrapper(): Promise<void> {
  const electronBin = resolveElectronBinary();
  const cliScript = resolveCliScript();
  const wrapperDir = getCliWrapperDir();

  await mkdir(wrapperDir, { recursive: true });

  if (process.platform === 'win32') {
    const content = [
      '@echo off',
      // Without setlocal the variable would persist in the calling cmd session after the script
      // ends, and any Electron app started from that session would run as plain Node.
      'setlocal',
      'set ELECTRON_RUN_AS_NODE=1',
      `"${electronBin}" "${cliScript}" %*`,
    ].join('\r\n');

    const cmdPath = path.join(wrapperDir, 'lobehub.cmd');
    await atomicWrite(cmdPath, content);

    // Create short aliases: lh.cmd, lobe.cmd (copies on Windows, symlinks unreliable)
    for (const alias of ['lh.cmd', 'lobe.cmd']) {
      await atomicWrite(path.join(wrapperDir, alias), content);
    }

    logger.info(`CLI wrapper generated: ${cmdPath}`);
    return;
  }

  // NOTE(review): the paths are embedded inside double quotes, so a path containing `"`, `$`,
  // a backtick or `\` would be misread by sh. Confirm install paths cannot contain them.
  const content = [
    '#!/bin/sh',
    `ELECTRON_RUN_AS_NODE=1 exec "${electronBin}" "${cliScript}" "$@"`,
  ].join('\n');

  const wrapperPath = path.join(wrapperDir, 'lobehub');
  // The mode is applied to the temp file, so the wrapper is already executable
  // at the moment it is renamed into place.
  await atomicWrite(wrapperPath, content, 0o755);

  // Create short aliases: lh, lobe → lobehub
  for (const alias of ['lh', 'lobe']) {
    await atomicSymlink('lobehub', path.join(wrapperDir, alias));
  }

  logger.info(`CLI wrapper generated: ${wrapperPath}`);
}

/**
 * Atomic write: write to temp file then rename to avoid partial reads.
 *
 * @param filePath - Final destination path.
 * @param content - Text written as UTF-8.
 * @param mode - Optional permission bits applied to the temp file before the rename.
 */
async function atomicWrite(filePath: string, content: string, mode?: number): Promise<void> {
  const tmpPath = `${filePath}.tmp.${process.pid}`;

  try {
    await writeFile(tmpPath, content, 'utf8');
    if (mode !== undefined) await chmod(tmpPath, mode);
    await rename(tmpPath, filePath);
  } catch (error) {
    // Best-effort cleanup so a failed write does not leave a stray temp file behind.
    await unlink(tmpPath).catch(() => {});
    throw error;
  }
}

/**
 * Create or replace a symlink by building it under a temp name and renaming it
 * over the destination, so the alias never disappears while being refreshed.
 *
 * @param target - Link target, stored verbatim in the symlink.
 * @param linkPath - Path of the symlink to create or replace.
 */
async function atomicSymlink(target: string, linkPath: string): Promise<void> {
  const tmpPath = `${linkPath}.tmp.${process.pid}`;

  // A leftover temp link from an earlier failed run would make symlink() fail with EEXIST.
  await unlink(tmpPath).catch(() => {});

  try {
    await symlink(target, tmpPath);
    await rename(tmpPath, linkPath);
  } catch (error) {
    await unlink(tmpPath).catch(() => {});
    throw error;
  }
}
|
||||
1
apps/desktop/src/main/modules/cliEmbedding/index.ts
Normal file
1
apps/desktop/src/main/modules/cliEmbedding/index.ts
Normal file
|
|
@ -0,0 +1 @@
|
|||
export { generateCliWrapper, getCliWrapperDir } from './generateCliWrapper';
|
||||
|
|
@ -63,11 +63,82 @@ export const pythonDetector: IToolDetector = {
|
|||
priority: 3,
|
||||
};
|
||||
|
||||
/**
 * Detector for the `bun` command, built with `createCommandDetector`.
 */
export const bunDetector: IToolDetector = createCommandDetector('bun', {
  description: 'Bun - fast JavaScript runtime and package manager',
  priority: 4,
});
|
||||
|
||||
/**
 * Detector for the `bunx` command, built with `createCommandDetector`.
 */
export const bunxDetector: IToolDetector = createCommandDetector('bunx', {
  description: 'bunx - Bun package runner for executing npm packages',
  priority: 5,
});
|
||||
|
||||
/**
 * Detector for the `pnpm` command, built with `createCommandDetector`.
 */
export const pnpmDetector: IToolDetector = createCommandDetector('pnpm', {
  description: 'pnpm - fast, disk space efficient package manager',
  priority: 6,
});
|
||||
|
||||
/**
 * Detector for the `uv` command, built with `createCommandDetector`.
 */
export const uvDetector: IToolDetector = createCommandDetector('uv', {
  description: 'uv - extremely fast Python package manager',
  priority: 7,
});
|
||||
|
||||
/**
 * LobeHub CLI detector.
 *
 * Tries `lobehub`, `lobe`, `lh` in order. A candidate is accepted only when it
 * can be located on PATH and its `--help` output contains "LobeHub"; the first
 * accepted candidate's path and `--version` output are reported.
 */
export const lobehubDetector: IToolDetector = {
  description: 'LobeHub CLI - manage and connect to LobeHub services',
  async detect(): Promise<ToolStatus> {
    const commands = ['lobehub', 'lobe', 'lh'];
    const whichCmd = platform() === 'win32' ? 'where' : 'which';
    // Split on CRLF as well as LF: multi-line Windows output would otherwise
    // leave a trailing '\r' on the first line.
    const firstLine = (output: string): string => output.trim().split(/\r?\n/)[0] ?? '';

    for (const cmd of commands) {
      try {
        const { stdout: pathOut } = await execPromise(`${whichCmd} ${cmd}`, { timeout: 3000 });
        const toolPath = firstLine(pathOut);

        // Validate it's actually LobeHub CLI by checking help output
        const { stdout: helpOut } = await execPromise(`${cmd} --help`, { timeout: 3000 });
        if (!helpOut.includes('LobeHub')) continue;

        const { stdout: versionOut } = await execPromise(`${cmd} --version`, { timeout: 3000 });
        const version = firstLine(versionOut);

        return { available: true, path: toolPath, version };
      } catch {
        // Not found, failed, or timed out: deliberately ignored so the next alias is tried.
        continue;
      }
    }

    return { available: false };
  },
  name: 'lobehub',
  priority: 0,
};
|
||||
|
||||
/**
 * Every runtime-environment detector exported by this module, in registration order.
 */
export const runtimeEnvironmentDetectors: IToolDetector[] = [
  lobehubDetector,
  nodeDetector,
  npmDetector,
  pythonDetector,
  bunDetector,
  bunxDetector,
  pnpmDetector,
  uvDetector,
];
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "إصدار إطار Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "إصدار Node.js المدمج",
|
||||
"settingSystemTools.appEnvironment.title": "بيئة التطبيق",
|
||||
"settingSystemTools.autoSelectDesc": "سيتم اختيار أفضل أداة متاحة تلقائيًا",
|
||||
"settingSystemTools.category.browserAutomation": "أتمتة المتصفح",
|
||||
"settingSystemTools.category.browserAutomation.desc": "أدوات لأتمتة المتصفح بدون واجهة والتفاعل مع الويب",
|
||||
"settingSystemTools.category.contentSearch": "البحث في المحتوى",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Версия на рамката Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Вградена версия на Node.js",
|
||||
"settingSystemTools.appEnvironment.title": "Среда на приложението",
|
||||
"settingSystemTools.autoSelectDesc": "Най-добрият наличен инструмент ще бъде избран автоматично",
|
||||
"settingSystemTools.category.browserAutomation": "Автоматизация на браузъра",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Инструменти за автоматизация на браузъра без графичен интерфейс и уеб взаимодействие",
|
||||
"settingSystemTools.category.contentSearch": "Търсене в съдържание",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Electron-Framework-Version",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Eingebettete Node.js-Version",
|
||||
"settingSystemTools.appEnvironment.title": "App-Umgebung",
|
||||
"settingSystemTools.autoSelectDesc": "Das beste verfügbare Tool wird automatisch ausgewählt",
|
||||
"settingSystemTools.category.browserAutomation": "Browser-Automatisierung",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Werkzeuge für headless Browser-Automatisierung und Web-Interaktion",
|
||||
"settingSystemTools.category.contentSearch": "Inhaltssuche",
|
||||
|
|
|
|||
|
|
@ -656,8 +656,7 @@
|
|||
"settingSystemTools.appEnvironment.desc": "Built-in runtime versions in the desktop app",
|
||||
"settingSystemTools.appEnvironment.electron.desc": "Electron framework version",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Embedded Node.js version",
|
||||
"settingSystemTools.appEnvironment.title": "App Environment",
|
||||
"settingSystemTools.autoSelectDesc": "The best available tool will be automatically selected",
|
||||
"settingSystemTools.appEnvironment.title": "Built-in App Tools",
|
||||
"settingSystemTools.category.browserAutomation": "Browser Automation",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Tools for headless browser automation and web interaction",
|
||||
"settingSystemTools.category.contentSearch": "Content Search",
|
||||
|
|
@ -674,14 +673,19 @@
|
|||
"settingSystemTools.title": "System Tools",
|
||||
"settingSystemTools.tools.ag.desc": "The Silver Searcher - fast code searching tool",
|
||||
"settingSystemTools.tools.agentBrowser.desc": "Agent-browser - headless browser automation CLI for AI agents",
|
||||
"settingSystemTools.tools.bun.desc": "Bun - fast JavaScript runtime and package manager",
|
||||
"settingSystemTools.tools.bunx.desc": "bunx - Bun package runner for executing npm packages",
|
||||
"settingSystemTools.tools.fd.desc": "fd - fast and user-friendly alternative to find",
|
||||
"settingSystemTools.tools.find.desc": "Unix find - standard file search command",
|
||||
"settingSystemTools.tools.grep.desc": "GNU grep - standard text search tool",
|
||||
"settingSystemTools.tools.lobehub.desc": "LobeHub CLI - manage and connect to LobeHub services",
|
||||
"settingSystemTools.tools.mdfind.desc": "macOS Spotlight search (fast indexed search)",
|
||||
"settingSystemTools.tools.node.desc": "Node.js - JavaScript runtime for executing JS/TS",
|
||||
"settingSystemTools.tools.npm.desc": "npm - Node.js package manager for installing dependencies",
|
||||
"settingSystemTools.tools.pnpm.desc": "pnpm - fast, disk space efficient package manager",
|
||||
"settingSystemTools.tools.python.desc": "Python - programming language runtime",
|
||||
"settingSystemTools.tools.rg.desc": "ripgrep - extremely fast text search tool",
|
||||
"settingSystemTools.tools.uv.desc": "uv - extremely fast Python package manager",
|
||||
"settingTTS.openai.sttModel": "OpenAI Speech-to-Text Model",
|
||||
"settingTTS.openai.title": "OpenAI",
|
||||
"settingTTS.openai.ttsModel": "OpenAI Text-to-Speech Model",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Versión del framework Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Versión de Node.js integrada",
|
||||
"settingSystemTools.appEnvironment.title": "Entorno de la aplicación",
|
||||
"settingSystemTools.autoSelectDesc": "La mejor herramienta disponible se seleccionará automáticamente",
|
||||
"settingSystemTools.category.browserAutomation": "Automatización del Navegador",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Herramientas para la automatización de navegadores sin cabeza e interacción web",
|
||||
"settingSystemTools.category.contentSearch": "Búsqueda de contenido",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "نسخهٔ چارچوب Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "نسخهٔ Node.js تعبیهشده",
|
||||
"settingSystemTools.appEnvironment.title": "محیط برنامه",
|
||||
"settingSystemTools.autoSelectDesc": "بهترین ابزار موجود بهصورت خودکار انتخاب خواهد شد",
|
||||
"settingSystemTools.category.browserAutomation": "اتوماسیون مرورگر",
|
||||
"settingSystemTools.category.browserAutomation.desc": "ابزارهایی برای اتوماسیون مرورگر بدون رابط کاربری و تعامل وب",
|
||||
"settingSystemTools.category.contentSearch": "جستجوی محتوا",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Version du framework Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Version de Node.js intégrée",
|
||||
"settingSystemTools.appEnvironment.title": "Environnement de l'application",
|
||||
"settingSystemTools.autoSelectDesc": "L'outil le plus performant sera sélectionné automatiquement",
|
||||
"settingSystemTools.category.browserAutomation": "Automatisation du navigateur",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Outils pour l'automatisation de navigateur sans interface et l'interaction web",
|
||||
"settingSystemTools.category.contentSearch": "Recherche de contenu",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Versione del framework Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Versione di Node.js integrata",
|
||||
"settingSystemTools.appEnvironment.title": "Ambiente app",
|
||||
"settingSystemTools.autoSelectDesc": "Lo strumento migliore disponibile verrà selezionato automaticamente",
|
||||
"settingSystemTools.category.browserAutomation": "Automazione del browser",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Strumenti per l'automazione del browser senza interfaccia grafica e l'interazione web",
|
||||
"settingSystemTools.category.contentSearch": "Ricerca Contenuti",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Electron フレームワークのバージョン",
|
||||
"settingSystemTools.appEnvironment.node.desc": "同梱 Node.js のバージョン",
|
||||
"settingSystemTools.appEnvironment.title": "アプリ環境",
|
||||
"settingSystemTools.autoSelectDesc": "最適な利用可能ツールが自動的に選択されます",
|
||||
"settingSystemTools.category.browserAutomation": "ブラウザー自動化",
|
||||
"settingSystemTools.category.browserAutomation.desc": "ヘッドレスブラウザーの自動化とウェブ操作のためのツール",
|
||||
"settingSystemTools.category.contentSearch": "コンテンツ検索",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Electron 프레임워크 버전",
|
||||
"settingSystemTools.appEnvironment.node.desc": "내장 Node.js 버전",
|
||||
"settingSystemTools.appEnvironment.title": "앱 환경",
|
||||
"settingSystemTools.autoSelectDesc": "가장 적합한 도구가 자동으로 선택됩니다",
|
||||
"settingSystemTools.category.browserAutomation": "브라우저 자동화",
|
||||
"settingSystemTools.category.browserAutomation.desc": "헤드리스 브라우저 자동화 및 웹 상호작용을 위한 도구",
|
||||
"settingSystemTools.category.contentSearch": "콘텐츠 검색",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Electron-frameworkversie",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Ingesloten Node.js-versie",
|
||||
"settingSystemTools.appEnvironment.title": "App-omgeving",
|
||||
"settingSystemTools.autoSelectDesc": "Het best beschikbare hulpmiddel wordt automatisch geselecteerd",
|
||||
"settingSystemTools.category.browserAutomation": "Browserautomatisering",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Tools voor headless browserautomatisering en webinteractie",
|
||||
"settingSystemTools.category.contentSearch": "Zoeken in Inhoud",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Wersja frameworka Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Wersja wbudowanego Node.js",
|
||||
"settingSystemTools.appEnvironment.title": "Środowisko aplikacji",
|
||||
"settingSystemTools.autoSelectDesc": "Najlepsze dostępne narzędzie zostanie wybrane automatycznie",
|
||||
"settingSystemTools.category.browserAutomation": "Automatyzacja przeglądarki",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Narzędzia do automatyzacji przeglądarki bez interfejsu graficznego i interakcji z siecią",
|
||||
"settingSystemTools.category.contentSearch": "Wyszukiwanie treści",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Versão do framework Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Versão do Node.js integrada",
|
||||
"settingSystemTools.appEnvironment.title": "Ambiente do aplicativo",
|
||||
"settingSystemTools.autoSelectDesc": "A melhor ferramenta disponível será selecionada automaticamente",
|
||||
"settingSystemTools.category.browserAutomation": "Automação de Navegador",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Ferramentas para automação de navegador sem interface gráfica e interação com a web",
|
||||
"settingSystemTools.category.contentSearch": "Busca de Conteúdo",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Версия фреймворка Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Версия встроенного Node.js",
|
||||
"settingSystemTools.appEnvironment.title": "Среда приложения",
|
||||
"settingSystemTools.autoSelectDesc": "Лучший доступный инструмент будет выбран автоматически",
|
||||
"settingSystemTools.category.browserAutomation": "Автоматизация браузера",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Инструменты для автоматизации безголового браузера и взаимодействия с вебом",
|
||||
"settingSystemTools.category.contentSearch": "Поиск по содержимому",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Electron framework sürümü",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Gömülü Node.js sürümü",
|
||||
"settingSystemTools.appEnvironment.title": "Uygulama ortamı",
|
||||
"settingSystemTools.autoSelectDesc": "En iyi mevcut araç otomatik olarak seçilecektir",
|
||||
"settingSystemTools.category.browserAutomation": "Tarayıcı Otomasyonu",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Başsız tarayıcı otomasyonu ve web etkileşimi için araçlar",
|
||||
"settingSystemTools.category.contentSearch": "İçerik Arama",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Phiên bản framework Electron",
|
||||
"settingSystemTools.appEnvironment.node.desc": "Phiên bản Node.js nhúng",
|
||||
"settingSystemTools.appEnvironment.title": "Môi trường ứng dụng",
|
||||
"settingSystemTools.autoSelectDesc": "Công cụ tốt nhất sẽ được tự động chọn",
|
||||
"settingSystemTools.category.browserAutomation": "Tự động hóa trình duyệt",
|
||||
"settingSystemTools.category.browserAutomation.desc": "Công cụ cho tự động hóa trình duyệt không giao diện và tương tác web",
|
||||
"settingSystemTools.category.contentSearch": "Tìm kiếm nội dung",
|
||||
|
|
|
|||
|
|
@ -656,8 +656,7 @@
|
|||
"settingSystemTools.appEnvironment.desc": "桌面应用内置的运行时版本",
|
||||
"settingSystemTools.appEnvironment.electron.desc": "Electron 框架版本",
|
||||
"settingSystemTools.appEnvironment.node.desc": "内嵌 Node.js 版本",
|
||||
"settingSystemTools.appEnvironment.title": "应用环境",
|
||||
"settingSystemTools.autoSelectDesc": "系统会自动选择最优的可用工具",
|
||||
"settingSystemTools.appEnvironment.title": "内建应用工具",
|
||||
"settingSystemTools.category.browserAutomation": "浏览器自动化",
|
||||
"settingSystemTools.category.browserAutomation.desc": "用于无头浏览器自动化和网页交互的工具",
|
||||
"settingSystemTools.category.contentSearch": "内容搜索",
|
||||
|
|
@ -674,14 +673,19 @@
|
|||
"settingSystemTools.title": "系统工具",
|
||||
"settingSystemTools.tools.ag.desc": "The Silver Searcher - 快速代码搜索工具",
|
||||
"settingSystemTools.tools.agentBrowser.desc": "Agent-browser - 面向 AI 代理的无头浏览器自动化命令行工具",
|
||||
"settingSystemTools.tools.bun.desc": "Bun - 快速的 JavaScript 运行时和包管理器",
|
||||
"settingSystemTools.tools.bunx.desc": "bunx - Bun 包执行器,用于运行 npm 包",
|
||||
"settingSystemTools.tools.fd.desc": "fd - 快速且用户友好的 find 替代品",
|
||||
"settingSystemTools.tools.find.desc": "Unix find - 标准文件搜索命令",
|
||||
"settingSystemTools.tools.grep.desc": "GNU grep - 标准文本搜索工具",
|
||||
"settingSystemTools.tools.lobehub.desc": "LobeHub CLI - 管理和连接 LobeHub 服务",
|
||||
"settingSystemTools.tools.mdfind.desc": "macOS 聚焦搜索(快速索引搜索)",
|
||||
"settingSystemTools.tools.node.desc": "Node.js - 执行 JavaScript/TypeScript 的运行时",
|
||||
"settingSystemTools.tools.npm.desc": "npm - Node.js 包管理器,用于安装依赖",
|
||||
"settingSystemTools.tools.pnpm.desc": "pnpm - 快速、节省磁盘空间的包管理器",
|
||||
"settingSystemTools.tools.python.desc": "Python - 编程语言运行时",
|
||||
"settingSystemTools.tools.rg.desc": "ripgrep - 极快的文本搜索工具",
|
||||
"settingSystemTools.tools.uv.desc": "uv - 极快的 Python 包管理器",
|
||||
"settingTTS.openai.sttModel": "OpenAI 语音识别模型",
|
||||
"settingTTS.openai.title": "OpenAI",
|
||||
"settingTTS.openai.ttsModel": "OpenAI 语音合成模型",
|
||||
|
|
|
|||
|
|
@ -657,7 +657,6 @@
|
|||
"settingSystemTools.appEnvironment.electron.desc": "Electron 框架版本",
|
||||
"settingSystemTools.appEnvironment.node.desc": "內嵌 Node.js 版本",
|
||||
"settingSystemTools.appEnvironment.title": "應用環境",
|
||||
"settingSystemTools.autoSelectDesc": "將自動選擇最佳可用工具",
|
||||
"settingSystemTools.category.browserAutomation": "瀏覽器自動化",
|
||||
"settingSystemTools.category.browserAutomation.desc": "用於無頭瀏覽器自動化和網頁交互的工具",
|
||||
"settingSystemTools.category.contentSearch": "內容搜尋",
|
||||
|
|
|
|||
|
|
@ -1,158 +1,819 @@
|
|||
/**
|
||||
* @see https://github.com/vercel-labs/agent-browser/blob/main/skills/agent-browser/SKILL.md
|
||||
* Synced from https://github.com/vercel-labs/agent-browser/blob/main/skills/agent-browser/SKILL.md
|
||||
*/
|
||||
export const systemPrompt = `<agent_browser_guides>
|
||||
You can automate websites and Electron desktop apps with the agent-browser CLI. Use the \`execScript\` tool to run local shell commands.
|
||||
# Browser Automation with agent-browser
|
||||
|
||||
# Prerequisites
|
||||
The CLI uses Chrome/Chromium via CDP directly. **LobeHub desktop** bundles \`agent-browser\` in native mode. Otherwise install via \`npm i -g agent-browser\`, \`brew install agent-browser\`, or \`cargo install agent-browser\`. Run \`agent-browser install\` to download Chrome. Existing Chrome, Brave, Playwright, and Puppeteer installations are detected automatically. Run \`agent-browser upgrade\` to update to the latest version.
|
||||
|
||||
The \`agent-browser\` CLI is bundled with the desktop app (v0.20.1) and runs in native mode by default. It automatically detects system Chrome/Chromium. If no browser is found, install Google Chrome.
|
||||
## Core Workflow
|
||||
|
||||
# Core Workflow (Snapshot-Ref Pattern)
|
||||
Every browser automation follows this pattern:
|
||||
|
||||
Use this 4-step loop for almost all tasks:
|
||||
|
||||
1. Navigate: \`agent-browser open <url>\`
|
||||
2. Snapshot: \`agent-browser snapshot -i\` (returns refs like \`@e1\`, \`@e2\`)
|
||||
3. Interact: \`click\`, \`fill\`, \`select\`, etc. with refs
|
||||
4. Re-snapshot after page changes
|
||||
|
||||
Refs are ephemeral. After navigation, form submit, modal open, or dynamic updates, old refs are invalid. Re-snapshot before the next interaction.
|
||||
|
||||
# Command Chaining
|
||||
|
||||
You can chain commands with \`&&\` in one shell call. The daemon preserves browser state across chained commands.
|
||||
1. **Navigate**: \`agent-browser open <url>\`
|
||||
2. **Snapshot**: \`agent-browser snapshot -i\` (get element refs like \`@e1\`, \`@e2\`)
|
||||
3. **Interact**: Use refs to click, fill, select
|
||||
4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser snapshot -i
|
||||
\`\`\`
|
||||
|
||||
Chain only when you do not need to inspect intermediate output. If you must parse snapshot output to discover refs, run snapshot separately.
|
||||
|
||||
# Essential Commands
|
||||
|
||||
## Navigation
|
||||
- \`agent-browser open <url>\`
|
||||
- \`agent-browser close\`
|
||||
- \`agent-browser back\`
|
||||
- \`agent-browser forward\`
|
||||
- \`agent-browser reload\`
|
||||
|
||||
## Snapshot and Capture
|
||||
- \`agent-browser snapshot -i\` (recommended)
|
||||
- \`agent-browser snapshot -i -C\` (include cursor-interactive elements)
|
||||
- \`agent-browser screenshot\`
|
||||
- \`agent-browser screenshot --annotate\`
|
||||
- \`agent-browser screenshot --full\`
|
||||
- \`agent-browser pdf output.pdf\`
|
||||
|
||||
## Interaction
|
||||
- \`agent-browser click @e1\`
|
||||
- \`agent-browser fill @e2 "text"\`
|
||||
- \`agent-browser type @e2 "text"\`
|
||||
- \`agent-browser select @e3 "option"\`
|
||||
- \`agent-browser check @e4\`
|
||||
- \`agent-browser press Enter\`
|
||||
- \`agent-browser scroll down 500\`
|
||||
|
||||
## Retrieval
|
||||
- \`agent-browser get text @e1\`
|
||||
- \`agent-browser get url\`
|
||||
- \`agent-browser get title\`
|
||||
|
||||
## Wait
|
||||
- \`agent-browser wait @e1\`
|
||||
- \`agent-browser wait --load networkidle\`
|
||||
- \`agent-browser wait --url "**/dashboard"\`
|
||||
- \`agent-browser wait 2000\`
|
||||
|
||||
## Diff and Verification
|
||||
- \`agent-browser diff snapshot\`
|
||||
- \`agent-browser diff screenshot --baseline before.png\`
|
||||
- \`agent-browser diff url <url1> <url2>\`
|
||||
|
||||
## Session and State
|
||||
- \`agent-browser --session <name> open <url>\`
|
||||
- \`agent-browser session list\`
|
||||
- \`agent-browser state save auth.json\`
|
||||
- \`agent-browser state load auth.json\`
|
||||
|
||||
## Chrome or Electron Connection
|
||||
|
||||
To control an existing Chrome or Electron app, it must be launched with remote debugging enabled. If the app is already running, quit it first, then relaunch with the flag:
|
||||
|
||||
**macOS (Chrome):**
|
||||
\`\`\`bash
|
||||
open -a "Google Chrome" --args --remote-debugging-port=9222
|
||||
\`\`\`
|
||||
|
||||
**macOS (Electron app, e.g. Slack):**
|
||||
\`\`\`bash
|
||||
open -a "Slack" --args --remote-debugging-port=9222
|
||||
\`\`\`
|
||||
|
||||
Then connect and control:
|
||||
- \`agent-browser --auto-connect snapshot -i\`
|
||||
- \`agent-browser --cdp 9222 snapshot -i\`
|
||||
- \`agent-browser connect 9222\`
|
||||
|
||||
# Common Patterns
|
||||
|
||||
## Form Submission
|
||||
\`\`\`bash
|
||||
agent-browser open https://example.com/signup
|
||||
agent-browser open https://example.com/form
|
||||
agent-browser snapshot -i
|
||||
agent-browser fill @e1 "Jane Doe"
|
||||
agent-browser fill @e2 "jane@example.com"
|
||||
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
|
||||
|
||||
agent-browser fill @e1 "user@example.com"
|
||||
agent-browser fill @e2 "password123"
|
||||
agent-browser click @e3
|
||||
agent-browser wait --load networkidle
|
||||
agent-browser snapshot -i
|
||||
agent-browser wait 2000
|
||||
agent-browser snapshot -i # Check result
|
||||
\`\`\`
|
||||
|
||||
## Data Extraction
|
||||
## Command Chaining
|
||||
|
||||
Commands can be chained with \`&&\` in a single shell invocation. The browser persists between commands via a background daemon, so chaining is safe and more efficient than separate calls.
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser open https://example.com/products
|
||||
agent-browser wait --load networkidle
|
||||
agent-browser snapshot -i
|
||||
agent-browser get text @e5
|
||||
# Chain open + snapshot in one call (open already waits for page load)
|
||||
agent-browser open https://example.com && agent-browser snapshot -i
|
||||
|
||||
# Chain multiple interactions
|
||||
agent-browser fill @e1 "user@example.com" && agent-browser fill @e2 "password123" && agent-browser click @e3
|
||||
|
||||
# Navigate and capture
|
||||
agent-browser open https://example.com && agent-browser screenshot
|
||||
\`\`\`
|
||||
|
||||
## Annotated Screenshot for Vision Tasks
|
||||
**When to chain:** Use \`&&\` when you don't need to read the output of an intermediate command before proceeding (e.g., open + wait + screenshot). Run commands separately when you need to parse the output first (e.g., snapshot to discover refs, then interact using those refs).
|
||||
|
||||
## Handling Authentication
|
||||
|
||||
When automating a site that requires login, choose the approach that fits:
|
||||
|
||||
**Option 1: Import auth from the user's browser (fastest for one-off tasks)**
|
||||
|
||||
\`\`\`bash
|
||||
# Connect to the user's running Chrome (they're already logged in)
|
||||
agent-browser --auto-connect state save ./auth.json
|
||||
# Use that auth state
|
||||
agent-browser --state ./auth.json open https://app.example.com/dashboard
|
||||
\`\`\`
|
||||
|
||||
State files contain session tokens in plaintext -- add them to \`.gitignore\` and delete them when no longer needed. Set \`AGENT_BROWSER_ENCRYPTION_KEY\` for encryption at rest.
|
||||
|
||||
**Option 2: Chrome profile reuse (zero setup)**
|
||||
|
||||
\`\`\`bash
|
||||
# List available Chrome profiles
|
||||
agent-browser profiles
|
||||
|
||||
# Reuse the user's existing Chrome login state
|
||||
agent-browser --profile Default open https://gmail.com
|
||||
\`\`\`
|
||||
|
||||
**Option 3: Persistent profile (for recurring tasks)**
|
||||
|
||||
\`\`\`bash
|
||||
# First run: login manually or via automation
|
||||
agent-browser --profile ~/.myapp open https://app.example.com/login
|
||||
# ... fill credentials, submit ...
|
||||
|
||||
# All future runs: already authenticated
|
||||
agent-browser --profile ~/.myapp open https://app.example.com/dashboard
|
||||
\`\`\`
|
||||
|
||||
**Option 4: Session name (auto-save/restore cookies + localStorage)**
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser --session-name myapp open https://app.example.com/login
|
||||
# ... login flow ...
|
||||
agent-browser close # State auto-saved
|
||||
|
||||
# Next time: state auto-restored
|
||||
agent-browser --session-name myapp open https://app.example.com/dashboard
|
||||
\`\`\`
|
||||
|
||||
**Option 5: Auth vault (credentials stored encrypted, login by name)**
|
||||
|
||||
\`\`\`bash
|
||||
echo "$PASSWORD" | agent-browser auth save myapp --url https://app.example.com/login --username user --password-stdin
|
||||
agent-browser auth login myapp
|
||||
\`\`\`
|
||||
|
||||
\`auth login\` navigates with \`load\` and then waits for login form selectors to appear before filling/clicking, which is more reliable on delayed SPA login screens.
|
||||
|
||||
**Option 6: State file (manual save/load)**
|
||||
|
||||
\`\`\`bash
|
||||
# After logging in:
|
||||
agent-browser state save ./auth.json
|
||||
# In a future session:
|
||||
agent-browser state load ./auth.json
|
||||
agent-browser open https://app.example.com/dashboard
|
||||
\`\`\`
|
||||
|
||||
For OAuth, 2FA, cookie-based auth, and token refresh patterns, see the upstream \`references/authentication.md\` at https://github.com/vercel-labs/agent-browser/tree/main/skills/agent-browser/references.
|
||||
|
||||
## Essential Commands
|
||||
|
||||
\`\`\`bash
|
||||
# Batch: ALWAYS use batch for 2+ sequential commands. Commands run in order.
|
||||
agent-browser batch "open https://example.com" "snapshot -i"
|
||||
agent-browser batch "open https://example.com" "screenshot"
|
||||
agent-browser batch "click @e1" "wait 1000" "screenshot"
|
||||
|
||||
# Navigation
|
||||
agent-browser open <url> # Navigate (aliases: goto, navigate)
|
||||
agent-browser close # Close browser
|
||||
agent-browser close --all # Close all active sessions
|
||||
|
||||
# Snapshot
|
||||
agent-browser snapshot -i # Interactive elements with refs (recommended)
|
||||
agent-browser snapshot -i --urls # Include href URLs for links
|
||||
agent-browser snapshot -s "#selector" # Scope to CSS selector
|
||||
|
||||
# Interaction (use @refs from snapshot)
|
||||
agent-browser click @e1 # Click element
|
||||
agent-browser click @e1 --new-tab # Click and open in new tab
|
||||
agent-browser fill @e2 "text" # Clear and type text
|
||||
agent-browser type @e2 "text" # Type without clearing
|
||||
agent-browser select @e1 "option" # Select dropdown option
|
||||
agent-browser check @e1 # Check checkbox
|
||||
agent-browser press Enter # Press key
|
||||
agent-browser keyboard type "text" # Type at current focus (no selector)
|
||||
agent-browser keyboard inserttext "text" # Insert without key events
|
||||
agent-browser scroll down 500 # Scroll page
|
||||
agent-browser scroll down 500 --selector "div.content" # Scroll within a specific container
|
||||
|
||||
# Get information
|
||||
agent-browser get text @e1 # Get element text
|
||||
agent-browser get url # Get current URL
|
||||
agent-browser get title # Get page title
|
||||
agent-browser get cdp-url # Get CDP WebSocket URL
|
||||
|
||||
# Wait
|
||||
agent-browser wait @e1 # Wait for element
|
||||
agent-browser wait 2000 # Wait milliseconds
|
||||
agent-browser wait --url "**/page" # Wait for URL pattern
|
||||
agent-browser wait --text "Welcome" # Wait for text to appear (substring match)
|
||||
agent-browser wait --load networkidle # Wait for network idle (caution: see Pitfalls)
|
||||
agent-browser wait --fn "!document.body.innerText.includes('Loading...')" # Wait for text to disappear
|
||||
agent-browser wait "#spinner" --state hidden # Wait for element to disappear
|
||||
|
||||
# Downloads
|
||||
agent-browser download @e1 ./file.pdf # Click element to trigger download
|
||||
agent-browser wait --download ./output.zip # Wait for any download to complete
|
||||
agent-browser --download-path ./downloads open <url> # Set default download directory
|
||||
|
||||
# Tab management
|
||||
agent-browser tab list # List all open tabs
|
||||
agent-browser tab new # Open a blank new tab
|
||||
agent-browser tab new https://example.com # Open URL in a new tab
|
||||
agent-browser tab 2 # Switch to tab by index (0-based)
|
||||
agent-browser tab close # Close the current tab
|
||||
agent-browser tab close 2 # Close tab by index
|
||||
|
||||
# Network
|
||||
agent-browser network requests # Inspect tracked requests
|
||||
agent-browser network requests --type xhr,fetch # Filter by resource type
|
||||
agent-browser network requests --method POST # Filter by HTTP method
|
||||
agent-browser network requests --status 2xx # Filter by status (200, 2xx, 400-499)
|
||||
agent-browser network request <requestId> # View full request/response detail
|
||||
agent-browser network route "**/api/*" --abort # Block matching requests
|
||||
agent-browser network har start # Start HAR recording
|
||||
agent-browser network har stop ./capture.har # Stop and save HAR file
|
||||
|
||||
# Viewport & Device Emulation
|
||||
agent-browser set viewport 1920 1080 # Set viewport size (default: 1280x720)
|
||||
agent-browser set viewport 1920 1080 2 # 2x retina (same CSS size, higher res screenshots)
|
||||
agent-browser set device "iPhone 14" # Emulate device (viewport + user agent)
|
||||
|
||||
# Capture
|
||||
agent-browser screenshot # Screenshot to temp dir
|
||||
agent-browser screenshot --full # Full page screenshot
|
||||
agent-browser screenshot --annotate # Annotated screenshot with numbered element labels
|
||||
agent-browser screenshot --screenshot-dir ./shots # Save to custom directory
|
||||
agent-browser screenshot --screenshot-format jpeg --screenshot-quality 80
|
||||
agent-browser pdf output.pdf # Save as PDF
|
||||
|
||||
# Live preview / streaming
|
||||
agent-browser stream enable # Start runtime WebSocket streaming on an auto-selected port
|
||||
agent-browser stream enable --port 9223 # Bind a specific localhost port
|
||||
agent-browser stream status # Inspect enabled state, port, connection, and screencasting
|
||||
agent-browser stream disable # Stop runtime streaming and remove the .stream metadata file
|
||||
|
||||
# Clipboard
|
||||
agent-browser clipboard read # Read text from clipboard
|
||||
agent-browser clipboard write "Hello, World!" # Write text to clipboard
|
||||
agent-browser clipboard copy # Copy current selection
|
||||
agent-browser clipboard paste # Paste from clipboard
|
||||
|
||||
# Dialogs (alert, confirm, prompt, beforeunload)
|
||||
# By default, alert and beforeunload dialogs are auto-accepted so they never block the agent.
|
||||
# confirm and prompt dialogs still require explicit handling.
|
||||
# Use --no-auto-dialog (or AGENT_BROWSER_NO_AUTO_DIALOG=1) to disable automatic handling.
|
||||
agent-browser dialog accept # Accept dialog
|
||||
agent-browser dialog accept "my input" # Accept prompt dialog with text
|
||||
agent-browser dialog dismiss # Dismiss/cancel dialog
|
||||
agent-browser dialog status # Check if a dialog is currently open
|
||||
|
||||
# Diff (compare page states)
|
||||
agent-browser diff snapshot # Compare current vs last snapshot
|
||||
agent-browser diff snapshot --baseline before.txt # Compare current vs saved file
|
||||
agent-browser diff screenshot --baseline before.png # Visual pixel diff
|
||||
agent-browser diff url <url1> <url2> # Compare two pages
|
||||
agent-browser diff url <url1> <url2> --wait-until networkidle # Custom wait strategy
|
||||
agent-browser diff url <url1> <url2> --selector "#main" # Scope to element
|
||||
|
||||
# Chat (AI natural language control)
|
||||
agent-browser chat "open google.com and search for cats" # Single-shot instruction
|
||||
agent-browser chat # Interactive REPL mode
|
||||
agent-browser -q chat "summarize this page" # Quiet (text only, no tool calls)
|
||||
agent-browser -v chat "fill in the login form" # Verbose (show command output)
|
||||
agent-browser --model openai/gpt-4o chat "take a screenshot" # Override model
|
||||
\`\`\`
|
||||
|
||||
## Streaming
|
||||
|
||||
Every session automatically starts a WebSocket stream server on an OS-assigned port. Use \`agent-browser stream status\` to see the bound port and connection state. Use \`stream disable\` to tear it down, and \`stream enable --port <port>\` to re-enable on a specific port.
|
||||
|
||||
## Batch Execution
|
||||
|
||||
ALWAYS use \`batch\` when running 2+ commands in sequence. Batch executes commands in order, so dependent commands (like navigate then screenshot) work correctly. Each quoted argument is a separate command.
|
||||
|
||||
\`\`\`bash
|
||||
# Navigate and take a snapshot
|
||||
agent-browser batch "open https://example.com" "snapshot -i"
|
||||
|
||||
# Navigate, snapshot, and screenshot in one call
|
||||
agent-browser batch "open https://example.com" "snapshot -i" "screenshot"
|
||||
|
||||
# Click, wait, then screenshot
|
||||
agent-browser batch "click @e1" "wait 1000" "screenshot"
|
||||
|
||||
# With --bail to stop on first error
|
||||
agent-browser batch --bail "open https://example.com" "click @e1" "screenshot"
|
||||
\`\`\`
|
||||
|
||||
Only use a single command (not batch) when you need to read the output before deciding the next command. For example, you must run \`snapshot -i\` as a single command when you need to read the refs to decide what to click. After reading the snapshot, batch the remaining steps.
|
||||
|
||||
Stdin mode is also supported for programmatic use:
|
||||
|
||||
\`\`\`bash
|
||||
echo '[["open","https://example.com"],["screenshot"]]' | agent-browser batch --json
|
||||
agent-browser batch --bail < commands.json
|
||||
\`\`\`
|
||||
|
||||
## Efficiency Strategies
|
||||
|
||||
These patterns minimize tool calls and token usage.
|
||||
|
||||
**Use \`--urls\` to avoid re-navigation.** When you need to visit links from a page, use \`snapshot -i --urls\` to get all href URLs upfront. Then \`open\` each URL directly instead of clicking refs and navigating back.
|
||||
|
||||
**Snapshot once, act many times.** Never re-snapshot the same page. Extract all needed info (refs, URLs, text) from a single snapshot, then batch the remaining actions.
|
||||
|
||||
**Multi-page workflow (e.g. "visit N sites and screenshot each"):**
|
||||
|
||||
\`\`\`bash
|
||||
# 1. Get all URLs in one call
|
||||
agent-browser batch "open https://news.ycombinator.com" "snapshot -i --urls"
|
||||
# Read output to extract URLs, then visit each directly:
|
||||
# 2. One batch per target site
|
||||
agent-browser batch "open https://github.com/example/repo" "screenshot"
|
||||
agent-browser batch "open https://example.com/article" "screenshot"
|
||||
agent-browser batch "open https://other.com/page" "screenshot"
|
||||
\`\`\`
|
||||
|
||||
This approach uses 4 tool calls instead of 14+. Never go back to the listing page between visits.
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Form Submission
|
||||
|
||||
\`\`\`bash
|
||||
# Navigate and get the form structure
|
||||
agent-browser batch "open https://example.com/signup" "snapshot -i"
|
||||
# Read the snapshot output to identify form refs, then fill and submit
|
||||
agent-browser batch "fill @e1 \\"Jane Doe\\"" "fill @e2 \\"jane@example.com\\"" "select @e3 \\"California\\"" "check @e4" "click @e5" "wait 2000"
|
||||
\`\`\`
|
||||
|
||||
### Authentication with Auth Vault (Recommended)
|
||||
|
||||
\`\`\`bash
|
||||
# Save credentials once (encrypted with AGENT_BROWSER_ENCRYPTION_KEY)
|
||||
# Recommended: pipe password via stdin to avoid shell history exposure
|
||||
echo "pass" | agent-browser auth save github --url https://github.com/login --username user --password-stdin
|
||||
|
||||
# Login using saved profile (LLM never sees password)
|
||||
agent-browser auth login github
|
||||
|
||||
# List/show/delete profiles
|
||||
agent-browser auth list
|
||||
agent-browser auth show github
|
||||
agent-browser auth delete github
|
||||
\`\`\`
|
||||
|
||||
\`auth login\` waits for username/password/submit selectors before interacting, with a timeout tied to the default action timeout.
|
||||
|
||||
### Authentication with State Persistence
|
||||
|
||||
\`\`\`bash
|
||||
# Login once and save state
|
||||
agent-browser batch "open https://app.example.com/login" "snapshot -i"
|
||||
# Read snapshot to find form refs, then fill and submit
|
||||
agent-browser batch "fill @e1 \\"$USERNAME\\"" "fill @e2 \\"$PASSWORD\\"" "click @e3" "wait --url **/dashboard" "state save auth.json"
|
||||
|
||||
# Reuse in future sessions
|
||||
agent-browser batch "state load auth.json" "open https://app.example.com/dashboard"
|
||||
\`\`\`
|
||||
|
||||
### Session Persistence
|
||||
|
||||
\`\`\`bash
|
||||
# Auto-save/restore cookies and localStorage across browser restarts
|
||||
agent-browser --session-name myapp open https://app.example.com/login
|
||||
# ... login flow ...
|
||||
agent-browser close # State auto-saved to ~/.agent-browser/sessions/
|
||||
|
||||
# Next time, state is auto-loaded
|
||||
agent-browser --session-name myapp open https://app.example.com/dashboard
|
||||
|
||||
# Encrypt state at rest
|
||||
export AGENT_BROWSER_ENCRYPTION_KEY=$(openssl rand -hex 32)
|
||||
agent-browser --session-name secure open https://app.example.com
|
||||
|
||||
# Manage saved states
|
||||
agent-browser state list
|
||||
agent-browser state show myapp-default.json
|
||||
agent-browser state clear myapp
|
||||
agent-browser state clean --older-than 7
|
||||
\`\`\`
|
||||
|
||||
### Working with Iframes
|
||||
|
||||
Iframe content is automatically inlined in snapshots. Refs inside iframes carry frame context, so you can interact with them directly.
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser batch "open https://example.com/checkout" "snapshot -i"
|
||||
# @e1 [heading] "Checkout"
|
||||
# @e2 [Iframe] "payment-frame"
|
||||
# @e3 [input] "Card number"
|
||||
# @e4 [input] "Expiry"
|
||||
# @e5 [button] "Pay"
|
||||
|
||||
# Interact directly — no frame switch needed
|
||||
agent-browser batch "fill @e3 \\"4111111111111111\\"" "fill @e4 \\"12/28\\"" "click @e5"
|
||||
|
||||
# To scope a snapshot to one iframe:
|
||||
agent-browser batch "frame @e2" "snapshot -i"
|
||||
agent-browser frame main # Return to main frame
|
||||
\`\`\`
|
||||
|
||||
### Data Extraction
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser batch "open https://example.com/products" "snapshot -i"
|
||||
# Read snapshot to find element refs, then extract
|
||||
agent-browser get text @e5 # Get specific element text
|
||||
|
||||
# JSON output for parsing
|
||||
agent-browser snapshot -i --json
|
||||
agent-browser get text @e1 --json
|
||||
\`\`\`
|
||||
|
||||
### Parallel Sessions
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser --session site1 open https://site-a.com
|
||||
agent-browser --session site2 open https://site-b.com
|
||||
|
||||
agent-browser --session site1 snapshot -i
|
||||
agent-browser --session site2 snapshot -i
|
||||
|
||||
agent-browser session list
|
||||
\`\`\`
|
||||
|
||||
### Connect to Existing Chrome
|
||||
|
||||
\`\`\`bash
|
||||
# Auto-discover running Chrome with remote debugging enabled
|
||||
agent-browser --auto-connect open https://example.com
|
||||
agent-browser --auto-connect snapshot
|
||||
|
||||
# Or with explicit CDP port
|
||||
agent-browser --cdp 9222 snapshot
|
||||
\`\`\`
|
||||
|
||||
Auto-connect discovers Chrome via \`DevToolsActivePort\` and by probing common debugging ports (9222, 9229); it falls back to a direct WebSocket connection if HTTP-based CDP discovery fails.
|
||||
|
||||
### Color Scheme (Dark Mode)
|
||||
|
||||
\`\`\`bash
|
||||
# Persistent dark mode via flag (applies to all pages and new tabs)
|
||||
agent-browser --color-scheme dark open https://example.com
|
||||
|
||||
# Or via environment variable
|
||||
AGENT_BROWSER_COLOR_SCHEME=dark agent-browser open https://example.com
|
||||
|
||||
# Or set during session (persists for subsequent commands)
|
||||
agent-browser set media dark
|
||||
\`\`\`
|
||||
|
||||
### Viewport & Responsive Testing
|
||||
|
||||
\`\`\`bash
|
||||
# Set a custom viewport size (default is 1280x720)
|
||||
agent-browser set viewport 1920 1080
|
||||
agent-browser screenshot desktop.png
|
||||
|
||||
# Test mobile-width layout
|
||||
agent-browser set viewport 375 812
|
||||
agent-browser screenshot mobile.png
|
||||
|
||||
# Retina/HiDPI: same CSS layout at 2x pixel density
|
||||
# Screenshots stay at logical viewport size, but content renders at higher DPI
|
||||
agent-browser set viewport 1920 1080 2
|
||||
agent-browser screenshot retina.png
|
||||
|
||||
# Device emulation (sets viewport + user agent in one step)
|
||||
agent-browser set device "iPhone 14"
|
||||
agent-browser screenshot device.png
|
||||
\`\`\`
|
||||
|
||||
The \`scale\` parameter (3rd argument) sets \`window.devicePixelRatio\` without changing CSS layout. Use it when testing retina rendering or capturing higher-resolution screenshots.
|
||||
|
||||
### Visual Browser (Debugging)
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser --headed open https://example.com
|
||||
agent-browser highlight @e1 # Highlight element
|
||||
agent-browser inspect # Open Chrome DevTools for the active page
|
||||
agent-browser record start demo.webm # Record session
|
||||
agent-browser profiler start # Start Chrome DevTools profiling
|
||||
agent-browser profiler stop trace.json # Stop and save profile (path optional)
|
||||
\`\`\`
|
||||
|
||||
Use \`AGENT_BROWSER_HEADED=1\` to enable headed mode via environment variable. Browser extensions work in both headed and headless mode.
|
||||
|
||||
### Local Files (PDFs, HTML)
|
||||
|
||||
\`\`\`bash
|
||||
# Open local files with file:// URLs
|
||||
agent-browser --allow-file-access open file:///path/to/document.pdf
|
||||
agent-browser --allow-file-access open file:///path/to/page.html
|
||||
agent-browser screenshot output.png
|
||||
\`\`\`
|
||||
|
||||
### iOS Simulator (Mobile Safari)
|
||||
|
||||
\`\`\`bash
|
||||
# List available iOS simulators
|
||||
agent-browser device list
|
||||
|
||||
# Launch Safari on a specific device
|
||||
agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
|
||||
|
||||
# Same workflow as desktop - snapshot, interact, re-snapshot
|
||||
agent-browser -p ios snapshot -i
|
||||
agent-browser -p ios tap @e1 # Tap (alias for click)
|
||||
agent-browser -p ios fill @e2 "text"
|
||||
agent-browser -p ios swipe up # Mobile-specific gesture
|
||||
|
||||
# Take screenshot
|
||||
agent-browser -p ios screenshot mobile.png
|
||||
|
||||
# Close session (shuts down simulator)
|
||||
agent-browser -p ios close
|
||||
\`\`\`
|
||||
|
||||
**Requirements:** macOS with Xcode, Appium (\`npm install -g appium && appium driver install xcuitest\`)
|
||||
|
||||
**Real devices:** Works with physical iOS devices if pre-configured. Use \`--device "<UDID>"\` where UDID is from \`xcrun xctrace list devices\`.
|
||||
|
||||
## Security
|
||||
|
||||
All security features are opt-in. By default, agent-browser imposes no restrictions on navigation, actions, or output.
|
||||
|
||||
### Content Boundaries (Recommended for AI Agents)
|
||||
|
||||
Enable \`--content-boundaries\` to wrap page-sourced output in markers that help LLMs distinguish tool output from untrusted page content:
|
||||
|
||||
\`\`\`bash
|
||||
export AGENT_BROWSER_CONTENT_BOUNDARIES=1
|
||||
agent-browser snapshot
|
||||
# Output:
|
||||
# --- AGENT_BROWSER_PAGE_CONTENT nonce=<hex> origin=https://example.com ---
|
||||
# [accessibility tree]
|
||||
# --- END_AGENT_BROWSER_PAGE_CONTENT nonce=<hex> ---
|
||||
\`\`\`
|
||||
|
||||
### Domain Allowlist
|
||||
|
||||
Restrict navigation to trusted domains. Wildcards like \`*.example.com\` also match the bare domain \`example.com\`. Sub-resource requests, WebSocket, and EventSource connections to non-allowed domains are also blocked. Include CDN domains your target pages depend on:
|
||||
|
||||
\`\`\`bash
|
||||
export AGENT_BROWSER_ALLOWED_DOMAINS="example.com,*.example.com"
|
||||
agent-browser open https://example.com # OK
|
||||
agent-browser open https://malicious.com # Blocked
|
||||
\`\`\`
|
||||
|
||||
### Action Policy
|
||||
|
||||
Use a policy file to gate destructive actions:
|
||||
|
||||
\`\`\`bash
|
||||
export AGENT_BROWSER_ACTION_POLICY=./policy.json
|
||||
\`\`\`
|
||||
|
||||
Example \`policy.json\`:
|
||||
|
||||
\`\`\`json
|
||||
{ "default": "deny", "allow": ["navigate", "snapshot", "click", "scroll", "wait", "get"] }
|
||||
\`\`\`
|
||||
|
||||
Auth vault operations (\`auth login\`, etc.) bypass action policy but domain allowlist still applies.
|
||||
|
||||
### Output Limits
|
||||
|
||||
Prevent context flooding from large pages:
|
||||
|
||||
\`\`\`bash
|
||||
export AGENT_BROWSER_MAX_OUTPUT=50000
|
||||
\`\`\`
|
||||
|
||||
## Diffing (Verifying Changes)
|
||||
|
||||
Use \`diff snapshot\` after performing an action to verify it had the intended effect. This compares the current accessibility tree against the last snapshot taken in the session.
|
||||
|
||||
\`\`\`bash
|
||||
# Typical workflow: snapshot -> action -> diff
|
||||
agent-browser snapshot -i # Take baseline snapshot
|
||||
agent-browser click @e2 # Perform action
|
||||
agent-browser diff snapshot # See what changed (auto-compares to last snapshot)
|
||||
\`\`\`
|
||||
|
||||
For visual regression testing or monitoring:
|
||||
|
||||
\`\`\`bash
|
||||
# Save a baseline screenshot, then compare later
|
||||
agent-browser screenshot baseline.png
|
||||
# ... time passes or changes are made ...
|
||||
agent-browser diff screenshot --baseline baseline.png
|
||||
|
||||
# Compare staging vs production
|
||||
agent-browser diff url https://staging.example.com https://prod.example.com --screenshot
|
||||
\`\`\`
|
||||
|
||||
\`diff snapshot\` output uses \`+\` for additions and \`-\` for removals, similar to git diff. \`diff screenshot\` produces a diff image with changed pixels highlighted in red, plus a mismatch percentage.
|
||||
|
||||
## Timeouts and Slow Pages
|
||||
|
||||
The default timeout is 25 seconds. This can be overridden with the \`AGENT_BROWSER_DEFAULT_TIMEOUT\` environment variable (value in milliseconds).
|
||||
|
||||
**Important:** \`open\` already waits for the page \`load\` event before returning. In most cases, no additional wait is needed before taking a snapshot or screenshot. Only add an explicit wait when content loads asynchronously after the initial page load.
|
||||
|
||||
\`\`\`bash
|
||||
# Wait for a specific element to appear (preferred for dynamic content)
|
||||
agent-browser wait "#content"
|
||||
agent-browser wait @e1
|
||||
|
||||
# Wait a fixed duration (good default for slow SPAs)
|
||||
agent-browser wait 2000
|
||||
|
||||
# Wait for a specific URL pattern (useful after redirects)
|
||||
agent-browser wait --url "**/dashboard"
|
||||
|
||||
# Wait for text to appear on the page
|
||||
agent-browser wait --text "Results loaded"
|
||||
|
||||
# Wait for a JavaScript condition
|
||||
agent-browser wait --fn "document.querySelectorAll('.item').length > 0"
|
||||
\`\`\`
|
||||
|
||||
**Avoid \`wait --load networkidle\`** unless you are certain the site has no persistent network activity. Ad-heavy sites, sites with analytics/tracking, and sites with websockets will cause \`networkidle\` to hang indefinitely. Prefer \`wait 2000\` or \`wait <selector>\` instead.
|
||||
|
||||
## JavaScript Dialogs (alert / confirm / prompt)
|
||||
|
||||
When a page opens a JavaScript dialog (\`alert()\`, \`confirm()\`, or \`prompt()\`), it blocks all other browser commands (snapshot, screenshot, click, etc.) until the dialog is dismissed. If commands start timing out unexpectedly, check for a pending dialog:
|
||||
|
||||
\`\`\`bash
|
||||
# Check if a dialog is blocking
|
||||
agent-browser dialog status
|
||||
|
||||
# Accept the dialog (dismiss the alert / click OK)
|
||||
agent-browser dialog accept
|
||||
|
||||
# Accept a prompt dialog with input text
|
||||
agent-browser dialog accept "my input"
|
||||
|
||||
# Dismiss the dialog (click Cancel)
|
||||
agent-browser dialog dismiss
|
||||
\`\`\`
|
||||
|
||||
When a dialog is pending, all command responses include a \`warning\` field indicating the dialog type and message. In \`--json\` mode this appears as a \`"warning"\` key in the response object.
|
||||
|
||||
## Session Management and Cleanup
|
||||
|
||||
When running multiple agents or automations concurrently, always use named sessions to avoid conflicts:
|
||||
|
||||
\`\`\`bash
|
||||
# Each agent gets its own isolated session
|
||||
agent-browser --session agent1 open site-a.com
|
||||
agent-browser --session agent2 open site-b.com
|
||||
|
||||
# Check active sessions
|
||||
agent-browser session list
|
||||
\`\`\`
|
||||
|
||||
Always close your browser session when done to avoid leaked processes:
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser close # Close default session
|
||||
agent-browser --session agent1 close # Close specific session
|
||||
agent-browser close --all # Close all active sessions
|
||||
\`\`\`
|
||||
|
||||
If a previous session was not closed properly, the daemon may still be running. Use \`agent-browser close\` to clean it up, or \`agent-browser close --all\` to shut down every session at once.
|
||||
|
||||
To auto-shutdown the daemon after a period of inactivity (useful for ephemeral/CI environments):
|
||||
|
||||
\`\`\`bash
|
||||
AGENT_BROWSER_IDLE_TIMEOUT_MS=60000 agent-browser open example.com
|
||||
\`\`\`
|
||||
|
||||
## Ref Lifecycle (Important)
|
||||
|
||||
Refs (\`@e1\`, \`@e2\`, etc.) are invalidated when the page changes. Always re-snapshot after:
|
||||
|
||||
- Clicking links or buttons that navigate
|
||||
- Form submissions
|
||||
- Dynamic content loading (dropdowns, modals)
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser click @e5 # Navigates to new page
|
||||
agent-browser snapshot -i # MUST re-snapshot
|
||||
agent-browser click @e1 # Use new refs
|
||||
\`\`\`
|
||||
|
||||
## Annotated Screenshots (Vision Mode)
|
||||
|
||||
Use \`--annotate\` to take a screenshot with numbered labels overlaid on interactive elements. Each label \`[N]\` maps to ref \`@eN\`. This also caches refs, so you can interact with elements immediately without a separate snapshot.
|
||||
|
||||
\`\`\`bash
|
||||
agent-browser screenshot --annotate
|
||||
agent-browser click @e2
|
||||
# Output includes the image path and a legend:
|
||||
# [1] @e1 button "Submit"
|
||||
# [2] @e2 link "Home"
|
||||
# [3] @e3 textbox "Email"
|
||||
agent-browser click @e2 # Click using ref from annotated screenshot
|
||||
\`\`\`
|
||||
|
||||
## Authentication (Auth Vault)
|
||||
Use annotated screenshots when:
|
||||
|
||||
- The page has unlabeled icon buttons or visual-only elements
|
||||
- You need to verify visual layout or styling
|
||||
- Canvas or chart elements are present (invisible to text snapshots)
|
||||
- You need spatial reasoning about element positions
|
||||
|
||||
## Semantic Locators (Alternative to Refs)
|
||||
|
||||
When refs are unavailable or unreliable, use semantic locators:
|
||||
|
||||
\`\`\`bash
|
||||
echo "pass" | agent-browser auth save github --url https://github.com/login --username user --password-stdin
|
||||
agent-browser auth login github
|
||||
agent-browser find text "Sign In" click
|
||||
agent-browser find label "Email" fill "user@test.com"
|
||||
agent-browser find role button click --name "Submit"
|
||||
agent-browser find placeholder "Search" type "query"
|
||||
agent-browser find testid "submit-btn" click
|
||||
\`\`\`
|
||||
|
||||
# Security Controls (Opt-In)
|
||||
## JavaScript Evaluation (eval)
|
||||
|
||||
- Content boundaries: \`AGENT_BROWSER_CONTENT_BOUNDARIES=1\`
|
||||
- Domain allowlist: \`AGENT_BROWSER_ALLOWED_DOMAINS="example.com,*.example.com"\`
|
||||
- Action policy: \`AGENT_BROWSER_ACTION_POLICY=./policy.json\`
|
||||
- Output limits: \`AGENT_BROWSER_MAX_OUTPUT=50000\`
|
||||
|
||||
Use allowlists and policies when tasks involve unknown pages or potentially destructive actions.
|
||||
|
||||
# JavaScript Evaluation Notes
|
||||
|
||||
For complex JavaScript, use stdin mode to avoid shell quoting issues:
|
||||
Use \`eval\` to run JavaScript in the browser context. **Shell quoting can corrupt complex expressions** -- use \`--stdin\` or \`-b\` to avoid issues.
|
||||
|
||||
\`\`\`bash
|
||||
# Simple expressions work with regular quoting
|
||||
agent-browser eval 'document.title'
|
||||
agent-browser eval 'document.querySelectorAll("img").length'
|
||||
|
||||
# Complex JS: use --stdin with heredoc (RECOMMENDED)
|
||||
agent-browser eval --stdin <<'EVALEOF'
|
||||
JSON.stringify(Array.from(document.querySelectorAll("a")).map((a) => a.href))
|
||||
JSON.stringify(
|
||||
Array.from(document.querySelectorAll("img"))
|
||||
.filter(i => !i.alt)
|
||||
.map(i => ({ src: i.src.split("/").pop(), width: i.width }))
|
||||
)
|
||||
EVALEOF
|
||||
|
||||
# Alternative: base64 encoding (avoids all shell escaping issues)
|
||||
agent-browser eval -b "$(echo -n 'Array.from(document.querySelectorAll("a")).map(a => a.href)' | base64)"
|
||||
\`\`\`
|
||||
|
||||
**Why this matters:** When the shell processes your command, inner double quotes, \`!\` characters (history expansion), backticks, and \`$()\` can all corrupt the JavaScript before it reaches agent-browser. The \`--stdin\` and \`-b\` flags bypass shell interpretation entirely.
|
||||
|
||||
**Rules of thumb:**
|
||||
|
||||
- Single-line, no nested quotes -> regular \`eval 'expression'\` with single quotes is fine
|
||||
- Nested quotes, arrow functions, template literals, or multiline -> use \`eval --stdin <<'EVALEOF'\`
|
||||
- Programmatic/generated scripts -> use \`eval -b\` with base64
|
||||
|
||||
## Configuration File
|
||||
|
||||
Create \`agent-browser.json\` in the project root for persistent settings:
|
||||
|
||||
\`\`\`json
|
||||
{
|
||||
"headed": true,
|
||||
"proxy": "http://localhost:8080",
|
||||
"profile": "./browser-data"
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
Priority (lowest to highest): \`~/.agent-browser/config.json\` < \`./agent-browser.json\` < env vars < CLI flags. Use \`--config <path>\` or \`AGENT_BROWSER_CONFIG\` env var for a custom config file (exits with error if missing/invalid). All CLI options map to camelCase keys (e.g., \`--executable-path\` -> \`"executablePath"\`). Boolean flags accept \`true\`/\`false\` values (e.g., \`--headed false\` overrides config). Extensions from user and project configs are merged, not replaced.
|
||||
|
||||
## Deep-Dive Documentation
|
||||
|
||||
Extended references (commands, snapshot-refs, sessions, authentication, video, profiling, proxy): https://github.com/vercel-labs/agent-browser/tree/main/skills/agent-browser/references
|
||||
|
||||
## Cloud Providers
|
||||
|
||||
Use \`-p <provider>\` (or \`AGENT_BROWSER_PROVIDER\`) to run against a cloud browser instead of launching a local Chrome instance. Supported providers: \`agentcore\`, \`browserbase\`, \`browserless\`, \`browseruse\`, \`kernel\`.
|
||||
|
||||
### AgentCore (AWS Bedrock)
|
||||
|
||||
\`\`\`bash
|
||||
# Credentials auto-resolved from env vars or AWS CLI (SSO, IAM roles, etc.)
|
||||
agent-browser -p agentcore open https://example.com
|
||||
|
||||
# With persistent browser profile
|
||||
AGENTCORE_PROFILE_ID=my-profile agent-browser -p agentcore open https://example.com
|
||||
|
||||
# With explicit region
|
||||
AGENTCORE_REGION=eu-west-1 agent-browser -p agentcore open https://example.com
|
||||
\`\`\`
|
||||
|
||||
Set \`AWS_PROFILE\` to select a named AWS profile.
|
||||
|
||||
## Browser Engine Selection
|
||||
|
||||
Use \`--engine\` to choose a local browser engine. The default is \`chrome\`.
|
||||
|
||||
\`\`\`bash
|
||||
# Use Lightpanda (fast headless browser, requires separate install)
|
||||
agent-browser --engine lightpanda open example.com
|
||||
|
||||
# Via environment variable
|
||||
export AGENT_BROWSER_ENGINE=lightpanda
|
||||
agent-browser open example.com
|
||||
|
||||
# With custom binary path
|
||||
agent-browser --engine lightpanda --executable-path /path/to/lightpanda open example.com
|
||||
\`\`\`
|
||||
|
||||
Supported engines:
|
||||
- \`chrome\` (default) -- Chrome/Chromium via CDP
|
||||
- \`lightpanda\` -- Lightpanda headless browser via CDP (10x faster, 10x less memory than Chrome)
|
||||
|
||||
Lightpanda does not support \`--extension\`, \`--profile\`, \`--state\`, or \`--allow-file-access\`. Install Lightpanda from https://lightpanda.io/docs/open-source/installation.
|
||||
|
||||
## Observability Dashboard
|
||||
|
||||
The dashboard is a standalone background server that shows live browser viewports, command activity, and console output for all sessions.
|
||||
|
||||
\`\`\`bash
|
||||
# Start the dashboard server (background, port 4848)
|
||||
agent-browser dashboard start
|
||||
|
||||
# All sessions are automatically visible in the dashboard
|
||||
agent-browser open example.com
|
||||
|
||||
# Stop the dashboard
|
||||
agent-browser dashboard stop
|
||||
\`\`\`
|
||||
|
||||
The dashboard runs independently of browser sessions on port 4848 (configurable with \`--port\`). All sessions automatically stream to the dashboard. Sessions can also be created from the dashboard UI with local engines or cloud providers.
|
||||
|
||||
### Dashboard AI Chat
|
||||
|
||||
The dashboard has an optional AI chat tab powered by the Vercel AI Gateway. Enable it by setting:
|
||||
|
||||
\`\`\`bash
|
||||
export AI_GATEWAY_API_KEY=gw_your_key_here
|
||||
export AI_GATEWAY_MODEL=anthropic/claude-sonnet-4.6 # optional default
|
||||
export AI_GATEWAY_URL=https://ai-gateway.vercel.sh # optional default
|
||||
\`\`\`
|
||||
|
||||
The Chat tab is always visible in the dashboard. Set \`AI_GATEWAY_API_KEY\` to enable AI responses.
|
||||
|
||||
## Ready-to-Use Templates
|
||||
|
||||
Example scripts in the upstream repo: https://github.com/vercel-labs/agent-browser/tree/main/skills/agent-browser/templates
|
||||
|
||||
|
||||
# Execution Rules in This Runtime
|
||||
|
||||
- Run all agent-browser commands via \`execScript\` with \`runInClient: true\`, because agent-browser is a local CLI.
|
||||
- Prefer \`--json\` output when structured parsing is needed.
|
||||
- Always close sessions when done: \`agent-browser close\` (or named session close).
|
||||
- If a task stalls, use explicit wait commands instead of blind retries.
|
||||
- Always close sessions when done: \`agent-browser close\`, \`agent-browser close --all\`, or \`agent-browser --session <name> close\`.
|
||||
- If a task stalls, use explicit \`wait\` commands instead of blind retries.
|
||||
- Run \`snapshot -i\` alone when you must read refs from output; then use \`agent-browser batch\` or \`&&\` for the remaining steps (see **Batch Execution** above).
|
||||
</agent_browser_guides>
|
||||
`;
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ interface OutOfScopeWarningProps {
|
|||
const OutOfScopeWarning = memo<OutOfScopeWarningProps>(({ paths }) => {
|
||||
const { t } = useTranslation('tool');
|
||||
|
||||
// Get working directory from topic or agent store
|
||||
const topicWorkingDir = useChatStore(topicSelectors.currentTopicWorkingDirectory);
|
||||
const agentWorkingDir = useAgentStore(agentSelectors.currentAgentWorkingDirectory);
|
||||
const workingDirectory = topicWorkingDir || agentWorkingDir;
|
||||
|
|
|
|||
|
|
@ -184,6 +184,8 @@ export interface RunCommandParams {
|
|||
command: string;
|
||||
cwd?: string;
|
||||
description?: string;
|
||||
/** Merged into the child process environment (after `process.env`). */
|
||||
env?: Record<string, string>;
|
||||
run_in_background?: boolean;
|
||||
timeout?: number;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,6 +74,19 @@ describe('runCommand', () => {
|
|||
expect(result.success).toBe(true);
|
||||
expect(result.stdout).toContain('/tmp');
|
||||
});
|
||||
|
||||
it('should merge env into child process environment', async () => {
|
||||
const result = await runCommand(
|
||||
{
|
||||
command: 'node -e "console.log(process.env.LOB_TEST_ENV_MERGE)"',
|
||||
env: { LOB_TEST_ENV_MERGE: 'from-runner' },
|
||||
},
|
||||
{ processManager },
|
||||
);
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.stdout).toContain('from-runner');
|
||||
});
|
||||
});
|
||||
|
||||
describe('background mode', () => {
|
||||
|
|
|
|||
|
|
@ -15,7 +15,14 @@ export interface RunCommandOptions {
|
|||
}
|
||||
|
||||
export async function runCommand(
|
||||
{ command, cwd, description, run_in_background, timeout = 120_000 }: RunCommandParams,
|
||||
{
|
||||
command,
|
||||
cwd,
|
||||
description,
|
||||
env: extraEnv,
|
||||
run_in_background,
|
||||
timeout = 120_000,
|
||||
}: RunCommandParams,
|
||||
{ processManager, logger }: RunCommandOptions,
|
||||
): Promise<RunCommandResult> {
|
||||
const logPrefix = `[runCommand: ${description || command.slice(0, 50)}]`;
|
||||
|
|
@ -23,13 +30,14 @@ export async function runCommand(
|
|||
|
||||
const effectiveTimeout = Math.min(Math.max(timeout, 1000), 600_000);
|
||||
const shellConfig = getShellConfig(command);
|
||||
const childEnv = extraEnv ? { ...process.env, ...extraEnv } : process.env;
|
||||
|
||||
try {
|
||||
if (run_in_background) {
|
||||
const shellId = randomUUID();
|
||||
const childProcess = spawn(shellConfig.cmd, shellConfig.args, {
|
||||
cwd,
|
||||
env: process.env,
|
||||
env: childEnv,
|
||||
shell: false,
|
||||
});
|
||||
|
||||
|
|
@ -61,7 +69,7 @@ export async function runCommand(
|
|||
return new Promise<RunCommandResult>((resolve) => {
|
||||
const childProcess = spawn(shellConfig.cmd, shellConfig.args, {
|
||||
cwd,
|
||||
env: process.env,
|
||||
env: childEnv,
|
||||
shell: false,
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ export interface RunCommandParams {
|
|||
command: string;
|
||||
cwd?: string;
|
||||
description?: string;
|
||||
env?: Record<string, string>;
|
||||
run_in_background?: boolean;
|
||||
timeout?: number;
|
||||
}
|
||||
|
|
|
|||
7
packages/local-file-shell/vitest.config.mts
Normal file
7
packages/local-file-shell/vitest.config.mts
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
export default defineConfig({
|
||||
test: {
|
||||
environment: 'node',
|
||||
},
|
||||
});
|
||||
|
|
@ -108,7 +108,6 @@ const RuntimeConfig = memo(() => {
|
|||
chatConfigByIdSelectors.getRuntimeModeById(agentId)(s),
|
||||
]);
|
||||
|
||||
// Get working directory
|
||||
const topicWorkingDirectory = useChatStore(topicSelectors.currentTopicWorkingDirectory);
|
||||
const agentWorkingDirectory = useAgentStore((s) =>
|
||||
agentId ? agentByIdSelectors.getAgentWorkingDirectoryById(agentId)(s) : undefined,
|
||||
|
|
|
|||
|
|
@ -635,9 +635,9 @@ describe('VARIABLE_GENERATORS', () => {
|
|||
expect(VARIABLE_GENERATORS.userDataPath()).toBe('');
|
||||
});
|
||||
|
||||
it('should return default message for working directory when not specified', () => {
|
||||
it('should return empty string for working directory when not on desktop', () => {
|
||||
const result = VARIABLE_GENERATORS.workingDirectory();
|
||||
expect(result).toBe('(not specified, use user Desktop directory as default)');
|
||||
expect(result).toBe('');
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { isDesktop } from '@lobechat/const';
|
||||
import { uuid } from '@lobechat/utils';
|
||||
import { template } from 'es-toolkit/compat';
|
||||
|
||||
|
|
@ -153,16 +154,16 @@ export const VARIABLE_GENERATORS = {
|
|||
videosPath: () => globalAgentContextManager.getContext().videosPath ?? '',
|
||||
userDataPath: () => globalAgentContextManager.getContext().userDataPath ?? '',
|
||||
/**
|
||||
* Working directory: Topic-level setting takes priority over Agent-level setting
|
||||
* Working directory: topic-level override takes priority over agent-level value
|
||||
*/
|
||||
workingDirectory: () => {
|
||||
// First check topic-level working directory
|
||||
if (!isDesktop) return '';
|
||||
|
||||
const topicWorkingDir = topicSelectors.currentTopicWorkingDirectory(useChatStore.getState());
|
||||
if (topicWorkingDir) return topicWorkingDir;
|
||||
|
||||
// Fallback to agent-level working directory
|
||||
const agentWorkingDir = agentSelectors.currentAgentWorkingDirectory(useAgentStore.getState());
|
||||
return agentWorkingDir ?? '(not specified, use user Desktop directory as default)';
|
||||
return agentWorkingDir ?? '(not specified, use user Home directory as default)';
|
||||
},
|
||||
} as Record<string, () => string>;
|
||||
|
||||
|
|
|
|||
|
|
@ -738,8 +738,7 @@ export default {
|
|||
'settingSystemTools.appEnvironment.desc': 'Built-in runtime versions in the desktop app',
|
||||
'settingSystemTools.appEnvironment.electron.desc': 'Electron framework version',
|
||||
'settingSystemTools.appEnvironment.node.desc': 'Embedded Node.js version',
|
||||
'settingSystemTools.appEnvironment.title': 'App Environment',
|
||||
'settingSystemTools.autoSelectDesc': 'The best available tool will be automatically selected',
|
||||
'settingSystemTools.appEnvironment.title': 'Built-in App Tools',
|
||||
'settingSystemTools.category.browserAutomation': 'Browser Automation',
|
||||
'settingSystemTools.category.browserAutomation.desc':
|
||||
'Tools for headless browser automation and web interaction',
|
||||
|
|
@ -763,10 +762,15 @@ export default {
|
|||
'settingSystemTools.tools.find.desc': 'Unix find - standard file search command',
|
||||
'settingSystemTools.tools.grep.desc': 'GNU grep - standard text search tool',
|
||||
'settingSystemTools.tools.mdfind.desc': 'macOS Spotlight search (fast indexed search)',
|
||||
'settingSystemTools.tools.lobehub.desc': 'LobeHub CLI - manage and connect to LobeHub services',
|
||||
'settingSystemTools.tools.bun.desc': 'Bun - fast JavaScript runtime and package manager',
|
||||
'settingSystemTools.tools.bunx.desc': 'bunx - Bun package runner for executing npm packages',
|
||||
'settingSystemTools.tools.node.desc': 'Node.js - JavaScript runtime for executing JS/TS',
|
||||
'settingSystemTools.tools.npm.desc': 'npm - Node.js package manager for installing dependencies',
|
||||
'settingSystemTools.tools.pnpm.desc': 'pnpm - fast, disk space efficient package manager',
|
||||
'settingSystemTools.tools.python.desc': 'Python - programming language runtime',
|
||||
'settingSystemTools.tools.rg.desc': 'ripgrep - extremely fast text search tool',
|
||||
'settingSystemTools.tools.uv.desc': 'uv - extremely fast Python package manager',
|
||||
'settingTTS.openai.sttModel': 'OpenAI Speech-to-Text Model',
|
||||
'settingTTS.openai.title': 'OpenAI',
|
||||
'settingTTS.openai.ttsModel': 'OpenAI Text-to-Speech Model',
|
||||
|
|
|
|||
|
|
@ -0,0 +1,103 @@
|
|||
'use client';
|
||||
|
||||
import { Button, Flexbox, Input, Text } from '@lobehub/ui';
|
||||
import { memo, useCallback, useState } from 'react';
|
||||
|
||||
import { electronSystemService } from '@/services/electron/system';
|
||||
|
||||
/** One executed CLI invocation together with the output captured for it. */
interface CommandResult {
  /** Argument string that was passed after `lobehub`. */
  args: string;
  /** Exit code of the run; `-1` is recorded when the service call itself rejected. */
  exitCode: number;
  stderr: string;
  stdout: string;
}

/** Argument strings offered as one-click preset buttons. */
const PRESET_COMMANDS = ['--version', '--help', 'status'] as const;

/**
 * Panel for running the embedded `lobehub` CLI through
 * `electronSystemService.runCliCommand` and listing each run's output.
 *
 * Runs are appended to an in-memory list and rendered in execution order.
 * Only one command may be in flight at a time.
 */
const CliTestSection = memo(() => {
  const [results, setResults] = useState<CommandResult[]>([]);
  const [running, setRunning] = useState(false);
  const [customCmd, setCustomCmd] = useState('');

  const runCommand = useCallback(
    async (args: string) => {
      // The buttons show a loading state while a run is in flight, but the input's
      // Enter handler has no such guard. Without this check, overlapping runs could
      // start and the first one to finish would clear `running` too early.
      if (running) return;

      setRunning(true);
      try {
        const result = await electronSystemService.runCliCommand(args);
        setResults((prev) => [...prev, { args, ...result }]);
      } catch (error: unknown) {
        // Record the failure as a pseudo-result so it is rendered like any other run.
        setResults((prev) => [...prev, { args, exitCode: -1, stderr: String(error), stdout: '' }]);
      } finally {
        setRunning(false);
      }
    },
    [running],
  );

  // Whitespace-only input is treated as empty so it cannot trigger a run.
  const customArgs = customCmd.trim();
  const runCustom = () => {
    if (customArgs) void runCommand(customArgs);
  };

  return (
    <Flexbox gap={16} style={{ marginTop: 24 }}>
      <Text style={{ fontSize: 18, fontWeight: 600 }}>CLI Embedded Test</Text>

      <Flexbox horizontal gap={8} wrap="wrap">
        {PRESET_COMMANDS.map((cmd) => (
          <Button key={cmd} loading={running} size="small" onClick={() => runCommand(cmd)}>
            lobehub {cmd}
          </Button>
        ))}
      </Flexbox>

      <Flexbox horizontal gap={8}>
        <Input
          placeholder="Custom args (e.g. connect --help)"
          style={{ flex: 1 }}
          value={customCmd}
          onChange={(e) => setCustomCmd(e.target.value)}
          onPressEnter={runCustom}
        />
        <Button
          disabled={!customArgs}
          loading={running}
          size="small"
          type="primary"
          onClick={runCustom}
        >
          Run
        </Button>
      </Flexbox>

      {/* Results are append-only, so the index is a stable key. */}
      {results.map((r, i) => (
        <Flexbox
          gap={4}
          key={i}
          style={{
            background: 'var(--ant-color-fill-quaternary)',
            borderRadius: 8,
            fontFamily: 'monospace',
            fontSize: 12,
            padding: 12,
          }}
        >
          <Text style={{ color: 'var(--ant-color-primary)', fontWeight: 600 }}>
            $ lobehub {r.args} (exit: {r.exitCode})
          </Text>
          {r.stdout && (
            <pre style={{ margin: 0, whiteSpace: 'pre-wrap', wordBreak: 'break-all' }}>
              {r.stdout}
            </pre>
          )}
          {r.stderr && (
            <pre
              style={{
                color: 'var(--ant-color-error)',
                margin: 0,
                whiteSpace: 'pre-wrap',
                wordBreak: 'break-all',
              }}
            >
              {r.stderr}
            </pre>
          )}
        </Flexbox>
      ))}
    </Flexbox>
  );
});
|
||||
|
||||
export default CliTestSection;
|
||||
|
|
@ -19,9 +19,14 @@ const TOOL_CATEGORIES = {
|
|||
descKey: 'settingSystemTools.category.runtimeEnvironment.desc',
|
||||
titleKey: 'settingSystemTools.category.runtimeEnvironment',
|
||||
tools: [
|
||||
{ descKey: 'settingSystemTools.tools.lobehub.desc', name: 'lobehub' },
|
||||
{ descKey: 'settingSystemTools.tools.node.desc', name: 'node' },
|
||||
{ descKey: 'settingSystemTools.tools.python.desc', name: 'python' },
|
||||
{ descKey: 'settingSystemTools.tools.npm.desc', name: 'npm' },
|
||||
{ descKey: 'settingSystemTools.tools.bun.desc', name: 'bun' },
|
||||
{ descKey: 'settingSystemTools.tools.bunx.desc', name: 'bunx' },
|
||||
{ descKey: 'settingSystemTools.tools.pnpm.desc', name: 'pnpm' },
|
||||
{ descKey: 'settingSystemTools.tools.uv.desc', name: 'uv' },
|
||||
],
|
||||
},
|
||||
|
||||
|
|
@ -171,7 +176,6 @@ const ToolDetectorSection = memo(() => {
|
|||
justify="flex-end"
|
||||
style={{ marginBlockStart: 8 }}
|
||||
>
|
||||
<Text type="secondary">{t('settingSystemTools.autoSelectDesc')}</Text>
|
||||
<Button
|
||||
icon={<Icon icon={RefreshCw} spin={detecting} />}
|
||||
loading={detecting}
|
||||
|
|
|
|||
|
|
@ -1,17 +1,23 @@
|
|||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
import SettingHeader from '@/routes/(main)/settings/features/SettingHeader';
|
||||
import { useUserStore } from '@/store/user';
|
||||
import { userGeneralSettingsSelectors } from '@/store/user/slices/settings/selectors';
|
||||
|
||||
import AppEnvironmentSection from './features/AppEnvironmentSection';
|
||||
import CliTestSection from './features/CliTestSection';
|
||||
import ToolDetectorSection from './features/ToolDetectorSection';
|
||||
|
||||
const Page = () => {
|
||||
const { t } = useTranslation('setting');
|
||||
const isDevMode = useUserStore((s) => userGeneralSettingsSelectors.config(s).isDevMode);
|
||||
|
||||
return (
|
||||
<>
|
||||
<SettingHeader title={t('tab.systemTools')} />
|
||||
<AppEnvironmentSection />
|
||||
<ToolDetectorSection />
|
||||
{isDevMode ? <CliTestSection /> : null}
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -56,6 +56,10 @@ class ElectronSystemService {
|
|||
return this.ipc.system.hasLegacyLocalDb();
|
||||
}
|
||||
|
||||
async runCliCommand(args: string): Promise<{ exitCode: number; stderr: string; stdout: string }> {
|
||||
return this.ipc.cli.runCliCommand(args);
|
||||
}
|
||||
|
||||
showContextMenu = async (type: string, data?: any) => {
|
||||
return this.ipc.menu.showContextMenu({ data, type });
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
import { DEFAULT_PROVIDER } from '@lobechat/business-const';
|
||||
import { DEFAULT_MODEL, DEFAUTT_AGENT_TTS_CONFIG } from '@lobechat/const';
|
||||
import { DEFAULT_MODEL, DEFAUTT_AGENT_TTS_CONFIG, isDesktop } from '@lobechat/const';
|
||||
import { type AgentBuilderContext } from '@lobechat/context-engine';
|
||||
import { type AgentMode, type LobeAgentTTSConfig, type RuntimeEnvConfig } from '@lobechat/types';
|
||||
|
||||
import { globalAgentContextManager } from '@/helpers/GlobalAgentContextManager';
|
||||
|
||||
import { type AgentStoreState } from '../initialState';
|
||||
import { getLocalAgentWorkingDirectory } from '../utils/localAgentWorkingDirectoryStorage';
|
||||
import { agentSelectors } from './selectors';
|
||||
|
||||
/**
|
||||
|
|
@ -87,8 +90,13 @@ const getAgentRuntimeEnvConfigById =
|
|||
*/
|
||||
const getAgentWorkingDirectoryById =
|
||||
(agentId: string) =>
|
||||
(s: AgentStoreState): string | undefined =>
|
||||
getAgentRuntimeEnvConfigById(agentId)(s)?.workingDirectory;
|
||||
(_s: AgentStoreState): string | undefined => {
|
||||
if (!isDesktop) return;
|
||||
|
||||
return (
|
||||
getLocalAgentWorkingDirectory(agentId) ?? globalAgentContextManager.getContext().homePath
|
||||
);
|
||||
};
|
||||
|
||||
/**
|
||||
* Get agent builder context by agentId
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import {
|
|||
DEFAULT_BACKGROUND_COLOR,
|
||||
DEFAULT_MODEL,
|
||||
DEFAUTT_AGENT_TTS_CONFIG,
|
||||
isDesktop,
|
||||
} from '@lobechat/const';
|
||||
import {
|
||||
type AgentMode,
|
||||
|
|
@ -18,9 +19,11 @@ import { KnowledgeType } from '@lobechat/types';
|
|||
import { VoiceList } from '@lobehub/tts';
|
||||
|
||||
import { DEFAULT_OPENING_QUESTIONS } from '@/features/AgentSetting/store/selectors';
|
||||
import { globalAgentContextManager } from '@/helpers/GlobalAgentContextManager';
|
||||
import { filterToolIds } from '@/helpers/toolFilters';
|
||||
|
||||
import { type AgentStoreState } from '../initialState';
|
||||
import { getLocalAgentWorkingDirectory } from '../utils/localAgentWorkingDirectoryStorage';
|
||||
import { builtinAgentSelectors } from './builtinAgentSelectors';
|
||||
|
||||
// ========== Meta ============== //
|
||||
|
|
@ -258,7 +261,17 @@ const currentAgentRuntimeEnvConfig = (s: AgentStoreState): RuntimeEnvConfig | un
|
|||
* Get current agent's working directory
|
||||
*/
|
||||
const currentAgentWorkingDirectory = (s: AgentStoreState): string | undefined =>
|
||||
currentAgentRuntimeEnvConfig(s)?.workingDirectory;
|
||||
(() => {
|
||||
if (!isDesktop) return;
|
||||
|
||||
const activeAgentId = s.activeAgentId;
|
||||
if (!activeAgentId) return globalAgentContextManager.getContext().homePath;
|
||||
|
||||
return (
|
||||
getLocalAgentWorkingDirectory(activeAgentId) ??
|
||||
globalAgentContextManager.getContext().homePath
|
||||
);
|
||||
})();
|
||||
|
||||
const isCurrentAgentExternal = (s: AgentStoreState): boolean => !currentAgentData(s)?.virtual;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { isDesktop } from '@lobechat/const';
|
||||
import { type AgentContextDocument } from '@lobechat/context-engine';
|
||||
import { isChatGroupSessionId } from '@lobechat/types';
|
||||
import { getSingletonAnalyticsOptional } from '@lobehub/analytics';
|
||||
|
|
@ -24,6 +25,7 @@ import type { MetaData } from '@/types/meta';
|
|||
import { merge } from '@/utils/merge';
|
||||
|
||||
import type { AgentStore } from '../../store';
|
||||
import { setLocalAgentWorkingDirectory } from '../../utils/localAgentWorkingDirectoryStorage';
|
||||
import type { AgentSliceState, LoadingState, SaveStatus } from './initialState';
|
||||
|
||||
const FETCH_AGENT_CONFIG_KEY = 'FETCH_AGENT_CONFIG';
|
||||
|
|
@ -214,7 +216,15 @@ export class AgentSliceActionImpl {
|
|||
): Promise<void> => {
|
||||
if (!agentId) return;
|
||||
|
||||
await this.#get().updateAgentChatConfigById(agentId, { runtimeEnv: config });
|
||||
if (isDesktop && 'workingDirectory' in config) {
|
||||
setLocalAgentWorkingDirectory(agentId, config.workingDirectory);
|
||||
}
|
||||
|
||||
const restConfig = { ...config };
|
||||
delete restConfig.workingDirectory;
|
||||
if (Object.keys(restConfig).length > 0) {
|
||||
await this.#get().updateAgentChatConfigById(agentId, { runtimeEnv: restConfig });
|
||||
}
|
||||
};
|
||||
|
||||
updateAgentMeta = async (meta: Partial<MetaData>): Promise<void> => {
|
||||
|
|
|
|||
53
src/store/agent/utils/localAgentWorkingDirectoryStorage.ts
Normal file
53
src/store/agent/utils/localAgentWorkingDirectoryStorage.ts
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
// localStorage key under which the agent id -> working directory map is stored as a JSON string.
const LOCAL_AGENT_WORKING_DIRECTORY_KEY = 'lobechat-local-agent-working-directories';
|
||||
|
||||
/**
 * Resolves the `localStorage` object when one is usable.
 *
 * @returns `window.localStorage`, or `undefined` when there is no `window`
 * (non-browser runtime) or when reading the property throws.
 */
const getStorage = (): Storage | undefined => {
  if (typeof window === 'undefined') return undefined;

  try {
    // The `localStorage` getter itself may throw (e.g. a SecurityError when
    // storage access is blocked), so the read has to be guarded as well.
    return window.localStorage;
  } catch {
    return undefined;
  }
};
|
||||
|
||||
/**
 * Loads the persisted agent id -> working directory map.
 *
 * Reading is best-effort: a missing storage, a missing entry, malformed JSON,
 * or a payload that is not a plain object all yield an empty map. Entries whose
 * value is not a non-empty string are dropped.
 *
 * @returns a fresh object that the caller may freely mutate.
 */
const readMap = (): Record<string, string> => {
  const storage = getStorage();
  if (!storage) return {};

  try {
    const raw = storage.getItem(LOCAL_AGENT_WORKING_DIRECTORY_KEY);
    if (!raw) return {};

    const parsed: unknown = JSON.parse(raw);
    // Arrays are objects too; without this guard a stored array would be
    // turned into a bogus map keyed by its indices.
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return {};

    return Object.fromEntries(
      Object.entries(parsed).filter(
        // Keys from Object.entries are always strings; only values need checking.
        (entry): entry is [string, string] => typeof entry[1] === 'string' && !!entry[1],
      ),
    );
  } catch {
    // Corrupted payload or storage read failure: behave as if nothing is stored.
    return {};
  }
};
|
||||
|
||||
/**
 * Persists the agent id -> working directory map as JSON.
 *
 * Writing is best-effort: when no storage is available this is a no-op, and a
 * failing `setItem` (e.g. quota exceeded) is logged instead of thrown so that a
 * local-cache failure cannot abort the caller's remaining work.
 *
 * @param value - the complete map to store; it replaces any previous content.
 */
const writeMap = (value: Record<string, string>) => {
  const storage = getStorage();
  if (!storage) return;

  try {
    storage.setItem(LOCAL_AGENT_WORKING_DIRECTORY_KEY, JSON.stringify(value));
  } catch (error: unknown) {
    console.warn('Failed to persist local agent working directories', error);
  }
};
|
||||
|
||||
/**
 * Looks up the locally stored working directory of an agent.
 *
 * @param agentId - id of the agent; an empty id yields `undefined`.
 * @returns the stored directory, or `undefined` when none is recorded.
 */
export const getLocalAgentWorkingDirectory = (agentId: string): string | undefined => {
  if (!agentId) return undefined;

  const directories = readMap();

  // Own-property check: ids such as "constructor" or "toString" would otherwise
  // resolve to members inherited from Object.prototype instead of `undefined`.
  return Object.prototype.hasOwnProperty.call(directories, agentId)
    ? directories[agentId]
    : undefined;
};
|
||||
|
||||
/**
 * Stores or clears the local working directory of an agent.
 *
 * @param agentId - id of the agent; an empty id makes this a no-op.
 * @param workingDirectory - directory to remember; an empty or omitted value
 * removes the agent's entry.
 */
export const setLocalAgentWorkingDirectory = (agentId: string, workingDirectory?: string): void => {
  if (!agentId) return;

  const directories = readMap();

  if (!workingDirectory) {
    delete directories[agentId];
  } else {
    directories[agentId] = workingDirectory;
  }

  writeMap(directories);
};
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
import { isDesktop } from '@lobechat/const';
|
||||
import { t } from 'i18next';
|
||||
|
||||
import {
|
||||
|
|
@ -75,6 +76,8 @@ const currentActiveTopicSummary = (s: ChatStoreState): ChatTopicSummary | undefi
|
|||
* Returns undefined if no topic is active or no working directory is set
|
||||
*/
|
||||
const currentTopicWorkingDirectory = (s: ChatStoreState): string | undefined => {
  // Outside the desktop build this selector always yields undefined.
  if (isDesktop) {
    const topic = currentActiveTopic(s);

    return topic?.metadata?.workingDirectory;
  }

  return undefined;
};
|
||||
|
|
|
|||
Loading…
Reference in a new issue