diff --git a/migrations/000_combined.sql b/migrations/000_combined.sql index 5ded6481..176963b4 100644 --- a/migrations/000_combined.sql +++ b/migrations/000_combined.sql @@ -30,6 +30,7 @@ CREATE TABLE IF NOT EXISTS remote_agent_codebases ( repository_url VARCHAR(500), default_cwd VARCHAR(500) NOT NULL, ai_assistant_type VARCHAR(20) DEFAULT 'claude', + allow_env_keys BOOLEAN NOT NULL DEFAULT FALSE, commands JSONB DEFAULT '{}'::jsonb, created_at TIMESTAMP DEFAULT NOW(), updated_at TIMESTAMP DEFAULT NOW() @@ -307,3 +308,7 @@ ALTER TABLE remote_agent_conversations -- From migration 016: ended_reason on sessions ALTER TABLE remote_agent_sessions ADD COLUMN IF NOT EXISTS ended_reason TEXT; + +-- From migration 021: allow_env_keys on codebases +ALTER TABLE remote_agent_codebases + ADD COLUMN IF NOT EXISTS allow_env_keys BOOLEAN NOT NULL DEFAULT FALSE; diff --git a/migrations/021_add_allow_env_keys_to_codebases.sql b/migrations/021_add_allow_env_keys_to_codebases.sql new file mode 100644 index 00000000..a1f30d2e --- /dev/null +++ b/migrations/021_add_allow_env_keys_to_codebases.sql @@ -0,0 +1,4 @@ +-- Add per-codebase consent bit for subprocess .env key leakage +-- DEFAULT FALSE = safe by default; user must explicitly opt in +ALTER TABLE remote_agent_codebases + ADD COLUMN allow_env_keys BOOLEAN NOT NULL DEFAULT FALSE; diff --git a/packages/core/package.json b/packages/core/package.json index 0c5f6b78..44adcc4a 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -23,7 +23,7 @@ "./state/*": "./src/state/*.ts" }, "scripts": { - "test": "bun test src/clients/ && bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts 
src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-allowlist.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts", + "test": "bun test src/clients/ && bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-allowlist.test.ts src/utils/env-leak-scanner.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun 
test src/orchestrator/orchestrator-isolation.test.ts", "type-check": "bun x tsc --noEmit", "build": "echo 'No build needed - Bun runs TypeScript directly'" }, diff --git a/packages/core/src/clients/claude.test.ts b/packages/core/src/clients/claude.test.ts index 41a9fbbd..e031a0d7 100644 --- a/packages/core/src/clients/claude.test.ts +++ b/packages/core/src/clients/claude.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeEach, spyOn } from 'bun:test'; +import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -18,6 +18,8 @@ mock.module('@anthropic-ai/claude-agent-sdk', () => ({ import { ClaudeClient } from './claude'; import * as claudeModule from './claude'; +import * as codebaseDb from '../db/codebases'; +import * as envLeakScanner from '../utils/env-leak-scanner'; describe('ClaudeClient', () => { let client: ClaudeClient; @@ -951,4 +953,83 @@ describe('ClaudeClient', () => { expect(chunks[0]).toEqual({ type: 'assistant', content: 'Real content' }); }); }); + + describe('pre-spawn env leak gate', () => { + let spyFindByDefaultCwd: ReturnType; + let spyFindByPathPrefix: ReturnType; + let spyScan: ReturnType; + + beforeEach(() => { + spyFindByDefaultCwd = spyOn(codebaseDb, 'findCodebaseByDefaultCwd').mockResolvedValue(null); + spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null); + spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({ + path: '/workspace', + findings: [], + }); + mockQuery.mockImplementation(async function* () { + yield { type: 'result', session_id: 'sid-gate' }; + }); + }); + + afterEach(() => { + spyFindByDefaultCwd.mockRestore(); + spyFindByPathPrefix.mockRestore(); + spyScan.mockRestore(); + }); + + test('throws EnvLeakError when .env contains sensitive keys and codebase has no consent', async () => { + spyScan.mockReturnValueOnce({ + 
path: '/workspace', + findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], + }); + + await expect(async () => { + for await (const _ of client.sendQuery('test', '/workspace')) { + // consume + } + }).toThrow('Cannot add codebase'); + }); + + test('skips scan when codebase has allow_env_keys: true', async () => { + spyFindByDefaultCwd.mockResolvedValueOnce({ + id: 'codebase-1', + allow_env_keys: true, + default_cwd: '/workspace', + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(spyScan).not.toHaveBeenCalled(); + expect(chunks).toHaveLength(1); + }); + + test('proceeds when cwd has no registered codebase and no sensitive keys', async () => { + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(spyScan).toHaveBeenCalledTimes(1); + expect(chunks).toHaveLength(1); + }); + + test('uses prefix lookup for worktree paths when exact match returns null', async () => { + spyFindByPathPrefix.mockResolvedValueOnce({ + id: 'codebase-1', + allow_env_keys: true, + default_cwd: '/workspace/source', + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) { + chunks.push(chunk); + } + + expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature'); + expect(spyScan).not.toHaveBeenCalled(); + }); + }); }); diff --git a/packages/core/src/clients/claude.ts b/packages/core/src/clients/claude.ts index f70e230d..6e0d996f 100644 --- a/packages/core/src/clients/claude.ts +++ b/packages/core/src/clients/claude.ts @@ -27,6 +27,8 @@ import { } from '../types'; import { createLogger } from '@archon/paths'; import { buildCleanSubprocessEnv } from '../utils/env-allowlist'; +import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; +import * as codebaseDb from '../db/codebases'; /** Lazy-initialized logger (deferred so test mocks 
can intercept createLogger) */ let cachedLog: ReturnType | undefined; @@ -258,6 +260,19 @@ export class ClaudeClient implements IAssistantClient { resumeSessionId?: string, requestOptions?: AssistantRequestOptions ): AsyncGenerator { + // Pre-spawn: check for env key leak if codebase is not explicitly consented. + // Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still + // match the registered source cwd (e.g. .../source). + const codebase = + (await codebaseDb.findCodebaseByDefaultCwd(cwd)) ?? + (await codebaseDb.findCodebaseByPathPrefix(cwd)); + if (!codebase?.allow_env_keys) { + const report = scanPathForSensitiveKeys(cwd); + if (report.findings.length > 0) { + throw new EnvLeakError(report); + } + } + // Note: If subprocess crashes mid-stream after yielding chunks, those chunks // are already consumed by the caller. Retry starts a fresh subprocess, so the // caller may receive partial output from the failed attempt followed by full diff --git a/packages/core/src/clients/codex.test.ts b/packages/core/src/clients/codex.test.ts index f7b9f526..3dbc9350 100644 --- a/packages/core/src/clients/codex.test.ts +++ b/packages/core/src/clients/codex.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeEach } from 'bun:test'; +import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -40,6 +40,8 @@ mock.module('@openai/codex-sdk', () => ({ })); import { CodexClient } from './codex'; +import * as codebaseDb from '../db/codebases'; +import * as envLeakScanner from '../utils/env-leak-scanner'; describe('CodexClient', () => { let client: CodexClient; @@ -1000,4 +1002,89 @@ describe('CodexClient', () => { }); }); }); + + describe('pre-spawn env leak gate', () => { + let spyFindByDefaultCwd: ReturnType; + let spyFindByPathPrefix: ReturnType; + let spyScan: ReturnType; + + beforeEach(() => { + // Restore a 
working runStreamed default so retry-test bleed doesn't break gate tests + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + spyFindByDefaultCwd = spyOn(codebaseDb, 'findCodebaseByDefaultCwd').mockResolvedValue(null); + spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null); + spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({ + path: '/workspace', + findings: [], + }); + }); + + afterEach(() => { + spyFindByDefaultCwd.mockRestore(); + spyFindByPathPrefix.mockRestore(); + spyScan.mockRestore(); + }); + + test('throws EnvLeakError when .env contains sensitive keys and codebase has no consent', async () => { + spyFindByDefaultCwd.mockResolvedValueOnce(null); + spyFindByPathPrefix.mockResolvedValueOnce(null); + spyScan.mockReturnValueOnce({ + path: '/workspace', + findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], + }); + + const consumeGenerator = async (): Promise => { + for await (const _ of client.sendQuery('test', '/workspace')) { + // consume + } + }; + + await expect(consumeGenerator()).rejects.toThrow('Cannot add codebase'); + }); + + test('skips scan when codebase has allow_env_keys: true', async () => { + spyFindByDefaultCwd.mockResolvedValueOnce({ + id: 'codebase-1', + allow_env_keys: true, + default_cwd: '/workspace', + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(spyScan).not.toHaveBeenCalled(); + }); + + test('proceeds when cwd has no registered codebase and no sensitive keys', async () => { + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(spyScan).toHaveBeenCalledTimes(1); + }); + + test('uses prefix lookup for worktree paths when exact match returns null', async () => { + spyFindByDefaultCwd.mockResolvedValueOnce(null); + 
spyFindByPathPrefix.mockResolvedValueOnce({ + id: 'codebase-1', + allow_env_keys: true, + default_cwd: '/workspace/source', + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) { + chunks.push(chunk); + } + + expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature'); + expect(spyScan).not.toHaveBeenCalled(); + }); + }); }); diff --git a/packages/core/src/clients/codex.ts b/packages/core/src/clients/codex.ts index 121bd73f..4e60942a 100644 --- a/packages/core/src/clients/codex.ts +++ b/packages/core/src/clients/codex.ts @@ -18,6 +18,8 @@ import { type TokenUsage, } from '../types'; import { createLogger } from '@archon/paths'; +import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; +import * as codebaseDb from '../db/codebases'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ let cachedLog: ReturnType | undefined; @@ -154,6 +156,19 @@ export class CodexClient implements IAssistantClient { resumeSessionId?: string, options?: AssistantRequestOptions ): AsyncGenerator { + // Pre-spawn: check for env key leak if codebase is not explicitly consented. + // Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still + // match the registered source cwd (e.g. .../source). + const codebase = + (await codebaseDb.findCodebaseByDefaultCwd(cwd)) ?? 
+ (await codebaseDb.findCodebaseByPathPrefix(cwd)); + if (!codebase?.allow_env_keys) { + const report = scanPathForSensitiveKeys(cwd); + if (report.findings.length > 0) { + throw new EnvLeakError(report); + } + } + const codex = getCodex(); const threadOptions = buildThreadOptions(cwd, options); diff --git a/packages/core/src/db/codebases.test.ts b/packages/core/src/db/codebases.test.ts index 26c269a0..8fef0743 100644 --- a/packages/core/src/db/codebases.test.ts +++ b/packages/core/src/db/codebases.test.ts @@ -36,6 +36,7 @@ describe('codebases', () => { repository_url: 'https://github.com/user/repo', default_cwd: '/workspace/test-project', ai_assistant_type: 'claude', + allow_env_keys: false, commands: { plan: { path: '.claude/commands/plan.md', description: 'Plan feature' } }, created_at: new Date(), updated_at: new Date(), @@ -54,8 +55,8 @@ describe('codebases', () => { expect(result).toEqual(mockCodebase); expect(mockQuery).toHaveBeenCalledWith( - 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *', - ['test-project', 'https://github.com/user/repo', '/workspace/test-project', 'claude'] + 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *', + ['test-project', 'https://github.com/user/repo', '/workspace/test-project', 'claude', false] ); }); @@ -73,8 +74,8 @@ describe('codebases', () => { expect(result).toEqual(codebaseWithoutOptional); expect(mockQuery).toHaveBeenCalledWith( - 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *', - ['test-project', null, '/workspace/test-project', 'claude'] + 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *', + ['test-project', null, '/workspace/test-project', 'claude', false] ); }); @@ 
-297,6 +298,7 @@ describe('codebases', () => { name: 'test-repo', default_cwd: '/workspace/test-repo', ai_assistant_type: 'claude', + allow_env_keys: false, repository_url: null, commands: {}, created_at: new Date(), diff --git a/packages/core/src/db/codebases.ts b/packages/core/src/db/codebases.ts index f3847f30..7b7f4917 100644 --- a/packages/core/src/db/codebases.ts +++ b/packages/core/src/db/codebases.ts @@ -17,11 +17,13 @@ export async function createCodebase(data: { repository_url?: string; default_cwd: string; ai_assistant_type?: string; + allow_env_keys?: boolean; }): Promise { const assistantType = data.ai_assistant_type ?? 'claude'; + const allowEnvKeys = data.allow_env_keys ?? false; const result = await pool.query( - 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *', - [data.name, data.repository_url ?? null, data.default_cwd, assistantType] + 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *', + [data.name, data.repository_url ?? 
null, data.default_cwd, assistantType, allowEnvKeys] ); if (!result.rows[0]) { throw new Error('Failed to create codebase: INSERT succeeded but no row returned'); @@ -96,6 +98,25 @@ export async function findCodebaseByDefaultCwd(defaultCwd: string): Promise { + const result = await pool.query( + `SELECT * FROM remote_agent_codebases + WHERE $1 LIKE default_cwd || '%' + ORDER BY length(default_cwd) DESC + LIMIT 1`, + [cwdPath] + ); + return result.rows[0] || null; +} + export async function findCodebaseByName(name: string): Promise { const result = await pool.query( 'SELECT * FROM remote_agent_codebases WHERE name = $1 ORDER BY created_at DESC LIMIT 1', diff --git a/packages/core/src/handlers/clone.test.ts b/packages/core/src/handlers/clone.test.ts index c913c1a7..d58f5f03 100644 --- a/packages/core/src/handlers/clone.test.ts +++ b/packages/core/src/handlers/clone.test.ts @@ -20,6 +20,7 @@ const mockCreateCodebase = mock(() => repository_url: 'https://github.com/owner/repo', default_cwd: '/home/test/.archon/workspaces/owner/repo/source', ai_assistant_type: 'claude', + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -66,6 +67,20 @@ mock.module('../utils/commands', () => ({ findMarkdownFilesRecursive: mockFindMarkdownFilesRecursive, })); +// ── env-leak-scanner mock ─────────────────────────────────────────────────── +class MockEnvLeakError extends Error { + constructor(public report: unknown) { + super('Cannot add codebase — /test/path contains keys that will leak into AI subprocesses'); + this.name = 'EnvLeakError'; + } +} + +const mockScanPathForSensitiveKeys = mock(() => ({ path: '', findings: [] })); +mock.module('../utils/env-leak-scanner', () => ({ + scanPathForSensitiveKeys: mockScanPathForSensitiveKeys, + EnvLeakError: MockEnvLeakError, +})); + // ── Import module under test AFTER mocks are registered ──────────────────── import { cloneRepository, registerRepository } from './clone'; @@ -103,6 +118,7 @@ function 
clearMocks(): void { mockFindCodebaseByName.mockReset(); mockUpdateCodebase.mockReset(); mockFindMarkdownFilesRecursive.mockReset(); + mockScanPathForSensitiveKeys.mockReset(); mockLogger.info.mockClear(); mockLogger.debug.mockClear(); mockLogger.warn.mockClear(); @@ -116,6 +132,7 @@ function clearMocks(): void { mockFindCodebaseByName.mockResolvedValue(null); mockUpdateCodebase.mockResolvedValue(undefined); mockFindMarkdownFilesRecursive.mockResolvedValue([]); + mockScanPathForSensitiveKeys.mockReturnValue({ path: '', findings: [] }); } afterAll(() => { @@ -140,6 +157,7 @@ function makeCodebase( repository_url: 'https://github.com/owner/repo', default_cwd: '/home/test/.archon/workspaces/owner/repo/source', ai_assistant_type: 'claude', + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -930,4 +948,27 @@ describe('RegisterResult shape', () => { expect(result.alreadyExisted).toBe(true); expect(result.commandCount).toBe(0); }); + + describe('env leak gate', () => { + test('throws EnvLeakError when scanner finds sensitive keys and allowEnvKeys is false', async () => { + mockScanPathForSensitiveKeys.mockReturnValueOnce({ + path: '/home/test/.archon/workspaces/owner/repo/source', + findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], + }); + + await expect(cloneRepository('https://github.com/owner/repo')).rejects.toThrow( + 'Cannot add codebase' + ); + }); + + test('does not scan when allowEnvKeys is true, even with scanner findings available', async () => { + mockCreateCodebase.mockResolvedValueOnce(makeCodebase() as ReturnType); + + // allowEnvKeys=true should bypass the gate; scanner not called even with findings queued + const result = await cloneRepository('https://github.com/owner/repo', true); + + expect(mockScanPathForSensitiveKeys).not.toHaveBeenCalled(); + expect(result.codebaseId).toBe('codebase-uuid-1'); + }); + }); }); diff --git a/packages/core/src/handlers/clone.ts b/packages/core/src/handlers/clone.ts index 
fe7e4d95..4042a3f1 100644 --- a/packages/core/src/handlers/clone.ts +++ b/packages/core/src/handlers/clone.ts @@ -16,6 +16,7 @@ import { parseOwnerRepo, } from '@archon/paths'; import { findMarkdownFilesRecursive } from '../utils/commands'; +import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; import { createLogger } from '@archon/paths'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ @@ -40,8 +41,17 @@ export interface RegisterResult { async function registerRepoAtPath( targetPath: string, name: string, - repositoryUrl: string | null + repositoryUrl: string | null, + allowEnvKeys = false ): Promise { + // Scan for sensitive keys in auto-loaded .env files before registering + if (!allowEnvKeys) { + const report = scanPathForSensitiveKeys(targetPath); + if (report.findings.length > 0) { + throw new EnvLeakError(report); + } + } + // Auto-detect assistant type based on folder structure let suggestedAssistant = 'claude'; const codexFolder = join(targetPath, '.codex'); @@ -122,6 +132,7 @@ async function registerRepoAtPath( repository_url: repositoryUrl ?? undefined, default_cwd: targetPath, ai_assistant_type: suggestedAssistant, + allow_env_keys: allowEnvKeys, }); // Auto-load commands if found @@ -190,11 +201,14 @@ function normalizeRepoUrl(rawUrl: string): { * Local paths (starting with /, ~, or .) are delegated to registerRepository * to avoid wrong owner/repo naming. See #383 for broader rethink. */ -export async function cloneRepository(repoUrl: string): Promise { +export async function cloneRepository( + repoUrl: string, + allowEnvKeys?: boolean +): Promise { // Local paths should be registered (symlink), not cloned (copied) if (repoUrl.startsWith('/') || repoUrl.startsWith('~') || repoUrl.startsWith('.')) { const resolvedPath = repoUrl.startsWith('~') ? 
expandTilde(repoUrl) : resolve(repoUrl); - return registerRepository(resolvedPath); + return registerRepository(resolvedPath, allowEnvKeys); } const { workingUrl, ownerName, repoName, targetPath } = normalizeRepoUrl(repoUrl); @@ -275,7 +289,12 @@ export async function cloneRepository(repoUrl: string): Promise await execFileAsync('git', ['config', '--global', '--add', 'safe.directory', targetPath]); getLog().debug({ path: targetPath }, 'safe_directory_added'); - const result = await registerRepoAtPath(targetPath, `${ownerName}/${repoName}`, workingUrl); + const result = await registerRepoAtPath( + targetPath, + `${ownerName}/${repoName}`, + workingUrl, + allowEnvKeys + ); getLog().info({ url: workingUrl, targetPath }, 'clone_completed'); return result; } @@ -283,7 +302,10 @@ export async function cloneRepository(repoUrl: string): Promise /** * Register an existing local repository in the database (no git clone). */ -export async function registerRepository(localPath: string): Promise { +export async function registerRepository( + localPath: string, + allowEnvKeys?: boolean +): Promise { // Validate path exists and is a git repo try { await execFileAsync('git', ['-C', localPath, 'rev-parse', '--git-dir']); @@ -349,5 +371,5 @@ export async function registerRepository(localPath: string): Promise { repository_url: 'https://github.com/user/my-repo', default_cwd: '/workspace/my-repo', ai_assistant_type: 'claude', + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -566,6 +567,7 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/owner/repo', default_cwd: '/workspace/repo', ai_assistant_type: 'claude', + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -604,6 +606,7 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/owner/orphaned-repo', default_cwd: '/workspace/orphaned-repo', ai_assistant_type: 'claude', + allow_env_keys: false, commands: {}, 
created_at: new Date(), updated_at: new Date(), @@ -718,6 +721,7 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/user/my-repo', default_cwd: '/workspace/my-repo', ai_assistant_type: 'claude', + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 90b1cefd..e8caf0ec 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -145,6 +145,14 @@ export { toError } from './utils/error'; // Credential sanitization export { sanitizeCredentials, sanitizeError } from './utils/credential-sanitizer'; +// Env leak scanner +export { + EnvLeakError, + scanPathForSensitiveKeys, + formatLeakError, + type LeakReport, +} from './utils/env-leak-scanner'; + // GitHub GraphQL export { getLinkedIssueNumbers } from './utils/github-graphql'; diff --git a/packages/core/src/orchestrator/orchestrator-agent.test.ts b/packages/core/src/orchestrator/orchestrator-agent.test.ts index 07579296..70080cc0 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.test.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.test.ts @@ -181,6 +181,7 @@ function makeCodebase(name: string, id = `id-${name}`): Codebase { repository_url: null, default_cwd: `/repos/${name}`, ai_assistant_type: 'claude', + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -804,6 +805,7 @@ function makeCodebaseForSync() { repository_url: 'https://github.com/test/repo', default_cwd: '/repos/test-repo', ai_assistant_type: 'claude', + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -969,6 +971,7 @@ describe('workflow dispatch routing — interactive flag', () => { repository_url: null, default_cwd: '/repos/test-repo', ai_assistant_type: 'claude' as const, + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -1069,6 +1072,7 @@ describe('natural-language approval 
routing', () => { repository_url: null, default_cwd: '/repos/test-repo', ai_assistant_type: 'claude' as const, + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), diff --git a/packages/core/src/orchestrator/orchestrator-isolation.test.ts b/packages/core/src/orchestrator/orchestrator-isolation.test.ts index 1f533b8e..f46930f0 100644 --- a/packages/core/src/orchestrator/orchestrator-isolation.test.ts +++ b/packages/core/src/orchestrator/orchestrator-isolation.test.ts @@ -176,6 +176,7 @@ function makeCodebase(overrides?: Partial): Codebase { id: 'cb-1', name: 'test-repo', default_cwd: '/workspace/test-repo', + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), diff --git a/packages/core/src/orchestrator/orchestrator.test.ts b/packages/core/src/orchestrator/orchestrator.test.ts index f8f199a5..d5e81038 100644 --- a/packages/core/src/orchestrator/orchestrator.test.ts +++ b/packages/core/src/orchestrator/orchestrator.test.ts @@ -216,6 +216,7 @@ const mockCodebase: Codebase = { repository_url: 'https://github.com/user/repo', default_cwd: '/workspace/test-project', ai_assistant_type: 'claude', + allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts index 3554c621..549891f3 100644 --- a/packages/core/src/types/index.ts +++ b/packages/core/src/types/index.ts @@ -57,6 +57,7 @@ export interface Codebase { repository_url: string | null; default_cwd: string; ai_assistant_type: string; + allow_env_keys: boolean; commands: Record; created_at: Date; updated_at: Date; diff --git a/packages/core/src/utils/env-leak-scanner.test.ts b/packages/core/src/utils/env-leak-scanner.test.ts new file mode 100644 index 00000000..fe528a15 --- /dev/null +++ b/packages/core/src/utils/env-leak-scanner.test.ts @@ -0,0 +1,110 @@ +import { describe, it, expect, beforeEach, afterEach } from 'bun:test'; +import { writeFileSync, 
mkdirSync, rmSync } from 'fs'; +import { join } from 'path'; +import { + scanPathForSensitiveKeys, + EnvLeakError, + SENSITIVE_KEYS, + AUTOLOADED_FILES, +} from './env-leak-scanner'; + +describe('scanPathForSensitiveKeys', () => { + const tmpDir = '/tmp/archon-test-env-scan'; + + beforeEach(() => { + mkdirSync(tmpDir, { recursive: true }); + }); + afterEach(() => { + rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('returns empty findings for clean directory', () => { + const report = scanPathForSensitiveKeys(tmpDir); + expect(report.findings).toHaveLength(0); + }); + + it('returns empty findings for non-existent directory', () => { + const report = scanPathForSensitiveKeys('/tmp/archon-test-nonexistent-dir'); + expect(report.findings).toHaveLength(0); + }); + + // Each sensitive key × each auto-loaded filename + for (const key of SENSITIVE_KEYS) { + for (const filename of AUTOLOADED_FILES) { + it(`detects ${key} in ${filename}`, () => { + writeFileSync(join(tmpDir, filename), `${key}=sk-test-value\nOTHER=safe\n`); + const report = scanPathForSensitiveKeys(tmpDir); + expect(report.findings).toHaveLength(1); + expect(report.findings[0].file).toBe(filename); + expect(report.findings[0].keys).toContain(key); + // Clean up for next iteration + rmSync(join(tmpDir, filename)); + }); + } + } + + it('ignores commented-out keys', () => { + writeFileSync(join(tmpDir, '.env'), '# ANTHROPIC_API_KEY=value\n'); + const report = scanPathForSensitiveKeys(tmpDir); + expect(report.findings).toHaveLength(0); + }); + + it('ignores lines without =', () => { + writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY\n'); + const report = scanPathForSensitiveKeys(tmpDir); + expect(report.findings).toHaveLength(0); + }); + + it('reports multiple files with findings', () => { + writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY=sk-1\n'); + writeFileSync(join(tmpDir, '.env.local'), 'OPENAI_API_KEY=sk-2\n'); + const report = scanPathForSensitiveKeys(tmpDir); + 
expect(report.findings).toHaveLength(2); + }); + + it('reports multiple keys in same file', () => { + writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY=sk-1\nOPENAI_API_KEY=sk-2\n'); + const report = scanPathForSensitiveKeys(tmpDir); + expect(report.findings).toHaveLength(1); + expect(report.findings[0].keys).toHaveLength(2); + }); + + it('ignores non-autoloaded filenames', () => { + writeFileSync(join(tmpDir, '.env.secrets'), 'ANTHROPIC_API_KEY=sk-1\n'); + const report = scanPathForSensitiveKeys(tmpDir); + expect(report.findings).toHaveLength(0); + }); + + it('ignores safe keys', () => { + writeFileSync(join(tmpDir, '.env'), 'DATABASE_URL=postgres://localhost\nNODE_ENV=dev\n'); + const report = scanPathForSensitiveKeys(tmpDir); + expect(report.findings).toHaveLength(0); + }); +}); + +describe('EnvLeakError', () => { + it('is instanceof EnvLeakError and Error', () => { + const report = { path: '/tmp', findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }] }; + const err = new EnvLeakError(report); + expect(err).toBeInstanceOf(Error); + expect(err).toBeInstanceOf(EnvLeakError); + expect(err.name).toBe('EnvLeakError'); + expect(err.message).toContain('ANTHROPIC_API_KEY'); + expect(err.report).toBe(report); + }); + + it('formats multiple findings', () => { + const report = { + path: '/test', + findings: [ + { file: '.env', keys: ['ANTHROPIC_API_KEY'] }, + { file: '.env.local', keys: ['OPENAI_API_KEY', 'GEMINI_API_KEY'] }, + ], + }; + const err = new EnvLeakError(report); + expect(err.message).toContain('.env'); + expect(err.message).toContain('.env.local'); + expect(err.message).toContain('OPENAI_API_KEY'); + expect(err.message).toContain('GEMINI_API_KEY'); + }); +}); diff --git a/packages/core/src/utils/env-leak-scanner.ts b/packages/core/src/utils/env-leak-scanner.ts new file mode 100644 index 00000000..46e0a846 --- /dev/null +++ b/packages/core/src/utils/env-leak-scanner.ts @@ -0,0 +1,104 @@ +import { readFileSync, existsSync } from 'fs'; +import { join } 
/** Environment variable names that must not be present in an auto-loaded env file. */
export const SENSITIVE_KEYS = new Set([
  'ANTHROPIC_API_KEY',
  'ANTHROPIC_AUTH_TOKEN',
  'CLAUDE_API_KEY',
  'CLAUDE_CODE_OAUTH_TOKEN',
  'OPENAI_API_KEY',
  'CODEX_API_KEY',
  'GEMINI_API_KEY',
]);

/** File names inspected inside the scanned directory; any other name is ignored. */
export const AUTOLOADED_FILES = [
  '.env',
  '.env.local',
  '.env.development',
  '.env.production',
  '.env.development.local',
  '.env.production.local',
];

/** One env file and the sensitive keys (or an unreadable marker) found in it. */
export interface LeakFinding {
  file: string;
  keys: string[];
}

/** Result of scanning one directory: the directory scanned plus one entry per offending file. */
export interface LeakReport {
  path: string;
  findings: LeakFinding[];
}

/**
 * Error carrying a {@link LeakReport}. The message is the human-readable text
 * produced by {@link formatLeakError}; the original report is kept on `report`.
 */
export class EnvLeakError extends Error {
  constructor(public readonly report: LeakReport) {
    super(formatLeakError(report));
    this.name = 'EnvLeakError';
  }
}

/** Optional shell-style `export ` prefix that dotenv-style parsers accept before a key. */
const EXPORT_PREFIX = /^export\s+/;

/**
 * Extract the variable name from a single env-file line.
 *
 * @returns the key, or `undefined` for comment lines and lines without `=`.
 */
function parseAssignedKey(line: string): string | undefined {
  const trimmed = line.trim();
  if (trimmed.startsWith('#')) return undefined;

  const eq = trimmed.indexOf('=');
  if (eq === -1) return undefined;

  // `export FOO=bar` assigns FOO; without stripping the prefix the key would be
  // "export FOO" and a sensitive key written that way would slip past the gate.
  return trimmed.slice(0, eq).replace(EXPORT_PREFIX, '').trim();
}

/**
 * Scan `dirPath` for auto-loaded .env files containing sensitive keys.
 *
 * Only the names in {@link AUTOLOADED_FILES} are checked. Read-only: nothing on
 * disk is modified.
 *
 * @param dirPath directory to inspect; a missing directory yields no findings.
 * @returns the scanned path plus one finding per file that contains at least one
 *   key from {@link SENSITIVE_KEYS}. Each key is listed once per file, in order
 *   of first appearance. A file that exists but cannot be read is reported with
 *   a single `[unreadable — <code>]` entry instead of being skipped.
 */
export function scanPathForSensitiveKeys(dirPath: string): LeakReport {
  const findings: LeakFinding[] = [];

  for (const filename of AUTOLOADED_FILES) {
    const fullPath = join(dirPath, filename);
    if (!existsSync(fullPath)) continue;

    let contents: string;
    try {
      contents = readFileSync(fullPath, 'utf8');
    } catch (err) {
      // File exists but is unreadable — treat as a finding to avoid silently bypassing the gate
      const code = (err as NodeJS.ErrnoException).code;
      findings.push({ file: filename, keys: [`[unreadable — ${code ?? 'unknown error'}]`] });
      continue;
    }

    // A Set keeps first-seen order and drops repeats of the same key within a file.
    const foundKeys = new Set<string>();
    for (const line of contents.split('\n')) {
      const key = parseAssignedKey(line);
      if (key !== undefined && SENSITIVE_KEYS.has(key)) {
        foundKeys.add(key);
      }
    }

    if (foundKeys.size > 0) {
      findings.push({ file: filename, keys: [...foundKeys] });
    }
  }

  return { path: dirPath, findings };
}

/**
 * Build the multi-line, user-facing explanation for a leak report.
 *
 * The text always starts with `Cannot add codebase`. The remediation commands
 * name the first file/key pair from the report that is a real sensitive key, and
 * fall back to `.env` / `ANTHROPIC_API_KEY` when the report has none (for
 * example when it only contains unreadable-file markers).
 *
 * @param report scan result to describe.
 * @returns the formatted message.
 */
export function formatLeakError(report: LeakReport): string {
  const fileList = report.findings.map(f => `    ${f.file} — ${f.keys.join(', ')}`).join('\n');

  // Pick a concrete example so the suggested commands match what was actually found.
  const example = report.findings.find(f => f.keys.some(k => SENSITIVE_KEYS.has(k)));
  const exampleFile = example?.file ?? '.env';
  const exampleKey = example?.keys.find(k => SENSITIVE_KEYS.has(k)) ?? 'ANTHROPIC_API_KEY';

  return `Cannot add codebase — ${report.path} contains keys that will leak into AI subprocesses

  Found:
${fileList}

  Why this matters:
    Bun subprocesses auto-load .env from their working directory. Archon cleans
    its own environment, but Claude/Codex subprocesses running with cwd=${report.path}
    will re-inject these keys at their own startup, bypassing archon's allowlist.
    This can bill the wrong API account silently.

  Choose one:
    1. Remove the key from this repo's ${exampleFile} (recommended):
         grep -vE '^[[:space:]]*(export[[:space:]]+)?${exampleKey}[[:space:]]*=' ${exampleFile} > ${exampleFile}.tmp && mv ${exampleFile}.tmp ${exampleFile}

    2. Rename to a non-auto-loaded file:
         mv ${exampleFile} ${exampleFile}.secrets
         # update your app to load it explicitly

    3. Acknowledge the risk and allow this codebase to use its .env key:
         Open the web UI (Settings → Projects → Add Project) and tick
         "Allow env keys (I understand the risk)" when adding this project.`;
}
// A refused add (EnvLeakError from the clone step) must surface as HTTP 422
// with the error's message in the response body.
test('returns 422 when cloneRepository throws EnvLeakError', async () => {
  // Import lazily so the class comes from the mocked '@archon/core' module
  const { EnvLeakError } = await import('@archon/core');
  const leakReport = {
    path: '/repo/path',
    findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
  };
  mockCloneRepository.mockImplementationOnce(async () => {
    throw new EnvLeakError(leakReport);
  });

  const app = makeApp();
  const requestInit = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ url: 'https://github.com/user/repo' }),
  };
  const response = await app.request('/api/codebases', requestInit);
  expect(response.status).toBe(422);

  const { error } = (await response.json()) as { error: string };
  expect(error).toContain('Cannot add codebase');
});

// The opt-in flag from the request body is forwarded as the second argument.
test('passes allowEnvKeys=true to cloneRepository when body includes it', async () => {
  const cloneResult = { codebaseId: 'clone-uuid-2', alreadyExisted: false };
  mockCloneRepository.mockImplementationOnce(async () => cloneResult);
  mockGetCodebase.mockImplementationOnce(async () => MOCK_CODEBASE);

  const app = makeApp();
  const payload = { url: 'https://github.com/user/repo', allowEnvKeys: true };
  const response = await app.request('/api/codebases', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });

  expect(response.status).toBe(201);
  expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo', true);
});
'@archon/core'; import { removeWorktree, toRepoPath, toWorktreePath } from '@archon/git'; import { @@ -816,7 +817,7 @@ export function registerApiRoutes( ): void { function apiError( c: Context, - status: 400 | 404 | 500, + status: 400 | 404 | 422 | 500, message: string, detail?: string ): Response { @@ -1482,8 +1483,8 @@ export function registerApiRoutes( try { // .refine() guarantees exactly one of url/path is present const result = body.url - ? await cloneRepository(body.url) - : await registerRepository(body.path ?? ''); + ? await cloneRepository(body.url, body.allowEnvKeys) + : await registerRepository(body.path ?? '', body.allowEnvKeys); // Fetch the full codebase record for a consistent response const codebase = await codebaseDb.getCodebase(result.codebaseId); @@ -1493,6 +1494,12 @@ export function registerApiRoutes( return c.json(codebase, result.alreadyExisted ? 200 : 201); } catch (error) { + if (error instanceof EnvLeakError) { + const path = body.url ?? body.path ?? ''; + const files = error.report.findings.map(f => f.file); + getLog().warn({ path, files }, 'add_codebase_env_leak_refused'); + return apiError(c, 422, error.message); + } getLog().error({ err: error }, 'add_codebase_failed'); return apiError( c, diff --git a/packages/server/src/routes/schemas/codebase.schemas.ts b/packages/server/src/routes/schemas/codebase.schemas.ts index d2880a6b..7114864d 100644 --- a/packages/server/src/routes/schemas/codebase.schemas.ts +++ b/packages/server/src/routes/schemas/codebase.schemas.ts @@ -16,6 +16,7 @@ export const codebaseSchema = z repository_url: z.string().nullable(), default_cwd: z.string(), ai_assistant_type: z.string(), + allow_env_keys: z.boolean(), commands: z.record(codebaseCommandSchema), created_at: z.string(), updated_at: z.string(), @@ -33,6 +34,7 @@ export const addCodebaseBodySchema = z .object({ url: z.string().min(1).optional(), path: z.string().min(1).optional(), + allowEnvKeys: z.boolean().optional(), }) .refine(b => (b.url !== 
/**
 * Recognise the server's "env leak refused" response on a failed add-project call.
 *
 * True only for an `Error` instance that carries a `status` property equal to
 * 422 and whose message begins with `Cannot add codebase`.
 */
function isEnvLeakError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;
  if (!('status' in error)) return false;

  const { status } = error as Error & { status: number };
  if (status !== 422) return false;

  return error.message.startsWith('Cannot add codebase');
}
}), onSuccess: () => { void queryClient.invalidateQueries({ queryKey: ['codebases'] }); setAddPath(''); setShowAdd(false); + setAllowEnvKeys(false); }, }); @@ -280,7 +292,7 @@ function ProjectsSection(): React.ReactElement { function handleAddSubmit(e: React.FormEvent): void { e.preventDefault(); if (addPath.trim()) { - addMutation.mutate(addPath.trim()); + addMutation.mutate({ path: addPath.trim(), allowEnvKeys: allowEnvKeys || undefined }); } } @@ -373,6 +385,18 @@ function ProjectsSection(): React.ReactElement { {addMutation.error instanceof Error ? addMutation.error.message : 'Failed to add project'} + {isEnvLeakError(addMutation.error) && ( + + )} )} diff --git a/packages/workflows/src/executor-shared.ts b/packages/workflows/src/executor-shared.ts index 890eb6a1..0ac1f4ba 100644 --- a/packages/workflows/src/executor-shared.ts +++ b/packages/workflows/src/executor-shared.ts @@ -67,8 +67,13 @@ export function matchesPattern(message: string, patterns: string[]): boolean { * Classify an error to determine if it's transient (can retry) or fatal (should fail). * FATAL patterns take priority over TRANSIENT patterns to prevent an error message * containing both (e.g. "unauthorized: process exited with code 1") from being retried. + * + * First-party named error types are checked by name (immune to message rewording). */ export function classifyError(error: Error): ErrorType { + // Named first-party errors checked by name — immune to message rewording + if (error.name === 'EnvLeakError') return 'FATAL'; + const message = error.message.toLowerCase(); if (matchesPattern(message, FATAL_PATTERNS)) {