fix(env): detect and refuse target-repo .env with sensitive keys (#1036)

* fix(env): detect and refuse target-repo .env with sensitive keys (#1034)

Bun auto-loads .env from subprocess CWD regardless of the clean env
passed to Bun.spawn, silently overriding OAuth auth and billing the
wrong API account. This adds a consent-based gate at registration time
and a pre-spawn safety net in both Claude and Codex clients.

Changes:
- Add env-leak-scanner utility that checks 6 auto-loaded .env filenames
  for 7 sensitive keys (ANTHROPIC_API_KEY, OPENAI_API_KEY, etc.)
- Add allow_env_keys boolean column to codebases table (migration 021)
- Gate registerRepoAtPath to reject codebases with sensitive .env keys
  unless explicitly consented via allowEnvKeys flag
- Add pre-spawn check in ClaudeClient and CodexClient sendQuery methods
- Return 422 from POST /api/codebases on env leak detection
- Surface env leak error in web UI with "Allow env keys" checkbox
- Classify EnvLeakError as FATAL in workflow executor

Fixes #1034

* fix: address review findings for env leak scanner PR

- Fix FATAL_PATTERNS 'env leak' pattern that never matched EnvLeakError.message;
  now checks error.name === 'EnvLeakError' directly (immune to message rewording)
- Fix pre-spawn consent lookup for worktree paths: add findCodebaseByPathPrefix()
  and use it as fallback when exact match returns null; prevents opt-in from
  being silently ineffective for workflow-based runs
- Add allow_env_keys column to 000_combined.sql CREATE TABLE and idempotent ALTER
  section to fix fresh PostgreSQL installs
- Remove non-existent --allow-env-keys CLI flag from error message; replace with
  web UI-only instruction
- Narrow isEnvLeakError check from error.message.includes('env') to startsWith('Cannot add codebase')
- Distinguish ENOENT (skip) from EACCES/other errors in scanner catch block;
  unreadable files now surface as findings to avoid silently bypassing the gate
- Use cross-platform grep command instead of macOS-specific sed -i '' syntax
- Add audit log (log.warn) when 422 EnvLeakError is returned from API
- Add pre-spawn gate tests to claude.test.ts and codex.test.ts (4 tests each)
- Add env leak gate tests to clone.test.ts (2 tests)
- Add 422 and allowEnvKeys passthrough tests to api.codebases.test.ts

* simplify: reduce complexity in changed files
This commit is contained in:
Rasmus Widing 2026-04-08 09:43:47 +03:00 committed by GitHub
parent 8a44faab10
commit a1e8a16b82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 635 additions and 23 deletions

View file

@ -30,6 +30,7 @@ CREATE TABLE IF NOT EXISTS remote_agent_codebases (
repository_url VARCHAR(500),
default_cwd VARCHAR(500) NOT NULL,
ai_assistant_type VARCHAR(20) DEFAULT 'claude',
allow_env_keys BOOLEAN NOT NULL DEFAULT FALSE,
commands JSONB DEFAULT '{}'::jsonb,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
@ -307,3 +308,7 @@ ALTER TABLE remote_agent_conversations
-- From migration 016: ended_reason on sessions
ALTER TABLE remote_agent_sessions
ADD COLUMN IF NOT EXISTS ended_reason TEXT;
-- From migration 021: allow_env_keys on codebases
ALTER TABLE remote_agent_codebases
ADD COLUMN IF NOT EXISTS allow_env_keys BOOLEAN NOT NULL DEFAULT FALSE;

View file

@ -0,0 +1,4 @@
-- Add per-codebase consent bit for subprocess .env key leakage
-- DEFAULT FALSE = safe by default; user must explicitly opt in
-- IF NOT EXISTS keeps this migration idempotent and avoids a duplicate-column
-- error on databases bootstrapped from 000_combined.sql, which already adds
-- this column in its idempotent ALTER section.
ALTER TABLE remote_agent_codebases
  ADD COLUMN IF NOT EXISTS allow_env_keys BOOLEAN NOT NULL DEFAULT FALSE;

View file

@ -23,7 +23,7 @@
"./state/*": "./src/state/*.ts"
},
"scripts": {
"test": "bun test src/clients/ && bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-allowlist.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts",
"test": "bun test src/clients/ && bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-allowlist.test.ts src/utils/env-leak-scanner.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts",
"type-check": "bun x tsc --noEmit",
"build": "echo 'No build needed - Bun runs TypeScript directly'"
},

View file

@ -1,4 +1,4 @@
import { describe, test, expect, mock, beforeEach, spyOn } from 'bun:test';
import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test';
import { createMockLogger } from '../test/mocks/logger';
const mockLogger = createMockLogger();
@ -18,6 +18,8 @@ mock.module('@anthropic-ai/claude-agent-sdk', () => ({
import { ClaudeClient } from './claude';
import * as claudeModule from './claude';
import * as codebaseDb from '../db/codebases';
import * as envLeakScanner from '../utils/env-leak-scanner';
describe('ClaudeClient', () => {
let client: ClaudeClient;
@ -951,4 +953,83 @@ describe('ClaudeClient', () => {
expect(chunks[0]).toEqual({ type: 'assistant', content: 'Real content' });
});
});
describe('pre-spawn env leak gate', () => {
  let spyFindByDefaultCwd: ReturnType<typeof spyOn>;
  let spyFindByPathPrefix: ReturnType<typeof spyOn>;
  let spyScan: ReturnType<typeof spyOn>;

  beforeEach(() => {
    // Defaults: no registered codebase, clean scan. Each test overrides only
    // the behavior it exercises via mockReturnValueOnce/mockResolvedValueOnce.
    spyFindByDefaultCwd = spyOn(codebaseDb, 'findCodebaseByDefaultCwd').mockResolvedValue(null);
    spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null);
    spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({
      path: '/workspace',
      findings: [],
    });
    mockQuery.mockImplementation(async function* () {
      yield { type: 'result', session_id: 'sid-gate' };
    });
  });

  afterEach(() => {
    spyFindByDefaultCwd.mockRestore();
    spyFindByPathPrefix.mockRestore();
    spyScan.mockRestore();
  });

  test('throws EnvLeakError when .env contains sensitive keys and codebase has no consent', async () => {
    spyScan.mockReturnValueOnce({
      path: '/workspace',
      findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
    });
    // Use rejects.toThrow on the consumed generator's promise —
    // expect(asyncFn).toThrow does not reliably await an async rejection.
    // This also matches the pattern already used in codex.test.ts.
    const consumeGenerator = async (): Promise<void> => {
      for await (const _ of client.sendQuery('test', '/workspace')) {
        // consume
      }
    };
    await expect(consumeGenerator()).rejects.toThrow('Cannot add codebase');
  });

  test('skips scan when codebase has allow_env_keys: true', async () => {
    spyFindByDefaultCwd.mockResolvedValueOnce({
      id: 'codebase-1',
      allow_env_keys: true,
      default_cwd: '/workspace',
    });
    const chunks = [];
    for await (const chunk of client.sendQuery('test', '/workspace')) {
      chunks.push(chunk);
    }
    // Consent bypasses the scanner entirely; query still streams normally.
    expect(spyScan).not.toHaveBeenCalled();
    expect(chunks).toHaveLength(1);
  });

  test('proceeds when cwd has no registered codebase and no sensitive keys', async () => {
    const chunks = [];
    for await (const chunk of client.sendQuery('test', '/workspace')) {
      chunks.push(chunk);
    }
    // Unregistered cwd: scanner runs (no consent on file) but a clean report
    // lets the query proceed.
    expect(spyScan).toHaveBeenCalledTimes(1);
    expect(chunks).toHaveLength(1);
  });

  test('uses prefix lookup for worktree paths when exact match returns null', async () => {
    spyFindByPathPrefix.mockResolvedValueOnce({
      id: 'codebase-1',
      allow_env_keys: true,
      default_cwd: '/workspace/source',
    });
    const chunks = [];
    for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) {
      chunks.push(chunk);
    }
    // Exact lookup (default null) falls through to the prefix lookup, whose
    // consented codebase suppresses the scan for the worktree path.
    expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature');
    expect(spyScan).not.toHaveBeenCalled();
  });
});
});

View file

@ -27,6 +27,8 @@ import {
} from '../types';
import { createLogger } from '@archon/paths';
import { buildCleanSubprocessEnv } from '../utils/env-allowlist';
import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner';
import * as codebaseDb from '../db/codebases';
/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */
let cachedLog: ReturnType<typeof createLogger> | undefined;
@ -258,6 +260,19 @@ export class ClaudeClient implements IAssistantClient {
resumeSessionId?: string,
requestOptions?: AssistantRequestOptions
): AsyncGenerator<MessageChunk> {
// Pre-spawn: check for env key leak if codebase is not explicitly consented.
// Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still
// match the registered source cwd (e.g. .../source).
const codebase =
(await codebaseDb.findCodebaseByDefaultCwd(cwd)) ??
(await codebaseDb.findCodebaseByPathPrefix(cwd));
if (!codebase?.allow_env_keys) {
const report = scanPathForSensitiveKeys(cwd);
if (report.findings.length > 0) {
throw new EnvLeakError(report);
}
}
// Note: If subprocess crashes mid-stream after yielding chunks, those chunks
// are already consumed by the caller. Retry starts a fresh subprocess, so the
// caller may receive partial output from the failed attempt followed by full

View file

@ -1,4 +1,4 @@
import { describe, test, expect, mock, beforeEach } from 'bun:test';
import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test';
import { createMockLogger } from '../test/mocks/logger';
const mockLogger = createMockLogger();
@ -40,6 +40,8 @@ mock.module('@openai/codex-sdk', () => ({
}));
import { CodexClient } from './codex';
import * as codebaseDb from '../db/codebases';
import * as envLeakScanner from '../utils/env-leak-scanner';
describe('CodexClient', () => {
let client: CodexClient;
@ -1000,4 +1002,89 @@ describe('CodexClient', () => {
});
});
});
describe('pre-spawn env leak gate', () => {
  let spyFindByDefaultCwd: ReturnType<typeof spyOn>;
  let spyFindByPathPrefix: ReturnType<typeof spyOn>;
  let spyScan: ReturnType<typeof spyOn>;

  beforeEach(() => {
    // Restore a working runStreamed default so retry-test bleed doesn't break gate tests
    mockRunStreamed.mockResolvedValue({
      events: (async function* () {
        yield { type: 'turn.completed', usage: defaultUsage };
      })(),
    });
    // Defaults: no registered codebase, clean scan. Tests override only what
    // they exercise; both lookups already resolve to null here, so individual
    // tests do not need to re-queue null.
    spyFindByDefaultCwd = spyOn(codebaseDb, 'findCodebaseByDefaultCwd').mockResolvedValue(null);
    spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null);
    spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({
      path: '/workspace',
      findings: [],
    });
  });

  afterEach(() => {
    spyFindByDefaultCwd.mockRestore();
    spyFindByPathPrefix.mockRestore();
    spyScan.mockRestore();
  });

  test('throws EnvLeakError when .env contains sensitive keys and codebase has no consent', async () => {
    spyScan.mockReturnValueOnce({
      path: '/workspace',
      findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
    });
    const consumeGenerator = async (): Promise<void> => {
      for await (const _ of client.sendQuery('test', '/workspace')) {
        // consume
      }
    };
    await expect(consumeGenerator()).rejects.toThrow('Cannot add codebase');
  });

  test('skips scan when codebase has allow_env_keys: true', async () => {
    spyFindByDefaultCwd.mockResolvedValueOnce({
      id: 'codebase-1',
      allow_env_keys: true,
      default_cwd: '/workspace',
    });
    const chunks = [];
    for await (const chunk of client.sendQuery('test', '/workspace')) {
      chunks.push(chunk);
    }
    // Consent on the exact-match codebase bypasses the scanner entirely.
    expect(spyScan).not.toHaveBeenCalled();
  });

  test('proceeds when cwd has no registered codebase and no sensitive keys', async () => {
    const chunks = [];
    for await (const chunk of client.sendQuery('test', '/workspace')) {
      chunks.push(chunk);
    }
    // Unregistered cwd: scanner runs and its clean report lets the query proceed.
    expect(spyScan).toHaveBeenCalledTimes(1);
  });

  test('uses prefix lookup for worktree paths when exact match returns null', async () => {
    spyFindByPathPrefix.mockResolvedValueOnce({
      id: 'codebase-1',
      allow_env_keys: true,
      default_cwd: '/workspace/source',
    });
    const chunks = [];
    for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) {
      chunks.push(chunk);
    }
    // Exact lookup (default null) falls through to the consented prefix match,
    // which suppresses the scan for the worktree path.
    expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature');
    expect(spyScan).not.toHaveBeenCalled();
  });
});
});

View file

@ -18,6 +18,8 @@ import {
type TokenUsage,
} from '../types';
import { createLogger } from '@archon/paths';
import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner';
import * as codebaseDb from '../db/codebases';
/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */
let cachedLog: ReturnType<typeof createLogger> | undefined;
@ -154,6 +156,19 @@ export class CodexClient implements IAssistantClient {
resumeSessionId?: string,
options?: AssistantRequestOptions
): AsyncGenerator<MessageChunk> {
// Pre-spawn: check for env key leak if codebase is not explicitly consented.
// Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still
// match the registered source cwd (e.g. .../source).
const codebase =
(await codebaseDb.findCodebaseByDefaultCwd(cwd)) ??
(await codebaseDb.findCodebaseByPathPrefix(cwd));
if (!codebase?.allow_env_keys) {
const report = scanPathForSensitiveKeys(cwd);
if (report.findings.length > 0) {
throw new EnvLeakError(report);
}
}
const codex = getCodex();
const threadOptions = buildThreadOptions(cwd, options);

View file

@ -36,6 +36,7 @@ describe('codebases', () => {
repository_url: 'https://github.com/user/repo',
default_cwd: '/workspace/test-project',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: { plan: { path: '.claude/commands/plan.md', description: 'Plan feature' } },
created_at: new Date(),
updated_at: new Date(),
@ -54,8 +55,8 @@ describe('codebases', () => {
expect(result).toEqual(mockCodebase);
expect(mockQuery).toHaveBeenCalledWith(
'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *',
['test-project', 'https://github.com/user/repo', '/workspace/test-project', 'claude']
'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *',
['test-project', 'https://github.com/user/repo', '/workspace/test-project', 'claude', false]
);
});
@ -73,8 +74,8 @@ describe('codebases', () => {
expect(result).toEqual(codebaseWithoutOptional);
expect(mockQuery).toHaveBeenCalledWith(
'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *',
['test-project', null, '/workspace/test-project', 'claude']
'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *',
['test-project', null, '/workspace/test-project', 'claude', false]
);
});
@ -297,6 +298,7 @@ describe('codebases', () => {
name: 'test-repo',
default_cwd: '/workspace/test-repo',
ai_assistant_type: 'claude',
allow_env_keys: false,
repository_url: null,
commands: {},
created_at: new Date(),

View file

@ -17,11 +17,13 @@ export async function createCodebase(data: {
repository_url?: string;
default_cwd: string;
ai_assistant_type?: string;
allow_env_keys?: boolean;
}): Promise<Codebase> {
const assistantType = data.ai_assistant_type ?? 'claude';
const allowEnvKeys = data.allow_env_keys ?? false;
const result = await pool.query<Codebase>(
'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *',
[data.name, data.repository_url ?? null, data.default_cwd, assistantType]
'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *',
[data.name, data.repository_url ?? null, data.default_cwd, assistantType, allowEnvKeys]
);
if (!result.rows[0]) {
throw new Error('Failed to create codebase: INSERT succeeded but no row returned');
@ -96,6 +98,25 @@ export async function findCodebaseByDefaultCwd(defaultCwd: string): Promise<Code
return result.rows[0] || null;
}
/**
 * Find a codebase whose `default_cwd` is an ancestor of the given path.
 * Used for worktree-based runs where the actual `cwd` is a worktree subdirectory
 * of the registered source path — an exact match via `findCodebaseByDefaultCwd`
 * would always return null in that case.
 *
 * Matches only on path-segment boundaries: `/repo` matches `/repo` and
 * `/repo/worktrees/x` but NOT the sibling `/repo-other` — a bare
 * `LIKE default_cwd || '%'` would falsely match such siblings and could apply
 * one codebase's env-leak consent to an unrelated directory.
 *
 * Returns the codebase with the longest matching prefix (most specific match).
 */
export async function findCodebaseByPathPrefix(cwdPath: string): Promise<Codebase | null> {
  const result = await pool.query<Codebase>(
    `SELECT * FROM remote_agent_codebases
     WHERE $1 = default_cwd OR $1 LIKE default_cwd || '/%'
     ORDER BY length(default_cwd) DESC
     LIMIT 1`,
    [cwdPath]
  );
  return result.rows[0] || null;
}
export async function findCodebaseByName(name: string): Promise<Codebase | null> {
const result = await pool.query<Codebase>(
'SELECT * FROM remote_agent_codebases WHERE name = $1 ORDER BY created_at DESC LIMIT 1',

View file

@ -20,6 +20,7 @@ const mockCreateCodebase = mock(() =>
repository_url: 'https://github.com/owner/repo',
default_cwd: '/home/test/.archon/workspaces/owner/repo/source',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),
@ -66,6 +67,20 @@ mock.module('../utils/commands', () => ({
findMarkdownFilesRecursive: mockFindMarkdownFilesRecursive,
}));
// ── env-leak-scanner mock ───────────────────────────────────────────────────
// Stand-in for the real EnvLeakError: same name ('EnvLeakError') and a message
// starting with 'Cannot add codebase', which is what downstream error checks
// in these tests match on.
class MockEnvLeakError extends Error {
  constructor(public report: unknown) {
    super('Cannot add codebase — /test/path contains keys that will leak into AI subprocesses');
    this.name = 'EnvLeakError';
  }
}

// Default scan result is clean ('' path, no findings); individual tests queue
// findings via mockReturnValueOnce to trigger the registration gate.
const mockScanPathForSensitiveKeys = mock(() => ({ path: '', findings: [] }));

mock.module('../utils/env-leak-scanner', () => ({
  scanPathForSensitiveKeys: mockScanPathForSensitiveKeys,
  EnvLeakError: MockEnvLeakError,
}));
// ── Import module under test AFTER mocks are registered ────────────────────
import { cloneRepository, registerRepository } from './clone';
@ -103,6 +118,7 @@ function clearMocks(): void {
mockFindCodebaseByName.mockReset();
mockUpdateCodebase.mockReset();
mockFindMarkdownFilesRecursive.mockReset();
mockScanPathForSensitiveKeys.mockReset();
mockLogger.info.mockClear();
mockLogger.debug.mockClear();
mockLogger.warn.mockClear();
@ -116,6 +132,7 @@ function clearMocks(): void {
mockFindCodebaseByName.mockResolvedValue(null);
mockUpdateCodebase.mockResolvedValue(undefined);
mockFindMarkdownFilesRecursive.mockResolvedValue([]);
mockScanPathForSensitiveKeys.mockReturnValue({ path: '', findings: [] });
}
afterAll(() => {
@ -140,6 +157,7 @@ function makeCodebase(
repository_url: 'https://github.com/owner/repo',
default_cwd: '/home/test/.archon/workspaces/owner/repo/source',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),
@ -930,4 +948,27 @@ describe('RegisterResult shape', () => {
expect(result.alreadyExisted).toBe(true);
expect(result.commandCount).toBe(0);
});
describe('env leak gate', () => {
  test('throws EnvLeakError when scanner finds sensitive keys and allowEnvKeys is false', async () => {
    mockScanPathForSensitiveKeys.mockReturnValueOnce({
      path: '/home/test/.archon/workspaces/owner/repo/source',
      findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
    });

    await expect(cloneRepository('https://github.com/owner/repo')).rejects.toThrow(
      'Cannot add codebase'
    );
  });

  test('does not scan when allowEnvKeys is true, even with scanner findings available', async () => {
    mockCreateCodebase.mockResolvedValueOnce(makeCodebase() as ReturnType<typeof makeCodebase>);
    // Actually queue findings so this test proves consent bypasses the gate —
    // without them, the test would also pass under a broken gate that simply
    // ignored the scanner's (clean by default) result.
    mockScanPathForSensitiveKeys.mockReturnValueOnce({
      path: '/home/test/.archon/workspaces/owner/repo/source',
      findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
    });

    const result = await cloneRepository('https://github.com/owner/repo', true);

    expect(mockScanPathForSensitiveKeys).not.toHaveBeenCalled();
    expect(result.codebaseId).toBe('codebase-uuid-1');
  });
});
});

View file

@ -16,6 +16,7 @@ import {
parseOwnerRepo,
} from '@archon/paths';
import { findMarkdownFilesRecursive } from '../utils/commands';
import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner';
import { createLogger } from '@archon/paths';
/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */
@ -40,8 +41,17 @@ export interface RegisterResult {
async function registerRepoAtPath(
targetPath: string,
name: string,
repositoryUrl: string | null
repositoryUrl: string | null,
allowEnvKeys = false
): Promise<RegisterResult> {
// Scan for sensitive keys in auto-loaded .env files before registering
if (!allowEnvKeys) {
const report = scanPathForSensitiveKeys(targetPath);
if (report.findings.length > 0) {
throw new EnvLeakError(report);
}
}
// Auto-detect assistant type based on folder structure
let suggestedAssistant = 'claude';
const codexFolder = join(targetPath, '.codex');
@ -122,6 +132,7 @@ async function registerRepoAtPath(
repository_url: repositoryUrl ?? undefined,
default_cwd: targetPath,
ai_assistant_type: suggestedAssistant,
allow_env_keys: allowEnvKeys,
});
// Auto-load commands if found
@ -190,11 +201,14 @@ function normalizeRepoUrl(rawUrl: string): {
* Local paths (starting with /, ~, or .) are delegated to registerRepository
* to avoid wrong owner/repo naming. See #383 for broader rethink.
*/
export async function cloneRepository(repoUrl: string): Promise<RegisterResult> {
export async function cloneRepository(
repoUrl: string,
allowEnvKeys?: boolean
): Promise<RegisterResult> {
// Local paths should be registered (symlink), not cloned (copied)
if (repoUrl.startsWith('/') || repoUrl.startsWith('~') || repoUrl.startsWith('.')) {
const resolvedPath = repoUrl.startsWith('~') ? expandTilde(repoUrl) : resolve(repoUrl);
return registerRepository(resolvedPath);
return registerRepository(resolvedPath, allowEnvKeys);
}
const { workingUrl, ownerName, repoName, targetPath } = normalizeRepoUrl(repoUrl);
@ -275,7 +289,12 @@ export async function cloneRepository(repoUrl: string): Promise<RegisterResult>
await execFileAsync('git', ['config', '--global', '--add', 'safe.directory', targetPath]);
getLog().debug({ path: targetPath }, 'safe_directory_added');
const result = await registerRepoAtPath(targetPath, `${ownerName}/${repoName}`, workingUrl);
const result = await registerRepoAtPath(
targetPath,
`${ownerName}/${repoName}`,
workingUrl,
allowEnvKeys
);
getLog().info({ url: workingUrl, targetPath }, 'clone_completed');
return result;
}
@ -283,7 +302,10 @@ export async function cloneRepository(repoUrl: string): Promise<RegisterResult>
/**
* Register an existing local repository in the database (no git clone).
*/
export async function registerRepository(localPath: string): Promise<RegisterResult> {
export async function registerRepository(
localPath: string,
allowEnvKeys?: boolean
): Promise<RegisterResult> {
// Validate path exists and is a git repo
try {
await execFileAsync('git', ['-C', localPath, 'rev-parse', '--git-dir']);
@ -349,5 +371,5 @@ export async function registerRepository(localPath: string): Promise<RegisterRes
);
// default_cwd is the real local path (not the symlink)
return registerRepoAtPath(localPath, name, remoteUrl);
return registerRepoAtPath(localPath, name, remoteUrl, allowEnvKeys);
}

View file

@ -511,6 +511,7 @@ describe('CommandHandler', () => {
repository_url: 'https://github.com/user/my-repo',
default_cwd: '/workspace/my-repo',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),
@ -566,6 +567,7 @@ describe('CommandHandler', () => {
repository_url: 'https://github.com/owner/repo',
default_cwd: '/workspace/repo',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),
@ -604,6 +606,7 @@ describe('CommandHandler', () => {
repository_url: 'https://github.com/owner/orphaned-repo',
default_cwd: '/workspace/orphaned-repo',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),
@ -718,6 +721,7 @@ describe('CommandHandler', () => {
repository_url: 'https://github.com/user/my-repo',
default_cwd: '/workspace/my-repo',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),

View file

@ -145,6 +145,14 @@ export { toError } from './utils/error';
// Credential sanitization
export { sanitizeCredentials, sanitizeError } from './utils/credential-sanitizer';
// Env leak scanner
export {
EnvLeakError,
scanPathForSensitiveKeys,
formatLeakError,
type LeakReport,
} from './utils/env-leak-scanner';
// GitHub GraphQL
export { getLinkedIssueNumbers } from './utils/github-graphql';

View file

@ -181,6 +181,7 @@ function makeCodebase(name: string, id = `id-${name}`): Codebase {
repository_url: null,
default_cwd: `/repos/${name}`,
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),
@ -804,6 +805,7 @@ function makeCodebaseForSync() {
repository_url: 'https://github.com/test/repo',
default_cwd: '/repos/test-repo',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),
@ -969,6 +971,7 @@ describe('workflow dispatch routing — interactive flag', () => {
repository_url: null,
default_cwd: '/repos/test-repo',
ai_assistant_type: 'claude' as const,
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),
@ -1069,6 +1072,7 @@ describe('natural-language approval routing', () => {
repository_url: null,
default_cwd: '/repos/test-repo',
ai_assistant_type: 'claude' as const,
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),

View file

@ -176,6 +176,7 @@ function makeCodebase(overrides?: Partial<Codebase>): Codebase {
id: 'cb-1',
name: 'test-repo',
default_cwd: '/workspace/test-repo',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),

View file

@ -216,6 +216,7 @@ const mockCodebase: Codebase = {
repository_url: 'https://github.com/user/repo',
default_cwd: '/workspace/test-project',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date(),
updated_at: new Date(),

View file

@ -57,6 +57,7 @@ export interface Codebase {
repository_url: string | null;
default_cwd: string;
ai_assistant_type: string;
allow_env_keys: boolean;
commands: Record<string, { path: string; description: string }>;
created_at: Date;
updated_at: Date;

View file

@ -0,0 +1,110 @@
import { describe, it, expect, beforeEach, afterEach } from 'bun:test';
import { writeFileSync, mkdirSync, rmSync } from 'fs';
import { join } from 'path';
import {
scanPathForSensitiveKeys,
EnvLeakError,
SENSITIVE_KEYS,
AUTOLOADED_FILES,
} from './env-leak-scanner';
describe('scanPathForSensitiveKeys', () => {
  const tmpDir = '/tmp/archon-test-env-scan';

  beforeEach(() => {
    mkdirSync(tmpDir, { recursive: true });
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  it('returns empty findings for clean directory', () => {
    const report = scanPathForSensitiveKeys(tmpDir);
    expect(report.findings).toHaveLength(0);
  });

  it('returns empty findings for non-existent directory', () => {
    const report = scanPathForSensitiveKeys('/tmp/archon-test-nonexistent-dir');
    expect(report.findings).toHaveLength(0);
  });

  // Each sensitive key × each auto-loaded filename
  for (const key of SENSITIVE_KEYS) {
    for (const filename of AUTOLOADED_FILES) {
      // Note: interpolate the actual filename into the test name (was garbled).
      it(`detects ${key} in ${filename}`, () => {
        writeFileSync(join(tmpDir, filename), `${key}=sk-test-value\nOTHER=safe\n`);
        const report = scanPathForSensitiveKeys(tmpDir);
        expect(report.findings).toHaveLength(1);
        expect(report.findings[0].file).toBe(filename);
        expect(report.findings[0].keys).toContain(key);
        // Clean up for next iteration
        rmSync(join(tmpDir, filename));
      });
    }
  }

  it('ignores commented-out keys', () => {
    writeFileSync(join(tmpDir, '.env'), '# ANTHROPIC_API_KEY=value\n');
    const report = scanPathForSensitiveKeys(tmpDir);
    expect(report.findings).toHaveLength(0);
  });

  it('ignores lines without =', () => {
    writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY\n');
    const report = scanPathForSensitiveKeys(tmpDir);
    expect(report.findings).toHaveLength(0);
  });

  it('reports multiple files with findings', () => {
    writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY=sk-1\n');
    writeFileSync(join(tmpDir, '.env.local'), 'OPENAI_API_KEY=sk-2\n');
    const report = scanPathForSensitiveKeys(tmpDir);
    expect(report.findings).toHaveLength(2);
  });

  it('reports multiple keys in same file', () => {
    writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY=sk-1\nOPENAI_API_KEY=sk-2\n');
    const report = scanPathForSensitiveKeys(tmpDir);
    expect(report.findings).toHaveLength(1);
    expect(report.findings[0].keys).toHaveLength(2);
  });

  it('ignores non-autoloaded filenames', () => {
    writeFileSync(join(tmpDir, '.env.secrets'), 'ANTHROPIC_API_KEY=sk-1\n');
    const report = scanPathForSensitiveKeys(tmpDir);
    expect(report.findings).toHaveLength(0);
  });

  it('ignores safe keys', () => {
    writeFileSync(join(tmpDir, '.env'), 'DATABASE_URL=postgres://localhost\nNODE_ENV=dev\n');
    const report = scanPathForSensitiveKeys(tmpDir);
    expect(report.findings).toHaveLength(0);
  });
});
describe('EnvLeakError', () => {
  it('is instanceof EnvLeakError and Error', () => {
    const singleFinding = { path: '/tmp', findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }] };
    const err = new EnvLeakError(singleFinding);

    // Identity and prototype chain survive the Error subclass constructor.
    expect(err).toBeInstanceOf(Error);
    expect(err).toBeInstanceOf(EnvLeakError);
    expect(err.name).toBe('EnvLeakError');
    // The message embeds the leaked key, and the original report is retained.
    expect(err.message).toContain('ANTHROPIC_API_KEY');
    expect(err.report).toBe(singleFinding);
  });

  it('formats multiple findings', () => {
    const multiFinding = {
      path: '/test',
      findings: [
        { file: '.env', keys: ['ANTHROPIC_API_KEY'] },
        { file: '.env.local', keys: ['OPENAI_API_KEY', 'GEMINI_API_KEY'] },
      ],
    };
    const err = new EnvLeakError(multiFinding);

    // Every file and every key from every finding appears in the message.
    for (const expected of ['.env', '.env.local', 'OPENAI_API_KEY', 'GEMINI_API_KEY']) {
      expect(err.message).toContain(expected);
    }
  });
});

View file

@ -0,0 +1,104 @@
import { readFileSync, existsSync } from 'fs';
import { join } from 'path';
/**
 * Env var names whose presence in an auto-loaded .env would override the
 * OAuth/subscription auth used by AI subprocesses and silently bill the
 * wrong API account (see commit message / #1034).
 */
export const SENSITIVE_KEYS = new Set([
  'ANTHROPIC_API_KEY',
  'ANTHROPIC_AUTH_TOKEN',
  'CLAUDE_API_KEY',
  'CLAUDE_CODE_OAUTH_TOKEN',
  'OPENAI_API_KEY',
  'CODEX_API_KEY',
  'GEMINI_API_KEY',
]);

/**
 * Filenames Bun auto-loads from a subprocess working directory regardless of
 * the env passed to Bun.spawn. Only these exact names are scanned — assumes
 * this list matches Bun's auto-load set; TODO re-verify against Bun's docs
 * when upgrading Bun.
 */
export const AUTOLOADED_FILES = [
  '.env',
  '.env.local',
  '.env.development',
  '.env.production',
  '.env.development.local',
  '.env.production.local',
];

/** One scanned file plus the sensitive key names found in it. */
export interface LeakFinding {
  file: string;
  keys: string[];
}

/** Result of scanning one directory: the path scanned and all findings. */
export interface LeakReport {
  path: string;
  findings: LeakFinding[];
}

/**
 * Thrown when a directory contains auto-loaded .env files with sensitive keys
 * and the codebase has not been explicitly consented (allow_env_keys).
 * Carries the full {@link LeakReport}; message is human-readable via
 * formatLeakError.
 */
export class EnvLeakError extends Error {
  constructor(public readonly report: LeakReport) {
    super(formatLeakError(report));
    this.name = 'EnvLeakError';
  }
}
/**
* Scan `dirPath` for auto-loaded .env files containing sensitive keys.
* Pure function no side effects.
*/
export function scanPathForSensitiveKeys(dirPath: string): LeakReport {
const findings: LeakFinding[] = [];
for (const filename of AUTOLOADED_FILES) {
const fullPath = join(dirPath, filename);
if (!existsSync(fullPath)) continue;
let contents: string;
try {
contents = readFileSync(fullPath, 'utf8');
} catch (err) {
// File exists but is unreadable — treat as a finding to avoid silently bypassing the gate
const code = (err as NodeJS.ErrnoException).code;
findings.push({ file: filename, keys: [`[unreadable — ${code ?? 'unknown error'}]`] });
continue;
}
const foundKeys: string[] = [];
for (const line of contents.split('\n')) {
const trimmed = line.trim();
if (trimmed.startsWith('#') || !trimmed.includes('=')) continue;
const key = trimmed.split('=')[0].trim();
if (SENSITIVE_KEYS.has(key)) {
foundKeys.push(key);
}
}
if (foundKeys.length > 0) {
findings.push({ file: filename, keys: foundKeys });
}
}
return { path: dirPath, findings };
}
/**
 * Build the user-facing refusal message for a leak report.
 *
 * NOTE: the web UI detects this error via message.startsWith('Cannot add
 * codebase'), so the opening phrase must not change.
 */
export function formatLeakError(report: LeakReport): string {
  // One line per flagged file: "  <file>: KEY_A, KEY_B". The original
  // concatenated file and keys with no separator (".envANTHROPIC_API_KEY").
  const fileList = report.findings
    .map(f => `  ${f.file}: ${f.keys.join(', ')}`)
    .join('\n');
  return `Cannot add codebase — ${report.path} contains keys that will leak into AI subprocesses

Found:
${fileList}

Why this matters:
  Bun subprocesses auto-load .env from their working directory. Archon cleans
  its own environment, but Claude/Codex subprocesses running with cwd=<this repo>
  will re-inject these keys at their own startup, bypassing archon's allowlist.
  This can bill the wrong API account silently.

Choose one:
  1. Remove the key from this repo's .env (recommended):
     grep -v '^ANTHROPIC_API_KEY=' .env > .env.tmp && mv .env.tmp .env
  2. Rename to a non-auto-loaded file:
     mv .env .env.secrets
     # update your app to load it explicitly
  3. Acknowledge the risk and allow this codebase to use its .env key:
     Open the web UI (Settings → Projects → Add Project) and tick
     "Allow env keys (I understand the risk)" when adding this project.`;
}

View file

@ -48,6 +48,14 @@ mock.module('@archon/core', () => ({
this.name = 'ConversationNotFoundError';
}
},
EnvLeakError: class EnvLeakError extends Error {
constructor(public report: { path: string; findings: { file: string; keys: string[] }[] }) {
super(
`Cannot add codebase — ${report.path} contains keys that will leak into AI subprocesses`
);
this.name = 'EnvLeakError';
}
},
getArchonWorkspacesPath: () => '/tmp/.archon/workspaces',
generateAndSetTitle: mock(async () => {}),
createLogger: () => ({
@ -170,6 +178,7 @@ const MOCK_CODEBASE = {
repository_url: 'https://github.com/user/repo',
default_cwd: '/home/user/projects/my-project',
ai_assistant_type: 'claude',
allow_env_keys: false,
commands: {},
created_at: new Date().toISOString(),
updated_at: new Date().toISOString(),
@ -387,7 +396,7 @@ describe('POST /api/codebases', () => {
const body = (await response.json()) as { id: string };
expect(body.id).toBe('codebase-uuid-1');
expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo');
expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo', undefined);
});
test('registers existing URL codebase with 200', async () => {
@ -424,7 +433,7 @@ describe('POST /api/codebases', () => {
body: JSON.stringify({ path: '/home/user/my-repo' }),
});
expect(response.status).toBe(201);
expect(mockRegisterRepository).toHaveBeenCalledWith('/home/user/my-repo');
expect(mockRegisterRepository).toHaveBeenCalledWith('/home/user/my-repo', undefined);
});
test('returns 400 when both url and path are provided', async () => {
@ -496,6 +505,44 @@ describe('POST /api/codebases', () => {
const body = (await response.json()) as { error: string };
expect(body.error).toContain('authentication required');
});
test('returns 422 when cloneRepository throws EnvLeakError', async () => {
const { EnvLeakError } = await import('@archon/core');
mockCloneRepository.mockImplementationOnce(async () => {
throw new EnvLeakError({
path: '/repo/path',
findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
});
});
const app = makeApp();
const response = await app.request('/api/codebases', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url: 'https://github.com/user/repo' }),
});
expect(response.status).toBe(422);
const body = (await response.json()) as { error: string };
expect(body.error).toContain('Cannot add codebase');
});
test('passes allowEnvKeys=true to cloneRepository when body includes it', async () => {
mockCloneRepository.mockImplementationOnce(async () => ({
codebaseId: 'clone-uuid-2',
alreadyExisted: false,
}));
mockGetCodebase.mockImplementationOnce(async () => MOCK_CODEBASE);
const app = makeApp();
const response = await app.request('/api/codebases', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url: 'https://github.com/user/repo', allowEnvKeys: true }),
});
expect(response.status).toBe(201);
expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo', true);
});
});
// ---------------------------------------------------------------------------

View file

@ -27,6 +27,7 @@ import {
registerRepository,
ConversationNotFoundError,
generateAndSetTitle,
EnvLeakError,
} from '@archon/core';
import { removeWorktree, toRepoPath, toWorktreePath } from '@archon/git';
import {
@ -816,7 +817,7 @@ export function registerApiRoutes(
): void {
function apiError(
c: Context,
status: 400 | 404 | 500,
status: 400 | 404 | 422 | 500,
message: string,
detail?: string
): Response {
@ -1482,8 +1483,8 @@ export function registerApiRoutes(
try {
// .refine() guarantees exactly one of url/path is present
const result = body.url
? await cloneRepository(body.url)
: await registerRepository(body.path ?? '');
? await cloneRepository(body.url, body.allowEnvKeys)
: await registerRepository(body.path ?? '', body.allowEnvKeys);
// Fetch the full codebase record for a consistent response
const codebase = await codebaseDb.getCodebase(result.codebaseId);
@ -1493,6 +1494,12 @@ export function registerApiRoutes(
return c.json(codebase, result.alreadyExisted ? 200 : 201);
} catch (error) {
if (error instanceof EnvLeakError) {
const path = body.url ?? body.path ?? '';
const files = error.report.findings.map(f => f.file);
getLog().warn({ path, files }, 'add_codebase_env_leak_refused');
return apiError(c, 422, error.message);
}
getLog().error({ err: error }, 'add_codebase_failed');
return apiError(
c,

View file

@ -16,6 +16,7 @@ export const codebaseSchema = z
repository_url: z.string().nullable(),
default_cwd: z.string(),
ai_assistant_type: z.string(),
allow_env_keys: z.boolean(),
commands: z.record(codebaseCommandSchema),
created_at: z.string(),
updated_at: z.string(),
@ -33,6 +34,7 @@ export const addCodebaseBodySchema = z
.object({
url: z.string().min(1).optional(),
path: z.string().min(1).optional(),
allowEnvKeys: z.boolean().optional(),
})
.refine(b => (b.url !== undefined) !== (b.path !== undefined), {
message: 'Provide either "url" or "path", not both and not neither',

View file

@ -38,6 +38,7 @@ export interface CodebaseResponse {
repository_url: string | null;
default_cwd: string;
ai_assistant_type: string;
allow_env_keys: boolean;
commands: Record<string, { path: string; description: string }>;
created_at: string;
updated_at: string;
@ -157,7 +158,7 @@ export async function getCodebase(id: string): Promise<CodebaseResponse> {
}
export async function addCodebase(
input: { url: string } | { path: string }
input: { url: string; allowEnvKeys?: boolean } | { path: string; allowEnvKeys?: boolean }
): Promise<CodebaseResponse> {
return fetchJSON<CodebaseResponse>('/api/codebases', {
method: 'POST',

View file

@ -250,10 +250,20 @@ function EnvVarsPanel({ codebaseId }: { codebaseId: string }): React.ReactElemen
);
}
function isEnvLeakError(error: unknown): boolean {
return (
error instanceof Error &&
'status' in error &&
(error as Error & { status: number }).status === 422 &&
error.message.startsWith('Cannot add codebase')
);
}
function ProjectsSection(): React.ReactElement {
const queryClient = useQueryClient();
const [addPath, setAddPath] = useState('');
const [showAdd, setShowAdd] = useState(false);
const [allowEnvKeys, setAllowEnvKeys] = useState(false);
const [expandedEnvVars, setExpandedEnvVars] = useState<string | null>(null);
const { data: codebases } = useQuery({
@ -262,11 +272,13 @@ function ProjectsSection(): React.ReactElement {
});
const addMutation = useMutation({
mutationFn: (path: string) => addCodebase({ path }),
mutationFn: ({ path, allowEnvKeys }: { path: string; allowEnvKeys?: boolean }) =>
addCodebase({ path, allowEnvKeys }),
onSuccess: () => {
void queryClient.invalidateQueries({ queryKey: ['codebases'] });
setAddPath('');
setShowAdd(false);
setAllowEnvKeys(false);
},
});
@ -280,7 +292,7 @@ function ProjectsSection(): React.ReactElement {
function handleAddSubmit(e: React.FormEvent): void {
e.preventDefault();
if (addPath.trim()) {
addMutation.mutate(addPath.trim());
addMutation.mutate({ path: addPath.trim(), allowEnvKeys: allowEnvKeys || undefined });
}
}
@ -373,6 +385,18 @@ function ProjectsSection(): React.ReactElement {
{addMutation.error instanceof Error
? addMutation.error.message
: 'Failed to add project'}
{isEnvLeakError(addMutation.error) && (
<label className="mt-2 flex items-center gap-2 text-text-secondary">
<input
type="checkbox"
checked={allowEnvKeys}
onChange={e => {
setAllowEnvKeys(e.target.checked);
}}
/>
Allow env keys (I understand the risk)
</label>
)}
</div>
)}
</CardContent>

View file

@ -67,8 +67,13 @@ export function matchesPattern(message: string, patterns: string[]): boolean {
* Classify an error to determine if it's transient (can retry) or fatal (should fail).
* FATAL patterns take priority over TRANSIENT patterns to prevent an error message
* containing both (e.g. "unauthorized: process exited with code 1") from being retried.
*
* First-party named error types are checked by name (immune to message rewording).
*/
export function classifyError(error: Error): ErrorType {
// Named first-party errors checked by name — immune to message rewording
if (error.name === 'EnvLeakError') return 'FATAL';
const message = error.message.toLowerCase();
if (matchesPattern(message, FATAL_PATTERNS)) {