Archon/src/workflows/executor.test.ts
Rasmus Widing 779f9af63e
Fix: Add stale workflow cleanup and defense-in-depth error handling (#237)
* Fix: Add stale workflow cleanup and defense-in-depth error handling

Problem: Workflows could get stuck in "running" state indefinitely when
the async generator disconnected but the AI subprocess continued working.
This blocked new workflow invocations with "Workflow already running" errors.

Root cause: No cleanup mechanism existed for workflows that failed to
complete due to disconnection between the executor and the Claude SDK.

Solution (defense-in-depth):
1. Activity-based staleness detection: Workflows inactive for 15+ minutes
   are auto-failed when a new workflow is triggered on the same conversation
2. Top-level error handling: All errors in workflow execution are caught
   and the workflow is properly marked as failed (prevents stuck state)
3. Manual cancel command: /workflow cancel lets users force-fail stuck
   workflows immediately

Changes:
- Add last_activity_at column via migration for staleness tracking
- Add updateWorkflowActivity() to track activity during execution
- Add staleness check before blocking concurrent workflows
- Wrap workflow execution in try-catch to ensure failure is recorded
- Add /workflow cancel subcommand to command handler
- Update test to match new error handling behavior

Fixes #232

* docs: Add /workflow cancel command to documentation

* Improve error handling and add comprehensive tests for stale workflow cleanup

Error handling improvements:
- Add workflow ID and error context to updateWorkflowActivity logs
- Add stack trace, error name, and cause to top-level catch block
- Separate DB failure recording from file logging for clearer error messages
- Add try-catch around staleness cleanup with user-facing error message
- Check sendCriticalMessage return value and log when user not notified

Test coverage additions:
- Add staleness detection tests (stale vs non-stale, fallback to started_at)
- Add /workflow cancel command tests
- Add updateWorkflowActivity function tests (including non-throwing behavior)

All 845 tests pass, type-check clean, lint clean.
2026-01-15 21:31:38 +02:00

2818 lines
97 KiB
TypeScript

import { describe, it, expect, beforeEach, afterEach, mock } from 'bun:test';
import { mkdir, writeFile, rm, readFile } from 'fs/promises';
import { join } from 'path';
import { tmpdir } from 'os';
import type { IPlatformAdapter } from '../types';
import type { WorkflowDefinition } from './types';
import { createQueryResult } from '../test/mocks/database';
/**
 * Stand-in for `pool.query`, installed at the connection level so the real
 * db/workflows module stays un-mocked.
 *
 * Routing is by SQL substring:
 * - a lookup containing `status = 'running'` yields no rows (no active run),
 * - an INSERT into `remote_agent_workflow_runs` yields one canned run row,
 * - anything else (UPDATEs and other operations) yields no rows.
 */
const mockQuery = mock((query: string) => {
  const emptyResult = () => Promise.resolve(createQueryResult([]));

  // The active-run lookup is checked first, exactly as before.
  if (query.includes("status = 'running'")) {
    return emptyResult();
  }
  if (!query.includes('INSERT INTO remote_agent_workflow_runs')) {
    return emptyResult();
  }

  // Row handed back as the freshly created workflow run.
  const createdRun = {
    id: 'test-workflow-run-id',
    workflow_name: 'test-workflow',
    conversation_id: 'conv-123',
    codebase_id: 'codebase-456',
    current_step_index: 0,
    status: 'running' as const,
    user_message: 'test user message',
    metadata: {},
    started_at: new Date(),
    completed_at: null,
  };
  return Promise.resolve(createQueryResult([createdRun]));
});

// Replace the connection module so every query goes through mockQuery.
mock.module('../db/connection', () => {
  const pool = { query: mockQuery };
  return { pool };
});
// Note: We use the REAL logger (not mocked) so it writes to temp directories
// This avoids test pollution with logger.test.ts
/**
 * Default scripted AI conversation: one assistant message followed by the
 * result message carrying a session id.
 */
function* scriptedAiMessages() {
  yield { type: 'assistant', content: 'AI response' };
  yield { type: 'result', sessionId: 'new-session-id' };
}

// Mock AI client whose sendQuery replays the script above.
const mockSendQuery = mock(scriptedAiMessages);

// Factory mock: every requested assistant is the same fake client.
const mockGetAssistantClient = mock(() => {
  return {
    sendQuery: mockSendQuery,
    getType: () => 'claude',
  };
});

mock.module('../clients/factory', () => {
  return { getAssistantClient: mockGetAssistantClient };
});
/**
 * Builds a platform adapter in which every method is a fresh `mock`, so each
 * test can inspect calls or override return values independently.
 *
 * Defaults: batch streaming mode, platform type `'test'`, `ensureThread`
 * echoes the id it was given, and the remaining methods resolve/return nothing.
 */
function createMockPlatform(): IPlatformAdapter {
  const adapter: IPlatformAdapter = {
    sendMessage: mock(() => Promise.resolve()),
    ensureThread: mock((id: string) => Promise.resolve(id)),
    getStreamingMode: mock(() => 'batch' as const),
    getPlatformType: mock(() => 'test'),
    start: mock(() => Promise.resolve()),
    stop: mock(() => {}),
  };
  return adapter;
}
// Import after mocks are set up
import { executeWorkflow, isValidCommandName } from './executor';
describe('Workflow Executor', () => {
let mockPlatform: IPlatformAdapter;
let testDir: string;
// Per-test setup: fresh platform adapter, pristine shared mocks, and a unique
// temp working directory pre-populated with the command prompt files that the
// default workflows reference.
beforeEach(async () => {
  // A brand-new adapter has no recorded calls, so it needs no clearing.
  mockPlatform = createMockPlatform();

  mockQuery.mockClear();
  mockSendQuery.mockClear();
  mockGetAssistantClient.mockClear();

  // mockClear() only wipes call history. Re-install the default AI script so
  // an implementation swapped in by an earlier test (and not restored, e.g.
  // because an assertion failed first) cannot leak into this one.
  mockSendQuery.mockImplementation(function* () {
    yield { type: 'assistant', content: 'AI response' };
    yield { type: 'result', sessionId: 'new-session-id' };
  });

  // Create unique temp directory for each test with command files
  testDir = join(tmpdir(), `executor-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
  const commandsDir = join(testDir, '.archon', 'commands');
  await mkdir(commandsDir, { recursive: true });

  // The prompt files are independent of each other, so write them in parallel.
  await Promise.all([
    writeFile(join(commandsDir, 'command-one.md'), 'Command one prompt for $USER_MESSAGE'),
    writeFile(join(commandsDir, 'command-two.md'), 'Command two prompt'),
    writeFile(join(commandsDir, 'first-command.md'), 'First command prompt'),
  ]);
});
// Per-test teardown: best-effort removal of the temp working directory.
afterEach(async () => {
  // Cleanup failures are deliberately ignored; they must not fail a test.
  await rm(testDir, { force: true, recursive: true }).catch(() => undefined);
});
describe('executeWorkflow', () => {
  // Two-step workflow backed by the prompt files created in the outer beforeEach.
  const testWorkflow: WorkflowDefinition = {
    name: 'test-workflow',
    description: 'A test workflow',
    provider: 'claude',
    steps: [{ command: 'command-one' }, { command: 'command-two' }],
  };

  /** Executes `definition` with the standard conversation arguments and no codebase. */
  const run = (definition: WorkflowDefinition = testWorkflow) =>
    executeWorkflow(mockPlatform, 'conv-123', testDir, definition, 'User message', 'db-conv-id');

  /** Recorded `pool.query` calls whose SQL contains every one of `fragments`. */
  const queriesWith = (...fragments: string[]) =>
    mockQuery.mock.calls.filter((call: unknown[]) =>
      fragments.every(fragment => (call[0] as string).includes(fragment))
    );

  /** Second argument of every `sendMessage` call, in send order. */
  const sentTexts = (): string[] =>
    (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls.map(
      (call: unknown[]) => call[1] as string
    );

  /** Parses the JSONL log file written for the mocked workflow run id. */
  const readLoggedEvents = async (): Promise<{ type: string }[]> => {
    const raw = await readFile(
      join(testDir, '.archon', 'logs', 'test-workflow-run-id.jsonl'),
      'utf-8'
    );
    return raw
      .trim()
      .split('\n')
      .map(line => JSON.parse(line));
  };

  it('should create a workflow run record', async () => {
    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      testWorkflow,
      'User wants to do something',
      'db-conv-id',
      'codebase-id'
    );

    // The INSERT must carry the workflow name, conversation id and codebase id.
    const inserts = queriesWith('INSERT INTO remote_agent_workflow_runs');
    expect(inserts.length).toBeGreaterThan(0);
    const params = inserts[0][1] as string[];
    for (const expected of ['test-workflow', 'db-conv-id', 'codebase-id']) {
      expect(params).toContain(expected);
    }
  });

  it('should send workflow start notification', async () => {
    await run();

    // The very first outbound message is the start notification.
    const opening = sentTexts()[0];
    expect(opening).toContain('🚀 **Starting workflow**: `test-workflow`');
    expect(opening).toContain('A test workflow');
    expect(opening).toContain('`command-one` -> `command-two`');
  });

  it('should execute each step and send notifications', async () => {
    await run();

    // One progress notification per step.
    const texts = sentTexts();
    expect(texts.some(text => text.includes('⏳ **Step 1/2**: `command-one`'))).toBe(true);
    expect(texts.some(text => text.includes('⏳ **Step 2/2**: `command-two`'))).toBe(true);
  });

  it('should log workflow events', async () => {
    await run();

    // The log file is the source of truth for emitted events.
    const types = (await readLoggedEvents()).map(event => event.type);
    const occurrences = (wanted: string) => types.filter(type => type === wanted);

    expect(types).toContain('workflow_start');
    expect(occurrences('step_start')).toHaveLength(2);
    expect(occurrences('step_complete')).toHaveLength(2);
    expect(types).toContain('workflow_complete');
  });

  it('should update workflow run progress after each step', async () => {
    await run();

    // Step progress is persisted through UPDATEs touching current_step_index.
    expect(queriesWith('UPDATE', 'current_step_index').length).toBeGreaterThan(0);
  });

  it('should complete workflow run on success', async () => {
    await run();

    // Success is recorded as an UPDATE setting the 'completed' status.
    expect(queriesWith('UPDATE', "'completed'").length).toBeGreaterThan(0);
  });

  it('should send completion message', async () => {
    await run();

    const texts = sentTexts();
    const closing = texts[texts.length - 1];
    expect(closing).toContain('✅ **Workflow complete**: `test-workflow`');
  });

  // Platform-specific completion message behavior:
  // GitHub suppresses completion messages (its comment-based interface makes
  // them redundant); every other platform receives one.
  it.each([
    { platform: 'telegram', shouldSendCompletion: true },
    { platform: 'slack', shouldSendCompletion: true },
    { platform: 'discord', shouldSendCompletion: true },
    { platform: 'github', shouldSendCompletion: false },
  ])(
    '$platform platform: completion message should be $shouldSendCompletion',
    async ({ platform, shouldSendCompletion }) => {
      (mockPlatform.getPlatformType as ReturnType<typeof mock>).mockReturnValue(platform);

      await run();

      const completions = sentTexts().filter(text => text.includes('**Workflow complete**'));
      if (!shouldSendCompletion) {
        expect(completions).toHaveLength(0);
        return;
      }
      expect(completions.length).toBeGreaterThan(0);
    }
  );

  it('should handle missing command prompt file', async () => {
    const workflowWithMissingCommand: WorkflowDefinition = {
      name: 'missing-command-workflow',
      description: 'Has a missing command',
      steps: [{ command: 'nonexistent-command' }],
    };

    await run(workflowWithMissingCommand);

    // The run is marked failed in the database...
    expect(queriesWith('UPDATE', "'failed'").length).toBeGreaterThan(0);

    // ...the error is written to the log file...
    const events = await readLoggedEvents();
    expect(events.some(event => event.type === 'workflow_error')).toBe(true);

    // ...and the user is told about the failure.
    expect(sentTexts().some(text => text.includes('❌ **Workflow failed**'))).toBe(true);
  });

  it('should handle codebase_id being undefined', async () => {
    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      testWorkflow,
      'User message',
      'db-conv-id',
      undefined // no codebase
    );

    const inserts = queriesWith('INSERT INTO remote_agent_workflow_runs');
    expect(inserts.length).toBeGreaterThan(0);

    // codebase_id is the 3rd INSERT parameter and must be stored as null.
    const params = inserts[0][1] as (string | null)[];
    expect(params[2]).toBeNull();
  });
});
describe('step context management', () => {
  /** Writes the prompt file for `command` into the temp command directory. */
  const addCommand = (command: string, prompt: string) =>
    writeFile(join(testDir, '.archon', 'commands', `${command}.md`), prompt);

  /** Asserts that an UPDATE setting the 'completed' status was issued. */
  const expectMarkedCompleted = () => {
    const completions = mockQuery.mock.calls.filter((call: unknown[]) => {
      const sql = call[0] as string;
      return sql.includes('UPDATE') && sql.includes("'completed'");
    });
    expect(completions.length).toBeGreaterThan(0);
  };

  it('should start fresh session for first step', async () => {
    await addCommand('first', 'First step prompt');
    const workflow: WorkflowDefinition = {
      name: 'fresh-context-test',
      description: 'Test fresh context',
      steps: [{ command: 'first' }],
    };

    await executeWorkflow(mockPlatform, 'conv-123', testDir, workflow, 'User message', 'db-conv-id');

    // A successful run is observable through the completion query.
    expectMarkedCompleted();
  });

  it('should respect clearContext flag', async () => {
    // Extra prompt files needed only by this test.
    await addCommand('context-one', 'Command one');
    await addCommand('context-two', 'Command two');
    const workflow: WorkflowDefinition = {
      name: 'clear-context-test',
      description: 'Test clear context',
      steps: [{ command: 'context-one' }, { command: 'context-two', clearContext: true }],
    };

    await executeWorkflow(mockPlatform, 'conv-123', testDir, workflow, 'User message', 'db-conv-id');

    // Both steps reach the AI client and the run completes.
    expect(mockSendQuery).toHaveBeenCalledTimes(2);
    expectMarkedCompleted();
  });
});
describe('edge cases', () => {
  /** Writes the prompt file for `command` into the temp command directory. */
  const addCommand = (command: string, prompt: string) =>
    writeFile(join(testDir, '.archon', 'commands', `${command}.md`), prompt);

  /** Executes `definition` with the standard conversation arguments. */
  const run = (definition: WorkflowDefinition, userMessage = 'User message') =>
    executeWorkflow(mockPlatform, 'conv-123', testDir, definition, userMessage, 'db-conv-id');

  /** Recorded `pool.query` calls whose SQL contains every one of `fragments`. */
  const queriesWith = (...fragments: string[]) =>
    mockQuery.mock.calls.filter((call: unknown[]) =>
      fragments.every(fragment => (call[0] as string).includes(fragment))
    );

  /** Asserts that an UPDATE setting the 'completed' status was issued. */
  const expectMarkedCompleted = () => {
    expect(queriesWith('UPDATE', "'completed'").length).toBeGreaterThan(0);
  };

  /** Asserts that an UPDATE setting the 'failed' status was issued. */
  const expectMarkedFailed = () => {
    expect(queriesWith('UPDATE', "'failed'").length).toBeGreaterThan(0);
  };

  /** String payloads of every `sendMessage` call, in send order. */
  const sentTexts = (): string[] =>
    (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls
      .map((call: unknown[]) => call[1])
      .filter((text: unknown): text is string => typeof text === 'string');

  /** One-step workflow running the pre-created `command-one` prompt. */
  const commandOneWorkflow: WorkflowDefinition = {
    name: 'test-workflow',
    description: 'Test',
    steps: [{ command: 'command-one' }],
  };

  it('should handle workflow with single step', async () => {
    await addCommand('single', 'Single command prompt');
    const singleStepWorkflow: WorkflowDefinition = {
      name: 'single-step-workflow',
      description: 'Only one step',
      steps: [{ command: 'single' }],
    };

    await run(singleStepWorkflow);

    expect(mockSendQuery).toHaveBeenCalledTimes(1);
    expectMarkedCompleted();

    // The start notification IS sent, but a lone step gets no "Step 1/1" notice.
    const texts = sentTexts();
    expect(texts.some(text => text.includes('Starting workflow'))).toBe(true);
    expect(texts.some(text => text.includes('**Step 1/1**'))).toBe(false);
  });

  it('should handle workflow with many steps', async () => {
    // Five commands, cmd-0 .. cmd-4, each with its own prompt file.
    const commands = Array.from({ length: 5 }, (_, i) => `cmd-${String(i)}`);
    for (const [i, command] of commands.entries()) {
      await addCommand(command, `Command ${String(i)} prompt`);
    }
    const manyStepsWorkflow: WorkflowDefinition = {
      name: 'many-steps-workflow',
      description: 'Five steps',
      steps: commands.map(command => ({ command })),
    };

    await run(manyStepsWorkflow);

    expect(mockSendQuery).toHaveBeenCalledTimes(5);
    // Step progress was persisted and the run finished.
    expect(queriesWith('UPDATE', 'current_step_index').length).toBeGreaterThan(0);
    expectMarkedCompleted();
  });

  it('should substitute $USER_MESSAGE in command prompt', async () => {
    await addCommand('substitution', 'User wants: $USER_MESSAGE\nWorkflow ID: $WORKFLOW_ID');
    const workflow: WorkflowDefinition = {
      name: 'substitution-workflow',
      description: 'Test variable substitution',
      steps: [{ command: 'substitution' }],
    };

    await run(workflow, 'Build a feature');

    expect(mockSendQuery).toHaveBeenCalled();
    // Inspect the prompt that actually reached the AI client; merely checking
    // that a call happened would pass even with substitution broken.
    // As in the $ARGUMENTS test below, the substituted value is the
    // user_message of the mocked workflow-run row ('test user message').
    const prompt = mockSendQuery.mock.calls[0][0] as string;
    expect(prompt).toContain('User wants: test user message');
    expect(prompt).not.toContain('$USER_MESSAGE');
    // NOTE(review): $WORKFLOW_ID presumably resolves to the mocked run id
    // ('test-workflow-run-id'); add an assertion once confirmed against the executor.
  });

  it('should substitute $ARGUMENTS in command prompt (same as $USER_MESSAGE)', async () => {
    await addCommand('arguments-test', 'Request: $ARGUMENTS');
    // Index of the first sendQuery call made by this test.
    const callCountBefore = mockSendQuery.mock.calls.length;
    const workflow: WorkflowDefinition = {
      name: 'arguments-workflow',
      description: 'Test $ARGUMENTS substitution',
      steps: [{ command: 'arguments-test' }],
    };

    await run(workflow, 'Help me debug this issue');

    // The AI client should receive the substituted prompt
    expect(mockSendQuery.mock.calls.length).toBeGreaterThan(callCountBefore);
    // $ARGUMENTS is replaced with the user message from the mock database row
    // ('test user message' - see the mockQuery setup at the top of the file).
    const callArg = mockSendQuery.mock.calls[callCountBefore][0] as string;
    expect(callArg).toContain('test user message');
    expect(callArg).not.toContain('$ARGUMENTS');
  });

  it('should handle empty user message', async () => {
    await run(commandOneWorkflow, '');

    expectMarkedCompleted();
  });

  it('should handle user message with special characters', async () => {
    await run(commandOneWorkflow, 'Fix the "bug" in `src/index.ts` with $variables');

    expectMarkedCompleted();
  });

  it('should fail when command file is empty', async () => {
    await addCommand('empty', '');
    const workflow: WorkflowDefinition = {
      name: 'empty-command-workflow',
      description: 'Has empty command file',
      steps: [{ command: 'empty' }],
    };

    await run(workflow);

    // An empty prompt file is treated as invalid, so the workflow fails.
    expectMarkedFailed();
  });

  it('should fail on second step if it is missing', async () => {
    await addCommand('existing', 'This command exists');
    // No prompt file is created for 'missing'.
    const workflow: WorkflowDefinition = {
      name: 'partial-workflow',
      description: 'Second step is missing',
      steps: [{ command: 'existing' }, { command: 'missing' }],
    };

    await run(workflow);

    // The log shows exactly one completed step (the first one)...
    const raw = await readFile(
      join(testDir, '.archon', 'logs', 'test-workflow-run-id.jsonl'),
      'utf-8'
    );
    const events: { type: string }[] = raw
      .trim()
      .split('\n')
      .map(line => JSON.parse(line));
    expect(events.filter(event => event.type === 'step_complete')).toHaveLength(1);

    // ...while the workflow as a whole is marked failed.
    expectMarkedFailed();
  });

  it('should use default provider (claude) when not specified', async () => {
    const workflow: WorkflowDefinition = {
      name: 'no-provider-workflow',
      description: 'No provider specified',
      // provider is intentionally left undefined
      steps: [{ command: 'command-one' }],
    };

    await run(workflow);

    expect(mockGetAssistantClient).toHaveBeenCalledWith('claude');
  });

  it('should use specified provider', async () => {
    const workflow: WorkflowDefinition = {
      name: 'codex-workflow',
      description: 'Uses codex',
      provider: 'codex',
      steps: [{ command: 'command-one' }],
    };

    await run(workflow);

    expect(mockGetAssistantClient).toHaveBeenCalledWith('codex');
  });

  it('should handle streaming mode', async () => {
    // Switch platform to streaming mode
    (mockPlatform.getStreamingMode as ReturnType<typeof mock>).mockReturnValue('stream');

    await run(commandOneWorkflow);

    // Streaming must not prevent completion.
    expectMarkedCompleted();
  });

  it('should reject invalid command names with path traversal', async () => {
    const workflow: WorkflowDefinition = {
      name: 'path-traversal-workflow',
      description: 'Has invalid command name',
      steps: [{ command: '../../../etc/passwd' }],
    };

    await run(workflow);

    // Path traversal is rejected, so the run is marked failed.
    expectMarkedFailed();
  });

  it('should reject command names starting with dot', async () => {
    const workflow: WorkflowDefinition = {
      name: 'dotfile-workflow',
      description: 'Has invalid command name',
      steps: [{ command: '.hidden' }],
    };

    await run(workflow);

    // Dotfile names are rejected, so the run is marked failed.
    expectMarkedFailed();
  });

  it('should return specific error message for empty command file (Issue #128)', async () => {
    // Whitespace-only content counts as empty.
    await addCommand('empty-cmd', ' \n ');
    const workflow: WorkflowDefinition = {
      name: 'empty-file-workflow',
      description: 'Has empty command file',
      steps: [{ command: 'empty-cmd' }],
    };

    await run(workflow);

    // The user-facing failure names the "empty file" cause.
    const matches = sentTexts().filter(text => text.includes('Command file is empty'));
    expect(matches.length).toBeGreaterThan(0);
  });

  it('should return specific error message for path traversal (Issue #128)', async () => {
    const workflow: WorkflowDefinition = {
      name: 'path-traversal-workflow',
      description: 'Has path traversal command',
      steps: [{ command: '../../../etc/passwd' }],
    };

    await run(workflow);

    // The user-facing failure names the "invalid name" cause.
    const matches = sentTexts().filter(text =>
      text.includes('Invalid command name (potential path traversal)')
    );
    expect(matches.length).toBeGreaterThan(0);
  });

  it('should return specific error message for missing command file (Issue #128)', async () => {
    const workflow: WorkflowDefinition = {
      name: 'missing-cmd-workflow',
      description: 'Has missing command file',
      steps: [{ command: 'totally-nonexistent-cmd' }],
    };

    await run(workflow);

    // The user-facing failure names the "not found" cause and lists the searched paths.
    const matches = sentTexts().filter(
      text => text.includes('Command prompt not found') && text.includes('searched:')
    );
    expect(matches.length).toBeGreaterThan(0);
  });
});
describe('AI client error hints (Issue #126)', () => {
  // Restore the default AI script after every test in this group. Doing it in
  // a hook (rather than at the end of each test body) guarantees the throwing
  // implementation is removed even when an assertion fails first.
  afterEach(() => {
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'AI response' };
      yield { type: 'result', sessionId: 'new-session-id' };
    });
  });

  /**
   * Makes the AI client throw `errorMessage`, runs a one-step workflow, and
   * returns the string payloads of every message sent to the platform.
   */
  const sentTextsAfterClientError = async (
    name: string,
    description: string,
    errorMessage: string
  ): Promise<string[]> => {
    mockSendQuery.mockImplementation(function* () {
      throw new Error(errorMessage);
    });

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      { name, description, steps: [{ command: 'command-one' }] },
      'User message',
      'db-conv-id'
    );

    return (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls
      .map((call: unknown[]) => call[1])
      .filter((text: unknown): text is string => typeof text === 'string');
  };

  it('should include rate limit hint for 429 errors', async () => {
    const texts = await sentTextsAfterClientError(
      'rate-limit-workflow',
      'Test rate limit handling',
      'API returned 429: Too many requests'
    );

    // The hint mentions rate limiting and advises waiting.
    const hints = texts.filter(text => text.includes('Rate limited') && text.includes('wait'));
    expect(hints.length).toBeGreaterThan(0);
  });

  it('should include auth hint for 401 errors', async () => {
    const texts = await sentTextsAfterClientError(
      'auth-error-workflow',
      'Test auth error handling',
      '401 Unauthorized: Invalid API key'
    );

    // The hint points the user at their API key.
    const hints = texts.filter(text => text.includes('API key'));
    expect(hints.length).toBeGreaterThan(0);
  });

  it('should include permission hint for 403 errors', async () => {
    const texts = await sentTextsAfterClientError(
      'forbidden-workflow',
      'Test 403 error handling',
      '403 Forbidden: Insufficient permissions'
    );

    // The hint reports the permission problem.
    const hints = texts.filter(text => text.includes('Permission denied'));
    expect(hints.length).toBeGreaterThan(0);
  });

  it('should include network hint for timeout errors', async () => {
    const texts = await sentTextsAfterClientError(
      'timeout-workflow',
      'Test timeout handling',
      'Request timeout: ETIMEDOUT'
    );

    // The hint reports a network issue.
    const hints = texts.filter(text => text.includes('Network issue'));
    expect(hints.length).toBeGreaterThan(0);
  });
});
describe('platform message error handling', () => {
it('should continue workflow when platform.sendMessage fails', async () => {
  // Every outbound platform message is rejected.
  mockPlatform.sendMessage = mock(() => Promise.reject(new Error('Platform API rate limit')));
  const singleStep: WorkflowDefinition = {
    name: 'test-workflow',
    description: 'Test',
    steps: [{ command: 'command-one' }],
  };

  await executeWorkflow(mockPlatform, 'conv-123', testDir, singleStep, 'User message', 'db-conv-id');

  // The run is still recorded as completed despite the sendMessage failures.
  const markedCompleted = mockQuery.mock.calls.some((call: unknown[]) => {
    const sql = call[0] as string;
    return sql.includes('UPDATE') && sql.includes("'completed'");
  });
  expect(markedCompleted).toBe(true);
});
it('should continue workflow when sendMessage fails during streaming', async () => {
  // Streaming mode, with every outbound platform message rejected.
  (mockPlatform.getStreamingMode as ReturnType<typeof mock>).mockReturnValue('stream');
  mockPlatform.sendMessage = mock(() => Promise.reject(new Error('Network error')));
  const singleStep: WorkflowDefinition = {
    name: 'test-workflow',
    description: 'Test',
    steps: [{ command: 'command-one' }],
  };

  await executeWorkflow(mockPlatform, 'conv-123', testDir, singleStep, 'User message', 'db-conv-id');

  // The run is still recorded as completed.
  const markedCompleted = mockQuery.mock.calls.some((call: unknown[]) => {
    const sql = call[0] as string;
    return sql.includes('UPDATE') && sql.includes("'completed'");
  });
  expect(markedCompleted).toBe(true);
});
it('should continue to next step when sendMessage fails on step notification', async () => {
  // Only the very first send is rejected; all later sends succeed.
  let firstSendPending = true;
  mockPlatform.sendMessage = mock(() => {
    if (firstSendPending) {
      firstSendPending = false;
      return Promise.reject(new Error('First send failed'));
    }
    return Promise.resolve();
  });
  const twoSteps: WorkflowDefinition = {
    name: 'test-workflow',
    description: 'Test',
    steps: [{ command: 'command-one' }, { command: 'command-two' }],
  };

  await executeWorkflow(mockPlatform, 'conv-123', testDir, twoSteps, 'User message', 'db-conv-id');

  // Both steps reached the AI client (2 calls to sendQuery)...
  expect(mockSendQuery).toHaveBeenCalledTimes(2);

  // ...and the run is recorded as completed.
  const markedCompleted = mockQuery.mock.calls.some((call: unknown[]) => {
    const sql = call[0] as string;
    return sql.includes('UPDATE') && sql.includes("'completed'");
  });
  expect(markedCompleted).toBe(true);
});
it('should log errors with context when sendMessage fails', async () => {
  // Capture everything written to console.error while the workflow runs.
  const errorLogs: unknown[][] = [];
  const originalConsoleError = console.error;
  console.error = mock((...args: unknown[]) => {
    errorLogs.push(args);
  });

  mockPlatform.sendMessage = mock(() => Promise.reject(new Error('API timeout')));

  try {
    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      { name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
      'User message',
      'db-conv-id'
    );
  } finally {
    // Always restore console.error, even if executeWorkflow rejects; otherwise
    // the capture would stay installed and swallow output of later tests.
    console.error = originalConsoleError;
  }

  // The failed send is logged under the executor's message tag...
  const safeSendLogs = errorLogs.filter(
    log => log[0] === '[WorkflowExecutor] Failed to send message'
  );
  expect(safeSendLogs.length).toBeGreaterThan(0);

  // ...with a context object describing the failure.
  const logContext = safeSendLogs[0][1] as Record<string, unknown>;
  expect(logContext).toHaveProperty('conversationId', 'conv-123');
  expect(logContext).toHaveProperty('error', 'API timeout');
  expect(logContext).toHaveProperty('errorType');
  expect(logContext).toHaveProperty('platformType');
});
it('should mark workflow as failed on fatal authentication errors (no throw)', async () => {
  const unauthorizedSend = mock(() =>
    Promise.reject(new Error('401 Unauthorized: Invalid token'))
  );
  mockPlatform.sendMessage = unauthorizedSend;

  const workflow: WorkflowDefinition = {
    name: 'test-workflow',
    description: 'Test',
    steps: [{ command: 'command-one' }],
  };

  // The await below must resolve rather than reject: the executor is expected
  // to record the failure itself instead of propagating the error.
  await executeWorkflow(mockPlatform, 'conv-123', testDir, workflow, 'User message', 'db-conv-id');

  // The run must have been recorded as failed in the database.
  const failureUpdates = mockQuery.mock.calls.filter((args: unknown[]) => {
    const sql = args[0] as string;
    return sql.includes('UPDATE') && sql.includes("'failed'");
  });
  expect(failureUpdates.length).toBeGreaterThan(0);
});
it('should continue workflow when tool message send fails in streaming mode', async () => {
  (mockPlatform.getStreamingMode as ReturnType<typeof mock>).mockReturnValue('stream');

  // Mock AI client to yield a tool message between assistant text and the result
  mockSendQuery.mockImplementation(function* () {
    yield { type: 'assistant', content: 'Starting...' };
    yield { type: 'tool', toolName: 'read_file', toolInput: { path: '/tmp/test.ts' } };
    yield { type: 'result', sessionId: 'new-session-id' };
  });

  const sendMessageMock = mock(() => Promise.reject(new Error('Rate limited')));
  mockPlatform.sendMessage = sendMessageMock;

  try {
    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      { name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
      'User message',
      'db-conv-id'
    );

    // Workflow should complete despite all sendMessage calls failing
    const completeCalls = mockQuery.mock.calls.filter(
      (call: unknown[]) =>
        (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
    );
    expect(completeCalls.length).toBeGreaterThan(0);
  } finally {
    // Restore the default AI response (the same one the loop tests in this
    // file restore) so the tool-message generator does not leak into later tests.
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'AI response' };
      yield { type: 'result', sessionId: 'new-session-id' };
    });
  }
});
it('should record workflow failure in database even when failure notification fails', async () => {
  // No message can be delivered to the platform at all.
  const rejectingSend = mock(() => Promise.reject(new Error('Cannot send')));
  mockPlatform.sendMessage = rejectingSend;

  // The single step references a command named 'nonexistent-command'.
  const failingWorkflow: WorkflowDefinition = {
    name: 'failing-workflow',
    description: 'Test',
    steps: [{ command: 'nonexistent-command' }],
  };
  await executeWorkflow(
    mockPlatform,
    'conv-123',
    testDir,
    failingWorkflow,
    'User message',
    'db-conv-id'
  );

  // The failure must reach the database regardless of the notification outcome.
  const failureUpdates = mockQuery.mock.calls.filter((args: unknown[]) => {
    const sql = args[0] as string;
    return sql.includes('UPDATE') && sql.includes("'failed'");
  });
  expect(failureUpdates.length).toBeGreaterThan(0);
});
it('should warn user about dropped messages in streaming mode', async () => {
  (mockPlatform.getStreamingMode as ReturnType<typeof mock>).mockReturnValue('stream');

  // Mock AI client to yield three assistant messages followed by the result
  mockSendQuery.mockImplementation(function* () {
    yield { type: 'assistant', content: 'Message 1' };
    yield { type: 'assistant', content: 'Message 2' };
    yield { type: 'assistant', content: 'Message 3' };
    yield { type: 'result', sessionId: 'new-session-id' };
  });

  // Reject the 3rd and 4th sendMessage calls; every other call resolves.
  // NOTE(review): which executor messages land on calls 3 and 4 depends on how
  // many notifications precede the assistant output - confirm against the executor.
  let callCount = 0;
  const sendMessageMock = mock(() => {
    callCount++;
    if (callCount >= 3 && callCount <= 4) {
      return Promise.reject(new Error('Rate limited'));
    }
    return Promise.resolve();
  });
  mockPlatform.sendMessage = sendMessageMock;

  try {
    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      { name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
      'User message',
      'db-conv-id'
    );

    // Verify a warning about undelivered messages was attempted
    const calls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
    const warningCalls = calls.filter((call: unknown[]) =>
      (call[1] as string).includes('message(s) failed to deliver')
    );
    expect(warningCalls.length).toBeGreaterThan(0);
  } finally {
    // Restore the default AI response (the same one the loop tests in this
    // file restore) so the three-message generator does not leak into later tests.
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'AI response' };
      yield { type: 'result', sessionId: 'new-session-id' };
    });
  }
});
it('should handle intermittent sendMessage failures throughout workflow', async () => {
  // The 2nd and 5th sendMessage calls reject; all others resolve.
  const rejectedCallNumbers = new Set([2, 5]);
  let sendAttempts = 0;
  const flakySend = mock(() => {
    sendAttempts += 1;
    return rejectedCallNumbers.has(sendAttempts)
      ? Promise.reject(new Error('Intermittent failure'))
      : Promise.resolve();
  });
  mockPlatform.sendMessage = flakySend;

  const twoStepWorkflow: WorkflowDefinition = {
    name: 'test-workflow',
    description: 'Test',
    steps: [{ command: 'command-one' }, { command: 'command-two' }],
  };
  await executeWorkflow(
    mockPlatform,
    'conv-123',
    testDir,
    twoStepWorkflow,
    'User message',
    'db-conv-id'
  );

  // One sendQuery call per step means both steps ran.
  expect(mockSendQuery).toHaveBeenCalledTimes(2);

  // And the run was recorded as completed.
  const completionUpdates = mockQuery.mock.calls.filter((args: unknown[]) => {
    const sql = args[0] as string;
    return sql.includes('UPDATE') && sql.includes("'completed'");
  });
  expect(completionUpdates.length).toBeGreaterThan(0);
});
it('should retry critical completion message on transient errors', async () => {
  // The 4th and 5th sendMessage calls reject with a timeout; all others resolve.
  // NOTE(review): these call numbers are meant to hit the first attempts at the
  // completion message - confirm against the executor's message order.
  let sendAttempts = 0;
  const timeoutOnLateCalls = mock(() => {
    sendAttempts += 1;
    const shouldReject = sendAttempts >= 4 && sendAttempts <= 5;
    return shouldReject ? Promise.reject(new Error('Connection timeout')) : Promise.resolve();
  });
  mockPlatform.sendMessage = timeoutOnLateCalls;

  const workflow: WorkflowDefinition = {
    name: 'test-workflow',
    description: 'Test',
    steps: [{ command: 'command-one' }],
  };
  await executeWorkflow(mockPlatform, 'conv-123', testDir, workflow, 'User message', 'db-conv-id');

  // Collect every sendMessage call whose text carries the completion marker.
  const sendCalls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
  const completionAttempts = sendCalls.filter((args: unknown[]) => {
    const text = args[1] as string;
    return text.includes('**Workflow complete**');
  });

  // At least one attempt at the completion message must have been made.
  expect(completionAttempts.length).toBeGreaterThanOrEqual(1);
});
describe('staleness detection', () => {
  const MINUTE_MS = 60 * 1000;

  /** Fields that differ between the pre-existing "running" rows used by these tests. */
  interface ActiveRunFields {
    id: string;
    workflowName: string;
    userMessage: string;
    startedAt: Date;
    lastActivityAt: Date | null;
  }

  /** Builds the row returned for the "status = 'running'" lookup. */
  function activeRunRow(fields: ActiveRunFields) {
    return {
      id: fields.id,
      workflow_name: fields.workflowName,
      conversation_id: 'conv-123',
      status: 'running' as const,
      started_at: fields.startedAt,
      last_activity_at: fields.lastActivityAt,
      completed_at: null,
      current_step_index: 0,
      user_message: fields.userMessage,
      metadata: {},
    };
  }

  /** The pre-existing run with id 'stale-workflow-id' shared by the stale-path tests. */
  function staleRunRow(startedAt: Date, lastActivityAt: Date | null) {
    return activeRunRow({
      id: 'stale-workflow-id',
      workflowName: 'old-workflow',
      userMessage: 'old message',
      startedAt,
      lastActivityAt,
    });
  }

  /** Row returned for the INSERT that creates the replacement run (timestamps taken at call time). */
  function newRunRow() {
    return {
      id: 'new-workflow-run-id',
      workflow_name: 'test-workflow',
      conversation_id: 'conv-123',
      status: 'running' as const,
      started_at: new Date(),
      last_activity_at: new Date(),
      completed_at: null,
      current_step_index: 0,
      user_message: 'test user message',
      metadata: {},
    };
  }

  /**
   * Re-installs the query mock these tests reset to: no active run, and an
   * INSERT that returns 'test-workflow-run-id'. Called from `finally` so a
   * failing assertion cannot leak a stale-run mock into later tests.
   */
  function restoreDefaultQueryMock(): void {
    mockQuery.mockImplementation((query: string) => {
      if (query.includes("status = 'running'")) {
        return Promise.resolve(createQueryResult([]));
      }
      if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
        return Promise.resolve(
          createQueryResult([
            {
              id: 'test-workflow-run-id',
              workflow_name: 'test-workflow',
              conversation_id: 'conv-123',
              status: 'running' as const,
              started_at: new Date(),
              completed_at: null,
              current_step_index: 0,
              user_message: 'test user message',
              metadata: {},
            },
          ])
        );
      }
      return Promise.resolve(createQueryResult([]));
    });
  }

  /** Triggers a new single-step workflow on conversation 'conv-123'. */
  function runTestWorkflow() {
    return executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      { name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
      'User message',
      'db-conv-id'
    );
  }

  /** Number of recorded UPDATE ... 'failed' queries whose parameters include the stale run id. */
  function countStaleRunFailures(): number {
    return mockQuery.mock.calls.filter(
      (call: unknown[]) =>
        (call[0] as string).includes('UPDATE') &&
        (call[0] as string).includes("'failed'") &&
        (call[1] as string[])?.includes('stale-workflow-id')
    ).length;
  }

  /** Number of recorded INSERTs into remote_agent_workflow_runs. */
  function countRunInserts(): number {
    return mockQuery.mock.calls.filter((call: unknown[]) =>
      (call[0] as string).includes('INSERT INTO remote_agent_workflow_runs')
    ).length;
  }

  /** sendMessage calls recorded on the current mock platform. */
  function sentMessages(): unknown[][] {
    return (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
  }

  it('should fail stale workflow and start new one when last_activity_at > 15 min ago', async () => {
    // Active run whose last activity was 20 minutes ago
    const staleTime = new Date(Date.now() - 20 * MINUTE_MS);
    mockQuery.mockImplementation((query: string) => {
      if (query.includes("status = 'running'")) {
        return Promise.resolve(createQueryResult([staleRunRow(staleTime, staleTime)]));
      }
      if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
        return Promise.resolve(createQueryResult([newRunRow()]));
      }
      return Promise.resolve(createQueryResult([]));
    });

    try {
      await runTestWorkflow();

      // Verify stale workflow was marked as failed
      expect(countStaleRunFailures()).toBeGreaterThan(0);
      // Verify new workflow was created
      expect(countRunInserts()).toBeGreaterThan(0);
    } finally {
      restoreDefaultQueryMock();
    }
  });

  it('should block new workflow when active workflow is not stale', async () => {
    // Active run whose last activity was only 5 minutes ago
    const recentTime = new Date(Date.now() - 5 * MINUTE_MS);
    mockQuery.mockImplementation((query: string) => {
      if (query.includes("status = 'running'")) {
        return Promise.resolve(
          createQueryResult([
            activeRunRow({
              id: 'active-workflow-id',
              workflowName: 'active-workflow',
              userMessage: 'active message',
              startedAt: recentTime,
              lastActivityAt: recentTime,
            }),
          ])
        );
      }
      return Promise.resolve(createQueryResult([]));
    });

    try {
      await runTestWorkflow();

      // Verify rejection message was sent exactly once
      const blockingMessages = sentMessages().filter((call: unknown[]) =>
        (call[1] as string).includes('Workflow already running')
      );
      expect(blockingMessages.length).toBe(1);
      // Verify no INSERT was made (new workflow not created)
      expect(countRunInserts()).toBe(0);
    } finally {
      restoreDefaultQueryMock();
    }
  });

  it('should fallback to started_at when last_activity_at is null', async () => {
    // Active run started 20 minutes ago with no recorded activity timestamp
    const staleTime = new Date(Date.now() - 20 * MINUTE_MS);
    mockQuery.mockImplementation((query: string) => {
      if (query.includes("status = 'running'")) {
        // last_activity_at is null - staleness should fall back to started_at
        return Promise.resolve(createQueryResult([staleRunRow(staleTime, null)]));
      }
      if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
        return Promise.resolve(createQueryResult([newRunRow()]));
      }
      return Promise.resolve(createQueryResult([]));
    });

    try {
      await runTestWorkflow();

      // Verify stale workflow was marked as failed (fallback to started_at worked)
      expect(countStaleRunFailures()).toBeGreaterThan(0);
    } finally {
      restoreDefaultQueryMock();
    }
  });

  it('should show cleanup error message when failWorkflowRun fails for stale workflow', async () => {
    // Active run whose last activity was 20 minutes ago
    const staleTime = new Date(Date.now() - 20 * MINUTE_MS);
    let failWorkflowCallCount = 0;
    mockQuery.mockImplementation((query: string) => {
      if (query.includes("status = 'running'")) {
        return Promise.resolve(createQueryResult([staleRunRow(staleTime, staleTime)]));
      }
      // Reject only the first UPDATE ... 'failed' query (the staleness cleanup)
      if (query.includes('UPDATE') && query.includes("'failed'")) {
        failWorkflowCallCount++;
        if (failWorkflowCallCount === 1) {
          return Promise.reject(new Error('Database connection lost'));
        }
      }
      return Promise.resolve(createQueryResult([]));
    });

    try {
      await runTestWorkflow();

      // Verify user received cleanup error message pointing at /workflow cancel
      const cleanupErrorMessages = sentMessages().filter(
        (call: unknown[]) =>
          (call[1] as string).includes('Workflow blocked') &&
          (call[1] as string).includes('/workflow cancel')
      );
      expect(cleanupErrorMessages.length).toBe(1);
      // Verify no new workflow was created
      expect(countRunInserts()).toBe(0);
    } finally {
      restoreDefaultQueryMock();
    }
  });
});
});
describe('loop workflow execution', () => {
  /**
   * Re-installs the stock AI response. Called from `finally` in every test so
   * a failing assertion cannot leak a per-test generator into later tests.
   */
  function restoreDefaultSendQuery(): void {
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'AI response' };
      yield { type: 'result', sessionId: 'new-session-id' };
    });
  }

  /** Runs a workflow on conversation 'conv-123' with the shared platform and test directory. */
  function runLoop(workflow: WorkflowDefinition, userMessage: string) {
    return executeWorkflow(mockPlatform, 'conv-123', testDir, workflow, userMessage, 'db-conv-id');
  }

  /** Number of recorded UPDATE queries whose SQL contains the given fragment. */
  function countUpdatesContaining(fragment: string): number {
    return mockQuery.mock.calls.filter(
      (call: unknown[]) =>
        (call[0] as string).includes('UPDATE') && (call[0] as string).includes(fragment)
    ).length;
  }

  it('should execute loop and complete on signal', async () => {
    // Mock AI to return COMPLETE on 3rd iteration
    let callCount = 0;
    mockSendQuery.mockImplementation(function* () {
      callCount++;
      if (callCount >= 3) {
        yield { type: 'assistant', content: 'All done! <promise>COMPLETE</promise>' };
      } else {
        yield { type: 'assistant', content: `Working on iteration ${String(callCount)}...` };
      }
      yield { type: 'result', sessionId: `session-${String(callCount)}` };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'test-loop',
      description: 'Test loop workflow',
      loop: { until: 'COMPLETE', max_iterations: 10, fresh_context: false },
      prompt: 'Do the thing. Output <promise>COMPLETE</promise> when done.',
    };

    try {
      await runLoop(loopWorkflow, 'Implement everything');

      // Should have run 3 iterations
      expect(mockSendQuery).toHaveBeenCalledTimes(3);
      // Should complete successfully
      expect(countUpdatesContaining("'completed'")).toBeGreaterThan(0);
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should fail when max iterations reached without completion', async () => {
    // Mock AI to never return completion signal
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'Still working...' };
      yield { type: 'result', sessionId: 'session-id' };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'endless-loop',
      description: 'Never completes',
      loop: { until: 'COMPLETE', max_iterations: 3, fresh_context: false },
      prompt: 'Do something that never finishes.',
    };

    try {
      await runLoop(loopWorkflow, 'Try forever');

      // Should have run exactly max_iterations times
      expect(mockSendQuery).toHaveBeenCalledTimes(3);
      // Should fail
      expect(countUpdatesContaining("'failed'")).toBeGreaterThan(0);
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should detect completion signal in <promise> tags', async () => {
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'Done! <promise>DONE</promise>' };
      yield { type: 'result', sessionId: 'session-id' };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'promise-test',
      description: 'Test promise tag detection',
      loop: { until: 'DONE', max_iterations: 5, fresh_context: false },
      prompt: 'Output <promise>DONE</promise> when finished.',
    };

    try {
      await runLoop(loopWorkflow, 'Test');

      // Should complete on first iteration
      expect(mockSendQuery).toHaveBeenCalledTimes(1);
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should update metadata with iteration count', async () => {
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: '<promise>COMPLETE</promise>' };
      yield { type: 'result', sessionId: 'session-id' };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'metadata-test',
      description: 'Test metadata updates',
      loop: { until: 'COMPLETE', max_iterations: 10, fresh_context: false },
      prompt: 'Complete immediately.',
    };

    try {
      await runLoop(loopWorkflow, 'Test');

      // Should have UPDATE with metadata
      expect(countUpdatesContaining('metadata')).toBeGreaterThan(0);
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should handle single iteration loop (max_iterations = 1)', async () => {
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'No completion signal here' };
      yield { type: 'result', sessionId: 'session-id' };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'single-iteration',
      description: 'Single iteration limit',
      loop: { until: 'COMPLETE', max_iterations: 1, fresh_context: false },
      prompt: 'Try once.',
    };

    try {
      await runLoop(loopWorkflow, 'Test');

      // Should have run exactly 1 time
      expect(mockSendQuery).toHaveBeenCalledTimes(1);
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should detect plain completion signal (backwards compatibility)', async () => {
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'All tasks done! COMPLETE' };
      yield { type: 'result', sessionId: 'session-id' };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'plain-signal-test',
      description: 'Test plain signal detection',
      loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: false },
      prompt: 'Output COMPLETE when finished.',
    };

    try {
      await runLoop(loopWorkflow, 'Test');

      // Should complete on first iteration (plain signal detected)
      expect(mockSendQuery).toHaveBeenCalledTimes(1);
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should handle AI error during iteration', async () => {
    // The generator throws on its 2nd invocation, before yielding anything
    let callCount = 0;
    mockSendQuery.mockImplementation(function* () {
      callCount++;
      if (callCount === 2) {
        throw new Error('AI service unavailable');
      }
      yield { type: 'assistant', content: `Iteration ${String(callCount)}` };
      yield { type: 'result', sessionId: `session-${String(callCount)}` };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'error-test',
      description: 'Test error handling',
      loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: false },
      prompt: 'Work until done.',
    };

    try {
      await runLoop(loopWorkflow, 'Test');

      // Should have run 2 iterations (failed on 2nd)
      expect(mockSendQuery).toHaveBeenCalledTimes(2);
      // Should fail
      expect(countUpdatesContaining("'failed'")).toBeGreaterThan(0);
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should substitute $USER_MESSAGE in loop prompt', async () => {
    // Track the prompt passed to sendQuery
    let receivedPrompt = '';
    mockSendQuery.mockImplementation(function* (prompt: string) {
      receivedPrompt = prompt;
      yield { type: 'assistant', content: '<promise>COMPLETE</promise>' };
      yield { type: 'result', sessionId: 'session-id' };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'substitution-test',
      description: 'Test variable substitution',
      loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: false },
      prompt: 'User wants: $USER_MESSAGE',
    };

    try {
      await runLoop(loopWorkflow, 'build a feature');

      // User message from the mock database row is 'test user message'
      expect(receivedPrompt).toContain('test user message');
      expect(receivedPrompt).not.toContain('$USER_MESSAGE');
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should start fresh session each iteration when fresh_context is true', async () => {
    // Track session IDs passed to each iteration
    const receivedSessionIds: (string | undefined)[] = [];
    let callCount = 0;
    mockSendQuery.mockImplementation(function* (
      _prompt: string,
      _cwd: string,
      sessionId?: string
    ) {
      receivedSessionIds.push(sessionId);
      callCount++;
      if (callCount >= 3) {
        yield { type: 'assistant', content: '<promise>COMPLETE</promise>' };
      } else {
        yield { type: 'assistant', content: `Iteration ${String(callCount)}` };
      }
      yield { type: 'result', sessionId: `session-${String(callCount)}` };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'fresh-context-loop',
      description: 'Test fresh_context: true',
      loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: true },
      prompt: 'Do work with fresh context each time.',
    };

    try {
      await runLoop(loopWorkflow, 'Test');

      // Should have run 3 iterations
      expect(mockSendQuery).toHaveBeenCalledTimes(3);
      // ALL iterations should have undefined session ID (fresh context)
      expect(receivedSessionIds).toEqual([undefined, undefined, undefined]);
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should detect completion signal split across multiple chunks', async () => {
    // Simulate AI returning completion signal across multiple yield statements
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'Processing complete. ' };
      yield { type: 'assistant', content: '<prom' };
      yield { type: 'assistant', content: 'ise>COMPLETE</promise>' };
      yield { type: 'assistant', content: ' Done!' };
      yield { type: 'result', sessionId: 'session-id' };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'split-signal-test',
      description: 'Test signal detection across chunks',
      loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: false },
      prompt: 'Output completion signal.',
    };

    try {
      await runLoop(loopWorkflow, 'Test');

      // Should complete on first iteration (signal accumulated across chunks)
      expect(mockSendQuery).toHaveBeenCalledTimes(1);
      // Should have marked as completed
      expect(countUpdatesContaining("'completed'")).toBeGreaterThan(0);
    } finally {
      restoreDefaultSendQuery();
    }
  });

  it('should NOT detect false positive plain signal in middle of text', async () => {
    // This tests that "not COMPLETE yet" doesn't match "COMPLETE"
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'The task is not COMPLETE yet, more work needed.' };
      yield { type: 'result', sessionId: 'session-id' };
    });
    const loopWorkflow: WorkflowDefinition = {
      name: 'false-positive-test',
      description: 'Test false positive prevention',
      loop: { until: 'COMPLETE', max_iterations: 2, fresh_context: false },
      prompt: 'Work until done.',
    };

    try {
      await runLoop(loopWorkflow, 'Test');

      // Should have run max_iterations times (NOT detected as complete)
      expect(mockSendQuery).toHaveBeenCalledTimes(2);
      // Should have FAILED (not completed)
      expect(countUpdatesContaining("'failed'")).toBeGreaterThan(0);
    } finally {
      restoreDefaultSendQuery();
    }
  });
});
describe('issueContext handling', () => {
describe('step workflow with context', () => {
  // Path of a command file inside the directory these tests write commands to.
  const commandPath = (fileName: string): string =>
    join(join(testDir, '.archon', 'commands'), fileName);

  // First argument of the first recorded sendQuery call, i.e. the prompt.
  const firstPrompt = (): string => mockSendQuery.mock.calls[0][0] as string;

  it('should pass issueContext to workflow step', async () => {
    const issueContext =
      '[GitHub Issue Context]\nIssue #42: "Test Issue"\nAuthor: testuser\n\nDescription:\nTest issue body';
    const contextWorkflow: WorkflowDefinition = {
      name: 'context-workflow',
      description: 'Test workflow with context',
      provider: 'claude',
      steps: [{ command: 'command-one' }],
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      contextWorkflow,
      'test user message',
      'db-conv-id',
      'codebase-id',
      issueContext
    );

    // The prompt handed to the AI client must carry the issue context.
    expect(mockSendQuery.mock.calls.length).toBeGreaterThan(0);
    const prompt = firstPrompt();
    expect(prompt).toContain('[GitHub Issue Context]');
    expect(prompt).toContain('Issue #42');
  });

  it('should substitute $CONTEXT variable in step workflow', async () => {
    // Command template that references $CONTEXT explicitly.
    await writeFile(
      commandPath('context-command.md'),
      'Process the following context:\n\n$CONTEXT\n\nNow execute the task.'
    );
    const contextWorkflow: WorkflowDefinition = {
      name: 'context-var-workflow',
      description: 'Test workflow with $CONTEXT variable',
      provider: 'claude',
      steps: [{ command: 'context-command' }],
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      contextWorkflow,
      'test message',
      'db-conv-id',
      'codebase-id',
      'GitHub Issue #123 content here'
    );

    const prompt = firstPrompt();
    // The variable is replaced in place...
    expect(prompt).toContain('Process the following context:');
    expect(prompt).toContain('GitHub Issue #123 content here');
    // ...and the context text occurs exactly once (substituted, not also appended).
    const occurrences = prompt.match(/GitHub Issue #123 content here/g);
    expect(occurrences?.length).toBe(1);
  });

  it('should clear $CONTEXT variable when issueContext is undefined', async () => {
    // Command template that references two context variables.
    await writeFile(
      commandPath('context-command.md'),
      'Process: $CONTEXT and $EXTERNAL_CONTEXT then continue'
    );
    const contextWorkflow: WorkflowDefinition = {
      name: 'context-var-workflow',
      description: 'Test workflow with $CONTEXT variable',
      provider: 'claude',
      steps: [{ command: 'context-command' }],
    };

    // Deliberately called without the issueContext argument.
    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      contextWorkflow,
      'test message',
      'db-conv-id',
      'codebase-id'
    );

    const prompt = firstPrompt();
    // Both placeholders must be gone from the prompt.
    expect(prompt).not.toContain('$CONTEXT');
    expect(prompt).not.toContain('$EXTERNAL_CONTEXT');
    expect(prompt).toContain('Process: and then continue');
  });

  it('should handle context with special regex characters', async () => {
    await writeFile(commandPath('context-command.md'), 'Context: $CONTEXT');
    const regexWorkflow: WorkflowDefinition = {
      name: 'regex-test-workflow',
      description: 'Test special characters in context',
      provider: 'claude',
      steps: [{ command: 'context-command' }],
    };
    // Context text full of characters that are special in regexes and in
    // String.prototype.replace replacement patterns.
    const issueContext =
      'Issue: Add dark mode with $20 budget & (regex) patterns like .* and [a-z]+ and $CONTEXT literal';

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      regexWorkflow,
      'test message',
      'db-conv-id',
      'codebase-id',
      issueContext
    );

    const prompt = firstPrompt();
    // Each special fragment must appear in the prompt unchanged.
    expect(prompt).toContain('$20 budget');
    expect(prompt).toContain('(regex)');
    expect(prompt).toContain('.*');
    expect(prompt).toContain('[a-z]+');
    expect(prompt).toContain('$CONTEXT literal');
  });

  it('should handle multiple context variables in same prompt', async () => {
    // Command template that references three different context variables.
    await writeFile(
      commandPath('multi-context.md'),
      'First: $CONTEXT\n\nSecond: $EXTERNAL_CONTEXT\n\nThird: $ISSUE_CONTEXT'
    );
    const multiVarWorkflow: WorkflowDefinition = {
      name: 'multi-var-workflow',
      description: 'Test multiple context variables',
      provider: 'claude',
      steps: [{ command: 'multi-context' }],
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      multiVarWorkflow,
      'test message',
      'db-conv-id',
      'codebase-id',
      'Shared context value'
    );

    const prompt = firstPrompt();
    // Every variable resolves to the same context text.
    expect(prompt).toContain('First: Shared context value');
    expect(prompt).toContain('Second: Shared context value');
    expect(prompt).toContain('Third: Shared context value');
    // No '---' in the prompt: the context was substituted, not appended as well.
    expect(prompt).not.toContain('---');
    // Exactly three occurrences, one per variable.
    const occurrences = prompt.match(/Shared context value/g);
    expect(occurrences?.length).toBe(3);
  });
});
describe('loop workflow with context', () => {
  // Makes the mocked AI client reply with `signal` wrapped in <promise> tags,
  // followed by a result message, so the loop's `until` condition is met.
  const respondWithSignal = (signal: string): void => {
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: `<promise>${signal}</promise>` };
      yield { type: 'result', sessionId: 'session-id' };
    });
  };

  it('should pass issueContext to loop workflow iterations', async () => {
    respondWithSignal('LOOP_COMPLETE');

    const githubContext =
      '[GitHub Issue Context]\nIssue #99: "Loop Test"\nAuthor: loopuser\n\nBody content';

    // The prompt has no context placeholder; the context is still expected in the prompt.
    const contextLoop: WorkflowDefinition = {
      name: 'loop-context-workflow',
      description: 'Test loop workflow with context',
      provider: 'claude',
      loop: {
        until: 'LOOP_COMPLETE',
        max_iterations: 2,
        fresh_context: false,
      },
      prompt: 'Process the task based on the provided context. User message: $USER_MESSAGE',
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      contextLoop,
      'test trigger',
      'db-conv-id',
      'codebase-id',
      githubContext
    );

    // The AI client must have been invoked, and its first prompt must carry the context.
    const recordedCalls = mockSendQuery.mock.calls;
    expect(recordedCalls.length).toBeGreaterThan(0);
    const firstPrompt = recordedCalls[0][0] as string;
    expect(firstPrompt).toContain('[GitHub Issue Context]');
    expect(firstPrompt).toContain('Issue #99');
  });

  it('should substitute $ISSUE_CONTEXT in loop workflow', async () => {
    respondWithSignal('DONE');

    const prContext = 'PR #555 details here';
    const placeholderLoop: WorkflowDefinition = {
      name: 'loop-var-workflow',
      description: 'Test loop with $ISSUE_CONTEXT',
      provider: 'claude',
      loop: {
        until: 'DONE',
        max_iterations: 1,
        fresh_context: false,
      },
      prompt: 'Given this context:\n$ISSUE_CONTEXT\n\nExecute: $USER_MESSAGE',
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      placeholderLoop,
      'implement feature',
      'db-conv-id',
      'codebase-id',
      prContext
    );

    const firstPrompt = mockSendQuery.mock.calls[0][0] as string;
    expect(firstPrompt).toContain('Given this context:');
    expect(firstPrompt).toContain('PR #555 details here');

    // A single occurrence shows the context was substituted rather than also appended.
    const occurrences = firstPrompt.match(/PR #555 details here/g);
    expect(occurrences?.length).toBe(1);
  });
});
describe('metadata storage', () => {
  // Asserts that at least one recorded DB query contains INSERT and returns the
  // parameter array (second argument) of the first such query.
  const firstInsertParams = (): string[] => {
    const inserts = mockQuery.mock.calls.filter(
      call => typeof call[0] === 'string' && call[0].includes('INSERT')
    );
    expect(inserts.length).toBeGreaterThan(0);
    return inserts[0][1] as string[];
  };

  it('should store issueContext in workflow run metadata', async () => {
    const contextWorkflow: WorkflowDefinition = {
      name: 'metadata-workflow',
      description: 'Test metadata storage',
      provider: 'claude',
      steps: [{ command: 'command-one' }],
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      contextWorkflow,
      'test message',
      'db-conv-id',
      'codebase-id',
      'Issue #77 context'
    );

    // The 5th INSERT parameter is expected to be the metadata JSON holding github_context.
    const params = firstInsertParams();
    expect(params[4]).toBe(JSON.stringify({ github_context: 'Issue #77 context' }));
  });

  it('should store empty metadata when issueContext is undefined', async () => {
    const plainWorkflow: WorkflowDefinition = {
      name: 'no-metadata-workflow',
      description: 'Test without context',
      provider: 'claude',
      steps: [{ command: 'command-one' }],
    };

    // No issueContext argument is passed here.
    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      plainWorkflow,
      'test message',
      'db-conv-id',
      'codebase-id'
    );

    // With no context supplied, the metadata parameter is an empty JSON object.
    const params = firstInsertParams();
    expect(params[4]).toBe('{}');
  });
});
});
describe('parallel block execution', () => {
beforeEach(async () => {
  // Command files (file name -> prompt body) used by the parallel block tests.
  const commandFixtures: [string, string][] = [
    ['parallel-a.md', 'Parallel step A prompt'],
    ['parallel-b.md', 'Parallel step B prompt'],
    ['parallel-c.md', 'Parallel step C prompt'],
    ['step-before.md', 'Step before parallel'],
    ['step-after.md', 'Step after parallel'],
  ];

  const commandsDir = join(testDir, '.archon', 'commands');
  // Written one at a time, in the listed order.
  for (const [fileName, promptBody] of commandFixtures) {
    await writeFile(join(commandsDir, fileName), promptBody);
  }

  // Restore the default AI client behavior: one assistant message, then a result.
  mockSendQuery.mockImplementation(function* () {
    yield { type: 'assistant', content: 'AI response' };
    yield { type: 'result', sessionId: 'new-session-id' };
  });
});
it('should execute all parallel steps concurrently', async () => {
  // A workflow consisting of a single parallel block with three commands.
  const commandNames = ['parallel-a', 'parallel-b', 'parallel-c'];
  const parallelWorkflow: WorkflowDefinition = {
    name: 'parallel-test',
    description: 'Test workflow with parallel block',
    steps: [{ parallel: commandNames.map(command => ({ command })) }],
  };

  await executeWorkflow(
    mockPlatform,
    'conv-123',
    testDir,
    parallelWorkflow,
    'Run parallel',
    'db-conv-id'
  );

  // One AI client call per parallel step.
  expect(mockSendQuery).toHaveBeenCalledTimes(3);

  // The run must have been marked completed via an UPDATE query.
  const isCompletionUpdate = (call: unknown[]): boolean => {
    const sql = call[0] as string;
    return sql.includes('UPDATE') && sql.includes("'completed'");
  };
  expect(mockQuery.mock.calls.filter(isCompletionUpdate).length).toBeGreaterThan(0);
});
it('should fail workflow if any parallel step fails', async () => {
  // Make only the parallel-b step fail. The failing step is recognized by its
  // prompt text (the body of parallel-b.md), not by the order of the calls.
  let callCount = 0;
  mockSendQuery.mockImplementation(function* (prompt: string) {
    callCount++;
    if (prompt.includes('Parallel step B')) {
      throw new Error('Parallel step B failed unexpectedly');
    }
    yield { type: 'assistant', content: `Response ${String(callCount)}` };
    yield { type: 'result', sessionId: `session-${String(callCount)}` };
  });

  try {
    const parallelWorkflow: WorkflowDefinition = {
      name: 'parallel-fail-test',
      description: 'Test parallel failure handling',
      steps: [
        {
          parallel: [
            { command: 'parallel-a' },
            { command: 'parallel-b' },
            { command: 'parallel-c' },
          ],
        },
      ],
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      parallelWorkflow,
      'Run parallel',
      'db-conv-id'
    );

    // Workflow should be marked failed via an UPDATE query
    const failCalls = mockQuery.mock.calls.filter(
      (call: unknown[]) =>
        (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
    );
    expect(failCalls.length).toBeGreaterThan(0);

    // Should send failure message to user. Non-string payloads are filtered out
    // first so a stray value produces an assertion failure instead of a TypeError.
    const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
    const messages = sendMessage.mock.calls
      .map((call: unknown[]) => call[1])
      .filter((m: unknown): m is string => typeof m === 'string');
    expect(messages.some(m => m.includes('**Workflow failed** in parallel block'))).toBe(true);
  } finally {
    // Restore the default mock even when an assertion above throws, so the
    // failing implementation cannot leak into later tests.
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'AI response' };
      yield { type: 'result', sessionId: 'new-session-id' };
    });
  }
});
it('should execute sequential step, then parallel block, then sequential step', async () => {
  // Workflow shape: step-before -> [parallel-a, parallel-b] -> step-after
  const mixedWorkflow: WorkflowDefinition = {
    name: 'mixed-workflow',
    description: 'Test sequential and parallel mix',
    steps: [
      { command: 'step-before' },
      {
        parallel: [{ command: 'parallel-a' }, { command: 'parallel-b' }],
      },
      { command: 'step-after' },
    ],
  };

  await executeWorkflow(
    mockPlatform,
    'conv-123',
    testDir,
    mixedWorkflow,
    'Run mixed',
    'db-conv-id'
  );

  // AI client should be called 4 times total:
  // 1 (step-before) + 2 (parallel) + 1 (step-after)
  expect(mockSendQuery).toHaveBeenCalledTimes(4);

  // Workflow should complete successfully
  const completeCalls = mockQuery.mock.calls.filter(
    (call: unknown[]) =>
      (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
  );
  expect(completeCalls.length).toBeGreaterThan(0);

  // Collect the text of every message sent to the platform. Non-string payloads
  // are dropped up front (matching the guarded lookups used by sibling tests) so
  // the checks below cannot throw a TypeError on `.includes`.
  const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
  const messages = sendMessage.mock.calls
    .map((call: unknown[]) => call[1])
    .filter((m: unknown): m is string => typeof m === 'string');

  // Sequential step notifications
  expect(messages.some(m => m.includes('**Step 1/3**: `step-before`'))).toBe(true);
  expect(messages.some(m => m.includes('**Step 3/3**: `step-after`'))).toBe(true);

  // Parallel block notification: a single message must name both commands
  expect(
    messages.some(
      m =>
        m.includes('**Parallel block**') &&
        m.includes('`parallel-a`') &&
        m.includes('`parallel-b`')
    )
  ).toBe(true);
});
it('should send correct notification format for parallel blocks', async () => {
  // A workflow made of one parallel block with two commands.
  const twoStepParallel: WorkflowDefinition = {
    name: 'parallel-notification-test',
    description: 'Test parallel block notification',
    steps: [
      {
        parallel: [{ command: 'parallel-a' }, { command: 'parallel-b' }],
      },
    ],
  };

  await executeWorkflow(
    mockPlatform,
    'conv-123',
    testDir,
    twoStepParallel,
    'Run parallel',
    'db-conv-id'
  );

  // Keep only string payloads (second sendMessage argument), then pick the
  // first one announcing a parallel block.
  const platformSend = mockPlatform.sendMessage as ReturnType<typeof mock>;
  const textMessages = platformSend.mock.calls
    .map((call: unknown[]) => call[1])
    .filter((payload: unknown): payload is string => typeof payload === 'string');
  const blockNotice = textMessages.find(text => text.includes('**Parallel block**'));

  // The notice must exist, state the step count, and name both commands.
  expect(blockNotice).toBeDefined();
  for (const fragment of ['(2 steps)', '`parallel-a`', '`parallel-b`']) {
    expect(blockNotice).toContain(fragment);
  }
});
it('should give each parallel step a fresh session (no resume)', async () => {
  // Record the session ID (third argument) the AI client receives for each step
  const receivedSessionIds: (string | undefined)[] = [];
  mockSendQuery.mockImplementation(function* (
    _prompt: string,
    _cwd: string,
    sessionId?: string
  ) {
    receivedSessionIds.push(sessionId);
    yield { type: 'assistant', content: 'Response' };
    yield { type: 'result', sessionId: 'session-id' };
  });

  try {
    const parallelWorkflow: WorkflowDefinition = {
      name: 'parallel-session-test',
      description: 'Test parallel session handling',
      steps: [
        {
          parallel: [{ command: 'parallel-a' }, { command: 'parallel-b' }],
        },
      ],
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      parallelWorkflow,
      'Run parallel',
      'db-conv-id'
    );

    // All parallel steps should have undefined session ID (fresh session)
    expect(receivedSessionIds).toEqual([undefined, undefined]);
  } finally {
    // Restore the default mock even when an assertion above throws, so the
    // recording implementation cannot leak into later tests.
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'AI response' };
      yield { type: 'result', sessionId: 'new-session-id' };
    });
  }
});
it('should execute workflow with multiple parallel blocks', async () => {
  // This test needs one command file beyond those written by beforeEach.
  await writeFile(
    join(testDir, '.archon', 'commands', 'parallel-d.md'),
    'Parallel step D prompt'
  );

  // Builds a parallel block from a list of command names.
  const parallelOf = (...commands: string[]) => ({
    parallel: commands.map(command => ({ command })),
  });

  // Shape: step-before -> [a, b] -> step-after -> [c, d]
  const twoBlockWorkflow: WorkflowDefinition = {
    name: 'multi-parallel-test',
    description: 'Test multiple parallel blocks',
    steps: [
      { command: 'step-before' },
      parallelOf('parallel-a', 'parallel-b'),
      { command: 'step-after' },
      parallelOf('parallel-c', 'parallel-d'),
    ],
  };

  await executeWorkflow(
    mockPlatform,
    'conv-123',
    testDir,
    twoBlockWorkflow,
    'Run multi-parallel',
    'db-conv-id'
  );

  // 1 (step-before) + 2 (first block) + 1 (step-after) + 2 (second block) = 6 AI calls.
  expect(mockSendQuery).toHaveBeenCalledTimes(6);

  // The run must have been marked completed via an UPDATE query.
  const isCompletionUpdate = (call: unknown[]): boolean => {
    const sql = call[0] as string;
    return sql.includes('UPDATE') && sql.includes("'completed'");
  };
  expect(mockQuery.mock.calls.filter(isCompletionUpdate).length).toBeGreaterThan(0);
});
it('should report all failures when multiple parallel steps fail', async () => {
  // Mock AI client to fail on all parallel steps, each with its own error.
  // Steps are recognized by their prompt text (the command file bodies).
  mockSendQuery.mockImplementation(function* (prompt: string) {
    if (prompt.includes('Parallel step A')) {
      throw new Error('Step A: Connection timeout');
    }
    if (prompt.includes('Parallel step B')) {
      throw new Error('Step B: Rate limit exceeded');
    }
    if (prompt.includes('Parallel step C')) {
      throw new Error('Step C: Authentication failed');
    }
    yield { type: 'assistant', content: 'Response' };
    yield { type: 'result', sessionId: 'session-id' };
  });

  try {
    const parallelWorkflow: WorkflowDefinition = {
      name: 'all-fail-test',
      description: 'Test all parallel steps failing',
      steps: [
        {
          parallel: [
            { command: 'parallel-a' },
            { command: 'parallel-b' },
            { command: 'parallel-c' },
          ],
        },
      ],
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      parallelWorkflow,
      'Run parallel',
      'db-conv-id'
    );

    // Workflow should be marked failed via an UPDATE query
    const failCalls = mockQuery.mock.calls.filter(
      (call: unknown[]) =>
        (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
    );
    expect(failCalls.length).toBeGreaterThan(0);

    // Locate the failure message among the string payloads sent to the platform
    const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
    const messages = sendMessage.mock.calls
      .map((call: unknown[]) => call[1])
      .filter((m: unknown): m is string => typeof m === 'string');
    const failureMessage = messages.find(m =>
      m.includes('**Workflow failed** in parallel block')
    );
    expect(failureMessage).toBeDefined();

    // Every failed command must be named in the single failure message
    expect(failureMessage).toContain('parallel-a');
    expect(failureMessage).toContain('parallel-b');
    expect(failureMessage).toContain('parallel-c');
  } finally {
    // Restore the default mock even when an assertion above throws, so the
    // failing implementation cannot leak into later tests.
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'AI response' };
      yield { type: 'result', sessionId: 'new-session-id' };
    });
  }
});
it('should execute step-only workflow unchanged (backward compatibility)', async () => {
  // Create sequential-only workflow (no parallel blocks)
  const sequentialWorkflow: WorkflowDefinition = {
    name: 'sequential-only',
    description: 'Test backward compatibility with sequential workflows',
    steps: [{ command: 'step-before' }, { command: 'step-after' }],
  };

  await executeWorkflow(
    mockPlatform,
    'conv-123',
    testDir,
    sequentialWorkflow,
    'Run sequential',
    'db-conv-id'
  );

  // AI client should be called 2 times (once per step)
  expect(mockSendQuery).toHaveBeenCalledTimes(2);

  // Workflow should complete successfully
  const completeCalls = mockQuery.mock.calls.filter(
    (call: unknown[]) =>
      (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
  );
  expect(completeCalls.length).toBeGreaterThan(0);

  // Collect the text of every message sent to the platform. Non-string payloads
  // are dropped up front (matching the guarded lookups used by sibling tests) so
  // the checks below cannot throw a TypeError on `.includes`.
  const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
  const messages = sendMessage.mock.calls
    .map((call: unknown[]) => call[1])
    .filter((m: unknown): m is string => typeof m === 'string');

  // Should have step notifications (not parallel block notifications)
  expect(messages.some(m => m.includes('**Step 1/2**'))).toBe(true);
  expect(messages.some(m => m.includes('**Step 2/2**'))).toBe(true);

  // No parallel block notifications
  expect(messages.some(m => m.includes('**Parallel block**'))).toBe(false);
});
it('should reset session after parallel block (next sequential step gets fresh session)', async () => {
  // Record the session ID (third argument) the AI client receives for each step.
  // Every step reports 'step-session-id' as its resulting session.
  const receivedSessionIds: (string | undefined)[] = [];
  mockSendQuery.mockImplementation(function* (
    _prompt: string,
    _cwd: string,
    sessionId?: string
  ) {
    receivedSessionIds.push(sessionId);
    yield { type: 'assistant', content: 'Response' };
    yield { type: 'result', sessionId: 'step-session-id' };
  });

  try {
    // Workflow: step-before -> [parallel-a, parallel-b] -> step-after
    const mixedWorkflow: WorkflowDefinition = {
      name: 'session-reset-test',
      description: 'Test session reset after parallel block',
      steps: [
        { command: 'step-before' },
        {
          parallel: [{ command: 'parallel-a' }, { command: 'parallel-b' }],
        },
        { command: 'step-after' },
      ],
    };

    await executeWorkflow(
      mockPlatform,
      'conv-123',
      testDir,
      mixedWorkflow,
      'Run mixed',
      'db-conv-id'
    );

    // Should have 4 calls: step-before, parallel-a, parallel-b, step-after
    expect(receivedSessionIds).toHaveLength(4);

    // step-before: first step always gets fresh session (undefined)
    expect(receivedSessionIds[0]).toBeUndefined();

    // parallel-a and parallel-b: always get fresh sessions (undefined)
    expect(receivedSessionIds[1]).toBeUndefined();
    expect(receivedSessionIds[2]).toBeUndefined();

    // step-after: must NOT resume 'step-session-id' from step-before; the
    // session is expected to be reset to undefined after the parallel block
    expect(receivedSessionIds[3]).toBeUndefined();
  } finally {
    // Restore the default mock even when an assertion above throws, so the
    // recording implementation cannot leak into later tests.
    mockSendQuery.mockImplementation(function* () {
      yield { type: 'assistant', content: 'AI response' };
      yield { type: 'result', sessionId: 'new-session-id' };
    });
  }
});
});
});
describe('isValidCommandName', () => {
  // Asserts that every given name is rejected by isValidCommandName.
  const expectRejected = (...names: string[]): void => {
    for (const candidate of names) {
      expect(isValidCommandName(candidate)).toBe(false);
    }
  };

  // Asserts that every given name is accepted by isValidCommandName.
  const expectAccepted = (...names: string[]): void => {
    for (const candidate of names) {
      expect(isValidCommandName(candidate)).toBe(true);
    }
  };

  it('should reject empty string', () => {
    expectRejected('');
  });

  it('should reject forward slashes (path traversal)', () => {
    expectRejected('foo/bar', '../etc/passwd');
  });

  it('should reject backslashes (Windows path separator)', () => {
    expectRejected('foo\\bar');
  });

  it('should reject double dots (parent directory reference)', () => {
    expectRejected('..', '..test');
  });

  it('should reject names starting with dot (hidden files)', () => {
    expectRejected('.hidden', '.gitignore');
  });

  it('should accept valid names with hyphens', () => {
    expectAccepted('my-command', 'review-pr');
  });

  it('should accept valid names with underscores', () => {
    expectAccepted('my_command', 'my_command_123');
  });

  it('should accept simple alphanumeric names', () => {
    expectAccepted('plan', 'execute', 'commit');
  });

  it('should accept names with numbers', () => {
    expectAccepted('step1', 'v2release');
  });
});