Mirror of https://github.com/coleam00/Archon (synced 2026-04-21 21:47:53 +00:00)
* Fix: Add stale workflow cleanup and defense-in-depth error handling

  Problem: Workflows could get stuck in "running" state indefinitely when the async generator disconnected but the AI subprocess continued working. This blocked new workflow invocations with "Workflow already running" errors.

  Root cause: No cleanup mechanism existed for workflows that failed to complete due to disconnection between the executor and the Claude SDK.

  Solution (defense-in-depth):
  1. Activity-based staleness detection: Workflows inactive for 15+ minutes are auto-failed when a new workflow is triggered on the same conversation
  2. Top-level error handling: All errors in workflow execution are caught and the workflow is properly marked as failed (prevents stuck state)
  3. Manual cancel command: /workflow cancel lets users force-fail stuck workflows immediately

  Changes:
  - Add last_activity_at column via migration for staleness tracking
  - Add updateWorkflowActivity() to track activity during execution
  - Add staleness check before blocking concurrent workflows
  - Wrap workflow execution in try-catch to ensure failure is recorded
  - Add /workflow cancel subcommand to command handler
  - Update test to match new error handling behavior

  Fixes #232

* docs: Add /workflow cancel command to documentation

* Improve error handling and add comprehensive tests for stale workflow cleanup

  Error handling improvements:
  - Add workflow ID and error context to updateWorkflowActivity logs
  - Add stack trace, error name, and cause to top-level catch block
  - Separate DB failure recording from file logging for clearer error messages
  - Add try-catch around staleness cleanup with user-facing error message
  - Check sendCriticalMessage return value and log when user not notified

  Test coverage additions:
  - Add staleness detection tests (stale vs non-stale, fallback to started_at)
  - Add /workflow cancel command tests
  - Add updateWorkflowActivity function tests (including non-throwing behavior)

  All 845 tests pass, type-check clean, lint clean.
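A minimal TypeScript sketch of the staleness rule described above (the 15-minute inactivity threshold with fallback from last_activity_at to started_at). The names isWorkflowStale, ActiveRun, and STALE_WORKFLOW_TIMEOUT_MS are illustrative assumptions, not necessarily the identifiers used in the executor:

// Illustrative sketch only -- identifier names are assumptions, not the executor's actual API.
const STALE_WORKFLOW_TIMEOUT_MS = 15 * 60 * 1000; // 15 minutes of inactivity

interface ActiveRun {
  started_at: Date;
  last_activity_at: Date | null; // column added by the migration; null for rows created before it
}

// A run counts as stale when its most recent activity (falling back to
// started_at when last_activity_at is null) is older than the timeout.
function isWorkflowStale(run: ActiveRun, now: Date = new Date()): boolean {
  const lastActivity = run.last_activity_at ?? run.started_at;
  return now.getTime() - lastActivity.getTime() > STALE_WORKFLOW_TIMEOUT_MS;
}

When a new workflow is triggered and the active run passes this check, the stale run is marked failed and the new run proceeds; otherwise the new run is blocked, with /workflow cancel available as the manual escape hatch.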
2818 lines
97 KiB
TypeScript
import { describe, it, expect, beforeEach, afterEach, mock } from 'bun:test';
import { mkdir, writeFile, rm, readFile } from 'fs/promises';
import { join } from 'path';
import { tmpdir } from 'os';
import type { IPlatformAdapter } from '../types';
import type { WorkflowDefinition } from './types';
import { createQueryResult } from '../test/mocks/database';

// Mock at the connection level to avoid polluting db/workflows module
const mockQuery = mock((query: string) => {
  // For getActiveWorkflowRun query, return no active workflow by default
  if (query.includes("status = 'running'")) {
    return Promise.resolve(createQueryResult([]));
  }
  // For createWorkflowRun INSERT, return the new workflow run
  if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
    return Promise.resolve(
      createQueryResult([
        {
          id: 'test-workflow-run-id',
          workflow_name: 'test-workflow',
          conversation_id: 'conv-123',
          codebase_id: 'codebase-456',
          current_step_index: 0,
          status: 'running' as const,
          user_message: 'test user message',
          metadata: {},
          started_at: new Date(),
          completed_at: null,
        },
      ])
    );
  }
  // Default: empty result for UPDATE queries and other operations
  return Promise.resolve(createQueryResult([]));
});

mock.module('../db/connection', () => ({
  pool: {
    query: mockQuery,
  },
}));

// Note: We use the REAL logger (not mocked) so it writes to temp directories
// This avoids test pollution with logger.test.ts

// Mock AI client
const mockSendQuery = mock(function* () {
  yield { type: 'assistant', content: 'AI response' };
  yield { type: 'result', sessionId: 'new-session-id' };
});

const mockGetAssistantClient = mock(() => ({
  sendQuery: mockSendQuery,
  getType: () => 'claude',
}));

mock.module('../clients/factory', () => ({
  getAssistantClient: mockGetAssistantClient,
}));

// Create mock platform adapter
function createMockPlatform(): IPlatformAdapter {
  return {
    sendMessage: mock(() => Promise.resolve()),
    ensureThread: mock((id: string) => Promise.resolve(id)),
    getStreamingMode: mock(() => 'batch' as const),
    getPlatformType: mock(() => 'test'),
    start: mock(() => Promise.resolve()),
    stop: mock(() => {}),
  };
}

// Import after mocks are set up
import { executeWorkflow, isValidCommandName } from './executor';
describe('Workflow Executor', () => {
  let mockPlatform: IPlatformAdapter;
  let testDir: string;

  beforeEach(async () => {
    mockPlatform = createMockPlatform();
    mockQuery.mockClear();
    mockSendQuery.mockClear();
    mockGetAssistantClient.mockClear();
    (mockPlatform.sendMessage as ReturnType<typeof mock>).mockClear();

    // Create unique temp directory for each test with command files
    testDir = join(tmpdir(), `executor-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
    const commandsDir = join(testDir, '.archon', 'commands');
    await mkdir(commandsDir, { recursive: true });

    // Create command prompt files
    await writeFile(join(commandsDir, 'command-one.md'), 'Command one prompt for $USER_MESSAGE');
    await writeFile(join(commandsDir, 'command-two.md'), 'Command two prompt');
    await writeFile(join(commandsDir, 'first-command.md'), 'First command prompt');
  });

  afterEach(async () => {
    try {
      await rm(testDir, { recursive: true, force: true });
    } catch {
      // Ignore cleanup errors
    }
  });

  describe('executeWorkflow', () => {
    const testWorkflow: WorkflowDefinition = {
      name: 'test-workflow',
      description: 'A test workflow',
      provider: 'claude',
      steps: [{ command: 'command-one' }, { command: 'command-two' }],
    };

    it('should create a workflow run record', async () => {
      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        testWorkflow,
        'User wants to do something',
        'db-conv-id',
        'codebase-id'
      );

      // Verify INSERT query was called with correct parameters
      const insertCalls = mockQuery.mock.calls.filter((call: unknown[]) =>
        (call[0] as string).includes('INSERT INTO remote_agent_workflow_runs')
      );
      expect(insertCalls.length).toBeGreaterThan(0);
      const params = insertCalls[0][1] as string[];
      expect(params).toContain('test-workflow');
      expect(params).toContain('db-conv-id');
      expect(params).toContain('codebase-id');
    });

    it('should send workflow start notification', async () => {
      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        testWorkflow,
        'User message',
        'db-conv-id'
      );

      const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
      const calls = sendMessage.mock.calls;

      // First call should be the workflow start notification
      expect(calls[0][1]).toContain('🚀 **Starting workflow**: `test-workflow`');
      expect(calls[0][1]).toContain('A test workflow');
      expect(calls[0][1]).toContain('`command-one` -> `command-two`');
    });
it('should execute each step and send notifications', async () => {
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
testWorkflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
|
|
const calls = sendMessage.mock.calls;
|
|
const messages = calls.map((call: unknown[]) => call[1]);
|
|
|
|
// Should have step notifications
|
|
expect(messages.some((m: string) => m.includes('⏳ **Step 1/2**: `command-one`'))).toBe(true);
|
|
expect(messages.some((m: string) => m.includes('⏳ **Step 2/2**: `command-two`'))).toBe(true);
|
|
});
|
|
|
|
it('should log workflow events', async () => {
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
testWorkflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Verify logging by reading the JSONL log file
|
|
const logPath = join(testDir, '.archon', 'logs', 'test-workflow-run-id.jsonl');
|
|
const logContent = await readFile(logPath, 'utf-8');
|
|
const events = logContent
|
|
.trim()
|
|
.split('\n')
|
|
.map(line => JSON.parse(line));
|
|
|
|
const eventTypes = events.map((e: { type: string }) => e.type);
|
|
expect(eventTypes).toContain('workflow_start');
|
|
expect(eventTypes.filter((t: string) => t === 'step_start')).toHaveLength(2);
|
|
expect(eventTypes.filter((t: string) => t === 'step_complete')).toHaveLength(2);
|
|
expect(eventTypes).toContain('workflow_complete');
|
|
});
|
|
|
|
it('should update workflow run progress after each step', async () => {
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
testWorkflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should have UPDATE queries for step progress
|
|
const updateCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') &&
|
|
(call[0] as string).includes('current_step_index')
|
|
);
|
|
expect(updateCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should complete workflow run on success', async () => {
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
testWorkflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should have UPDATE query with 'completed' status
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should send completion message', async () => {
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
testWorkflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
|
|
const calls = sendMessage.mock.calls;
|
|
const lastMessage = calls[calls.length - 1][1];
|
|
|
|
expect(lastMessage).toContain('✅ **Workflow complete**: `test-workflow`');
|
|
});
|
|
|
|
// Platform-specific completion message behavior
|
|
// GitHub suppresses completion messages (comment-based interface makes them redundant)
|
|
// All other platforms receive completion messages
|
|
it.each([
|
|
{ platform: 'telegram', shouldSendCompletion: true },
|
|
{ platform: 'slack', shouldSendCompletion: true },
|
|
{ platform: 'discord', shouldSendCompletion: true },
|
|
{ platform: 'github', shouldSendCompletion: false },
|
|
])(
|
|
'$platform platform: completion message should be $shouldSendCompletion',
|
|
async ({ platform, shouldSendCompletion }) => {
|
|
(mockPlatform.getPlatformType as ReturnType<typeof mock>).mockReturnValue(platform);
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
testWorkflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
|
|
const calls = sendMessage.mock.calls;
|
|
const completionCalls = calls.filter((call: unknown[]) =>
|
|
(call[1] as string).includes('**Workflow complete**')
|
|
);
|
|
|
|
if (shouldSendCompletion) {
|
|
expect(completionCalls.length).toBeGreaterThan(0);
|
|
} else {
|
|
expect(completionCalls).toHaveLength(0);
|
|
}
|
|
}
|
|
);
|
|
|
|
it('should handle missing command prompt file', async () => {
|
|
const workflowWithMissingCommand: WorkflowDefinition = {
|
|
name: 'missing-command-workflow',
|
|
description: 'Has a missing command',
|
|
steps: [{ command: 'nonexistent-command' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflowWithMissingCommand,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should fail the workflow run - verify by checking for UPDATE with 'failed'
|
|
const failCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
|
|
);
|
|
expect(failCalls.length).toBeGreaterThan(0);
|
|
|
|
// Verify error was logged by reading log file
|
|
const logPath = join(testDir, '.archon', 'logs', 'test-workflow-run-id.jsonl');
|
|
const logContent = await readFile(logPath, 'utf-8');
|
|
const events = logContent
|
|
.trim()
|
|
.split('\n')
|
|
.map(line => JSON.parse(line));
|
|
expect(events.some((e: { type: string }) => e.type === 'workflow_error')).toBe(true);
|
|
|
|
const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
|
|
const calls = sendMessage.mock.calls;
|
|
const messages = calls.map((call: unknown[]) => call[1]);
|
|
|
|
expect(messages.some((m: string) => m.includes('❌ **Workflow failed**'))).toBe(true);
|
|
});
|
|
|
|
it('should handle codebase_id being undefined', async () => {
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
testWorkflow,
|
|
'User message',
|
|
'db-conv-id',
|
|
undefined // no codebase
|
|
);
|
|
|
|
// Verify INSERT was called with null for codebase_id
|
|
const insertCalls = mockQuery.mock.calls.filter((call: unknown[]) =>
|
|
(call[0] as string).includes('INSERT INTO remote_agent_workflow_runs')
|
|
);
|
|
expect(insertCalls.length).toBeGreaterThan(0);
|
|
const params = insertCalls[0][1] as (string | null)[];
|
|
// codebase_id should be null (3rd parameter)
|
|
expect(params[2]).toBeNull();
|
|
});
|
|
});
|
|
|
|
describe('step context management', () => {
|
|
it('should start fresh session for first step', async () => {
|
|
const commandsDir = join(testDir, '.archon', 'commands');
|
|
await writeFile(join(commandsDir, 'first.md'), 'First step prompt');
|
|
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'fresh-context-test',
|
|
description: 'Test fresh context',
|
|
steps: [{ command: 'first' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Workflow executed successfully - verify completion query
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should respect clearContext flag', async () => {
|
|
// Create additional command files for this test
|
|
const commandsDir = join(testDir, '.archon', 'commands');
|
|
await writeFile(join(commandsDir, 'context-one.md'), 'Command one');
|
|
await writeFile(join(commandsDir, 'context-two.md'), 'Command two');
|
|
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'clear-context-test',
|
|
description: 'Test clear context',
|
|
steps: [{ command: 'context-one' }, { command: 'context-two', clearContext: true }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Both steps should complete
|
|
expect(mockSendQuery).toHaveBeenCalledTimes(2);
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
});
|
|
|
|
describe('edge cases', () => {
|
|
it('should handle workflow with single step', async () => {
|
|
const commandsDir = join(testDir, '.archon', 'commands');
|
|
await writeFile(join(commandsDir, 'single.md'), 'Single command prompt');
|
|
|
|
const singleStepWorkflow: WorkflowDefinition = {
|
|
name: 'single-step-workflow',
|
|
description: 'Only one step',
|
|
steps: [{ command: 'single' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
singleStepWorkflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
expect(mockSendQuery).toHaveBeenCalledTimes(1);
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
|
|
// Verify workflow start notification IS sent, but no "Step 1/1" notification
|
|
const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
|
|
const calls = sendMessage.mock.calls;
|
|
const messages = calls.map((call: unknown[]) => call[1]);
|
|
expect(messages.some((m: string) => m.includes('Starting workflow'))).toBe(true);
|
|
expect(messages.some((m: string) => m.includes('**Step 1/1**'))).toBe(false);
|
|
});
|
|
|
|
it('should handle workflow with many steps', async () => {
|
|
const commandsDir = join(testDir, '.archon', 'commands');
|
|
|
|
// Create 5 command files
|
|
for (let i = 0; i < 5; i++) {
|
|
await writeFile(join(commandsDir, `cmd-${String(i)}.md`), `Command ${String(i)} prompt`);
|
|
}
|
|
|
|
const manyStepsWorkflow: WorkflowDefinition = {
|
|
name: 'many-steps-workflow',
|
|
description: 'Five steps',
|
|
steps: Array.from({ length: 5 }, (_, i) => ({ command: `cmd-${String(i)}` })),
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
manyStepsWorkflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
expect(mockSendQuery).toHaveBeenCalledTimes(5);
|
|
// Verify multiple UPDATE queries for step progress
|
|
const updateCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') &&
|
|
(call[0] as string).includes('current_step_index')
|
|
);
|
|
expect(updateCalls.length).toBeGreaterThan(0);
|
|
// Verify completion
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should substitute $USER_MESSAGE in command prompt', async () => {
|
|
const commandsDir = join(testDir, '.archon', 'commands');
|
|
await writeFile(
|
|
join(commandsDir, 'substitution.md'),
|
|
'User wants: $USER_MESSAGE\nWorkflow ID: $WORKFLOW_ID'
|
|
);
|
|
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'substitution-workflow',
|
|
description: 'Test variable substitution',
|
|
steps: [{ command: 'substitution' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'Build a feature',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// The AI client should receive the substituted prompt
|
|
expect(mockSendQuery).toHaveBeenCalled();
|
|
});
|
|
|
|
it('should substitute $ARGUMENTS in command prompt (same as $USER_MESSAGE)', async () => {
|
|
const commandsDir = join(testDir, '.archon', 'commands');
|
|
await writeFile(join(commandsDir, 'arguments-test.md'), 'Request: $ARGUMENTS');
|
|
|
|
// Track calls before this test
|
|
const callCountBefore = mockSendQuery.mock.calls.length;
|
|
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'arguments-workflow',
|
|
description: 'Test $ARGUMENTS substitution',
|
|
steps: [{ command: 'arguments-test' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'Help me debug this issue',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// The AI client should receive the substituted prompt
|
|
expect(mockSendQuery.mock.calls.length).toBeGreaterThan(callCountBefore);
|
|
// $ARGUMENTS should be replaced with the user message from the mock database row
|
|
// (which is 'test user message' - see mockQuery setup at top of file)
|
|
const callArg = mockSendQuery.mock.calls[callCountBefore][0] as string;
|
|
expect(callArg).toContain('test user message');
|
|
expect(callArg).not.toContain('$ARGUMENTS');
|
|
});
|
|
|
|
it('should handle empty user message', async () => {
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'test-workflow',
|
|
description: 'Test',
|
|
steps: [{ command: 'command-one' }],
|
|
},
|
|
'', // Empty user message
|
|
'db-conv-id'
|
|
);
|
|
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should handle user message with special characters', async () => {
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'test-workflow',
|
|
description: 'Test',
|
|
steps: [{ command: 'command-one' }],
|
|
},
|
|
'Fix the "bug" in `src/index.ts` with $variables',
|
|
'db-conv-id'
|
|
);
|
|
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should fail when command file is empty', async () => {
|
|
const commandsDir = join(testDir, '.archon', 'commands');
|
|
await writeFile(join(commandsDir, 'empty.md'), '');
|
|
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'empty-command-workflow',
|
|
description: 'Has empty command file',
|
|
steps: [{ command: 'empty' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Empty prompt file is treated as invalid - workflow should fail
|
|
const failCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
|
|
);
|
|
expect(failCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should fail on second step if it is missing', async () => {
|
|
const commandsDir = join(testDir, '.archon', 'commands');
|
|
await writeFile(join(commandsDir, 'existing.md'), 'This command exists');
|
|
// 'missing' file does not exist
|
|
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'partial-workflow',
|
|
description: 'Second step is missing',
|
|
steps: [{ command: 'existing' }, { command: 'missing' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// First step should succeed - verify by reading log file
|
|
const logPath = join(testDir, '.archon', 'logs', 'test-workflow-run-id.jsonl');
|
|
const logContent = await readFile(logPath, 'utf-8');
|
|
const events = logContent
|
|
.trim()
|
|
.split('\n')
|
|
.map(line => JSON.parse(line));
|
|
const stepCompleteEvents = events.filter((e: { type: string }) => e.type === 'step_complete');
|
|
expect(stepCompleteEvents).toHaveLength(1);
|
|
|
|
// But workflow should fail overall
|
|
const failCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
|
|
);
|
|
expect(failCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should use default provider (claude) when not specified', async () => {
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'no-provider-workflow',
|
|
description: 'No provider specified',
|
|
// provider is undefined
|
|
steps: [{ command: 'command-one' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should use claude by default
|
|
expect(mockGetAssistantClient).toHaveBeenCalledWith('claude');
|
|
});
|
|
|
|
it('should use specified provider', async () => {
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'codex-workflow',
|
|
description: 'Uses codex',
|
|
provider: 'codex',
|
|
steps: [{ command: 'command-one' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
expect(mockGetAssistantClient).toHaveBeenCalledWith('codex');
|
|
});
|
|
|
|
it('should handle streaming mode', async () => {
|
|
// Switch platform to streaming mode
|
|
(mockPlatform.getStreamingMode as ReturnType<typeof mock>).mockReturnValue('stream');
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'test-workflow',
|
|
description: 'Test',
|
|
steps: [{ command: 'command-one' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should still complete
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should reject invalid command names with path traversal', async () => {
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'path-traversal-workflow',
|
|
description: 'Has invalid command name',
|
|
steps: [{ command: '../../../etc/passwd' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should fail - path traversal rejected
|
|
const failCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
|
|
);
|
|
expect(failCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should reject command names starting with dot', async () => {
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'dotfile-workflow',
|
|
description: 'Has invalid command name',
|
|
steps: [{ command: '.hidden' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should fail - dotfile rejected
|
|
const failCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
|
|
);
|
|
expect(failCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should return specific error message for empty command file (Issue #128)', async () => {
|
|
const commandsDir = join(testDir, '.archon', 'commands');
|
|
await writeFile(join(commandsDir, 'empty-cmd.md'), ' \n ');
|
|
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'empty-file-workflow',
|
|
description: 'Has empty command file',
|
|
steps: [{ command: 'empty-cmd' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should fail with specific "empty_file" error message
|
|
const sendMessageCalls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
|
|
const failureMessages = sendMessageCalls.filter(
|
|
(call: unknown[]) =>
|
|
typeof call[1] === 'string' && (call[1] as string).includes('Command file is empty')
|
|
);
|
|
expect(failureMessages.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should return specific error message for path traversal (Issue #128)', async () => {
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'path-traversal-workflow',
|
|
description: 'Has path traversal command',
|
|
steps: [{ command: '../../../etc/passwd' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should fail with specific "invalid_name" error message
|
|
const sendMessageCalls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
|
|
const failureMessages = sendMessageCalls.filter(
|
|
(call: unknown[]) =>
|
|
typeof call[1] === 'string' &&
|
|
(call[1] as string).includes('Invalid command name (potential path traversal)')
|
|
);
|
|
expect(failureMessages.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should return specific error message for missing command file (Issue #128)', async () => {
|
|
const workflow: WorkflowDefinition = {
|
|
name: 'missing-cmd-workflow',
|
|
description: 'Has missing command file',
|
|
steps: [{ command: 'totally-nonexistent-cmd' }],
|
|
};
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
workflow,
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should fail with specific "not_found" error message that includes searched paths
|
|
const sendMessageCalls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
|
|
const failureMessages = sendMessageCalls.filter(
|
|
(call: unknown[]) =>
|
|
typeof call[1] === 'string' &&
|
|
(call[1] as string).includes('Command prompt not found') &&
|
|
(call[1] as string).includes('searched:')
|
|
);
|
|
expect(failureMessages.length).toBeGreaterThan(0);
|
|
});
|
|
});
|
|
|
|
describe('AI client error hints (Issue #126)', () => {
|
|
it('should include rate limit hint for 429 errors', async () => {
|
|
// Mock AI client to throw rate limit error
|
|
mockSendQuery.mockImplementation(function* () {
|
|
throw new Error('API returned 429: Too many requests');
|
|
});
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'rate-limit-workflow',
|
|
description: 'Test rate limit handling',
|
|
steps: [{ command: 'command-one' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should include hint about rate limiting
|
|
const sendMessageCalls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
|
|
const hintMessages = sendMessageCalls.filter(
|
|
(call: unknown[]) =>
|
|
typeof call[1] === 'string' &&
|
|
(call[1] as string).includes('Rate limited') &&
|
|
(call[1] as string).includes('wait')
|
|
);
|
|
expect(hintMessages.length).toBeGreaterThan(0);
|
|
|
|
// Reset mock for other tests
|
|
mockSendQuery.mockImplementation(function* () {
|
|
yield { type: 'assistant', content: 'AI response' };
|
|
yield { type: 'result', sessionId: 'new-session-id' };
|
|
});
|
|
});
|
|
|
|
it('should include auth hint for 401 errors', async () => {
|
|
// Mock AI client to throw auth error
|
|
mockSendQuery.mockImplementation(function* () {
|
|
throw new Error('401 Unauthorized: Invalid API key');
|
|
});
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'auth-error-workflow',
|
|
description: 'Test auth error handling',
|
|
steps: [{ command: 'command-one' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should include hint about checking API key
|
|
const sendMessageCalls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
|
|
const hintMessages = sendMessageCalls.filter(
|
|
(call: unknown[]) => typeof call[1] === 'string' && (call[1] as string).includes('API key')
|
|
);
|
|
expect(hintMessages.length).toBeGreaterThan(0);
|
|
|
|
// Reset mock for other tests
|
|
mockSendQuery.mockImplementation(function* () {
|
|
yield { type: 'assistant', content: 'AI response' };
|
|
yield { type: 'result', sessionId: 'new-session-id' };
|
|
});
|
|
});
|
|
|
|
it('should include permission hint for 403 errors', async () => {
|
|
// Mock AI client to throw 403 error
|
|
mockSendQuery.mockImplementation(function* () {
|
|
throw new Error('403 Forbidden: Insufficient permissions');
|
|
});
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'forbidden-workflow',
|
|
description: 'Test 403 error handling',
|
|
steps: [{ command: 'command-one' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should include hint about checking API access
|
|
const sendMessageCalls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
|
|
const hintMessages = sendMessageCalls.filter(
|
|
(call: unknown[]) =>
|
|
typeof call[1] === 'string' && (call[1] as string).includes('Permission denied')
|
|
);
|
|
expect(hintMessages.length).toBeGreaterThan(0);
|
|
|
|
// Reset mock for other tests
|
|
mockSendQuery.mockImplementation(function* () {
|
|
yield { type: 'assistant', content: 'AI response' };
|
|
yield { type: 'result', sessionId: 'new-session-id' };
|
|
});
|
|
});
|
|
|
|
it('should include network hint for timeout errors', async () => {
|
|
// Mock AI client to throw timeout error
|
|
mockSendQuery.mockImplementation(function* () {
|
|
throw new Error('Request timeout: ETIMEDOUT');
|
|
});
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'timeout-workflow',
|
|
description: 'Test timeout handling',
|
|
steps: [{ command: 'command-one' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Should include hint about network issues
|
|
const sendMessageCalls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
|
|
const hintMessages = sendMessageCalls.filter(
|
|
(call: unknown[]) =>
|
|
typeof call[1] === 'string' && (call[1] as string).includes('Network issue')
|
|
);
|
|
expect(hintMessages.length).toBeGreaterThan(0);
|
|
|
|
// Reset mock for other tests
|
|
mockSendQuery.mockImplementation(function* () {
|
|
yield { type: 'assistant', content: 'AI response' };
|
|
yield { type: 'result', sessionId: 'new-session-id' };
|
|
});
|
|
});
|
|
});
|
|
|
|
describe('platform message error handling', () => {
|
|
it('should continue workflow when platform.sendMessage fails', async () => {
|
|
// Mock sendMessage to throw an error
|
|
const sendMessageMock = mock(() => Promise.reject(new Error('Platform API rate limit')));
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'test-workflow',
|
|
description: 'Test',
|
|
steps: [{ command: 'command-one' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Workflow should still complete successfully despite sendMessage failures
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should continue workflow when sendMessage fails during streaming', async () => {
|
|
// Switch platform to streaming mode
|
|
(mockPlatform.getStreamingMode as ReturnType<typeof mock>).mockReturnValue('stream');
|
|
|
|
// Mock sendMessage to throw an error
|
|
const sendMessageMock = mock(() => Promise.reject(new Error('Network error')));
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'test-workflow',
|
|
description: 'Test',
|
|
steps: [{ command: 'command-one' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Workflow should still complete successfully
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should continue to next step when sendMessage fails on step notification', async () => {
|
|
// Create a mock that fails on first call, succeeds on rest
|
|
let callCount = 0;
|
|
const sendMessageMock = mock(() => {
|
|
callCount++;
|
|
if (callCount === 1) {
|
|
return Promise.reject(new Error('First send failed'));
|
|
}
|
|
return Promise.resolve();
|
|
});
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'test-workflow',
|
|
description: 'Test',
|
|
steps: [{ command: 'command-one' }, { command: 'command-two' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Both steps should have been executed (2 calls to sendQuery)
|
|
expect(mockSendQuery).toHaveBeenCalledTimes(2);
|
|
|
|
// Workflow should complete
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should log errors with context when sendMessage fails', async () => {
|
|
const errorLogs: unknown[][] = [];
|
|
const originalConsoleError = console.error;
|
|
console.error = mock((...args: unknown[]) => {
|
|
errorLogs.push(args);
|
|
});
|
|
|
|
const sendMessageMock = mock(() => Promise.reject(new Error('API timeout')));
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{ name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Restore console.error
|
|
console.error = originalConsoleError;
|
|
|
|
// Verify error was logged with correct structure
|
|
const safeSendLogs = errorLogs.filter(
|
|
log => log[0] === '[WorkflowExecutor] Failed to send message'
|
|
);
|
|
expect(safeSendLogs.length).toBeGreaterThan(0);
|
|
|
|
// Check that context is included
|
|
const logContext = safeSendLogs[0][1] as Record<string, unknown>;
|
|
expect(logContext).toHaveProperty('conversationId', 'conv-123');
|
|
expect(logContext).toHaveProperty('error', 'API timeout');
|
|
expect(logContext).toHaveProperty('errorType');
|
|
expect(logContext).toHaveProperty('platformType');
|
|
});
|
|
|
|
it('should mark workflow as failed on fatal authentication errors (no throw)', async () => {
|
|
const sendMessageMock = mock(() =>
|
|
Promise.reject(new Error('401 Unauthorized: Invalid token'))
|
|
);
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
// With top-level error handling, executeWorkflow should NOT throw
|
|
// Instead it marks the workflow as failed and returns normally
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{ name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Verify workflow was marked as failed in database
|
|
const failCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
|
|
);
|
|
expect(failCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should continue workflow when tool message send fails in streaming mode', async () => {
|
|
(mockPlatform.getStreamingMode as ReturnType<typeof mock>).mockReturnValue('stream');
|
|
|
|
// Mock AI client to yield tool messages
|
|
mockSendQuery.mockImplementation(function* () {
|
|
yield { type: 'assistant', content: 'Starting...' };
|
|
yield { type: 'tool', toolName: 'read_file', toolInput: { path: '/tmp/test.ts' } };
|
|
yield { type: 'result', sessionId: 'new-session-id' };
|
|
});
|
|
|
|
const sendMessageMock = mock(() => Promise.reject(new Error('Rate limited')));
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{ name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Workflow should complete despite all sendMessage calls failing
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should record workflow failure in database even when failure notification fails', async () => {
|
|
const sendMessageMock = mock(() => Promise.reject(new Error('Cannot send')));
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'failing-workflow',
|
|
description: 'Test',
|
|
steps: [{ command: 'nonexistent-command' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Database should still record the failure
|
|
const failCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
|
|
);
|
|
expect(failCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should warn user about dropped messages in streaming mode', async () => {
|
|
(mockPlatform.getStreamingMode as ReturnType<typeof mock>).mockReturnValue('stream');
|
|
|
|
// Mock AI client to yield multiple messages
|
|
mockSendQuery.mockImplementation(function* () {
|
|
yield { type: 'assistant', content: 'Message 1' };
|
|
yield { type: 'assistant', content: 'Message 2' };
|
|
yield { type: 'assistant', content: 'Message 3' };
|
|
yield { type: 'result', sessionId: 'new-session-id' };
|
|
});
|
|
|
|
// Fail on messages 2 and 3, succeed on others
|
|
let callCount = 0;
|
|
const sendMessageMock = mock(() => {
|
|
callCount++;
|
|
// Fail on assistant message sends (calls 2, 3, 4 after step notification)
|
|
if (callCount >= 3 && callCount <= 4) {
|
|
return Promise.reject(new Error('Rate limited'));
|
|
}
|
|
return Promise.resolve();
|
|
});
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{ name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Verify warning message was attempted
|
|
const calls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
|
|
const warningCalls = calls.filter((call: unknown[]) =>
|
|
(call[1] as string).includes('message(s) failed to deliver')
|
|
);
|
|
expect(warningCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should handle intermittent sendMessage failures throughout workflow', async () => {
|
|
let callCount = 0;
|
|
const sendMessageMock = mock(() => {
|
|
callCount++;
|
|
// Fail on calls 2 and 5 (mid-workflow notifications)
|
|
if (callCount === 2 || callCount === 5) {
|
|
return Promise.reject(new Error('Intermittent failure'));
|
|
}
|
|
return Promise.resolve();
|
|
});
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{
|
|
name: 'test-workflow',
|
|
description: 'Test',
|
|
steps: [{ command: 'command-one' }, { command: 'command-two' }],
|
|
},
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Both steps should have been executed
|
|
expect(mockSendQuery).toHaveBeenCalledTimes(2);
|
|
|
|
// Workflow should complete
|
|
const completeCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
|
|
);
|
|
expect(completeCalls.length).toBeGreaterThan(0);
|
|
});
|
|
|
|
it('should retry critical completion message on transient errors', async () => {
|
|
// Track all sendMessage attempts
|
|
let callCount = 0;
|
|
const sendMessageMock = mock(() => {
|
|
callCount++;
|
|
// Fail first 2 attempts on completion message, succeed on 3rd
|
|
// Completion message is the last one sent
|
|
if (callCount >= 4 && callCount <= 5) {
|
|
return Promise.reject(new Error('Connection timeout'));
|
|
}
|
|
return Promise.resolve();
|
|
});
|
|
mockPlatform.sendMessage = sendMessageMock;
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{ name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Verify multiple attempts were made for completion message
|
|
const calls = (mockPlatform.sendMessage as ReturnType<typeof mock>).mock.calls;
|
|
const completionCalls = calls.filter((call: unknown[]) =>
|
|
(call[1] as string).includes('**Workflow complete**')
|
|
);
|
|
// Should have retried
|
|
expect(completionCalls.length).toBeGreaterThanOrEqual(1);
|
|
});
|
|
|
|
describe('staleness detection', () => {
|
|
it('should fail stale workflow and start new one when last_activity_at > 15 min ago', async () => {
|
|
// Mock getActiveWorkflowRun to return a stale workflow (20 min inactive)
|
|
const staleTime = new Date(Date.now() - 20 * 60 * 1000); // 20 minutes ago
|
|
mockQuery.mockImplementation((query: string) => {
|
|
if (query.includes("status = 'running'")) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'stale-workflow-id',
|
|
workflow_name: 'old-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: staleTime,
|
|
last_activity_at: staleTime,
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'old message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'new-workflow-run-id',
|
|
workflow_name: 'test-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: new Date(),
|
|
last_activity_at: new Date(),
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'test user message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
return Promise.resolve(createQueryResult([]));
|
|
});
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{ name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Verify stale workflow was marked as failed
|
|
const failCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') &&
|
|
(call[0] as string).includes("'failed'") &&
|
|
(call[1] as string[])?.includes('stale-workflow-id')
|
|
);
|
|
expect(failCalls.length).toBeGreaterThan(0);
|
|
|
|
// Verify new workflow was created
|
|
const insertCalls = mockQuery.mock.calls.filter((call: unknown[]) =>
|
|
(call[0] as string).includes('INSERT INTO remote_agent_workflow_runs')
|
|
);
|
|
expect(insertCalls.length).toBeGreaterThan(0);
|
|
|
|
// Reset mock
|
|
mockQuery.mockImplementation((query: string) => {
|
|
if (query.includes("status = 'running'")) {
|
|
return Promise.resolve(createQueryResult([]));
|
|
}
|
|
if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'test-workflow-run-id',
|
|
workflow_name: 'test-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: new Date(),
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'test user message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
return Promise.resolve(createQueryResult([]));
|
|
});
|
|
});
|
|
|
|
it('should block new workflow when active workflow is not stale', async () => {
|
|
// Mock getActiveWorkflowRun to return a recent workflow (5 min inactive)
|
|
const recentTime = new Date(Date.now() - 5 * 60 * 1000); // 5 minutes ago
|
|
mockQuery.mockImplementation((query: string) => {
|
|
if (query.includes("status = 'running'")) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'active-workflow-id',
|
|
workflow_name: 'active-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: recentTime,
|
|
last_activity_at: recentTime,
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'active message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
return Promise.resolve(createQueryResult([]));
|
|
});
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{ name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Verify rejection message was sent
|
|
const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
|
|
const calls = sendMessage.mock.calls;
|
|
const blockingMessages = calls.filter((call: unknown[]) =>
|
|
(call[1] as string).includes('Workflow already running')
|
|
);
|
|
expect(blockingMessages.length).toBe(1);
|
|
|
|
// Verify no INSERT was made (new workflow not created)
|
|
const insertCalls = mockQuery.mock.calls.filter((call: unknown[]) =>
|
|
(call[0] as string).includes('INSERT INTO remote_agent_workflow_runs')
|
|
);
|
|
expect(insertCalls.length).toBe(0);
|
|
|
|
// Reset mock
|
|
mockQuery.mockImplementation((query: string) => {
|
|
if (query.includes("status = 'running'")) {
|
|
return Promise.resolve(createQueryResult([]));
|
|
}
|
|
if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'test-workflow-run-id',
|
|
workflow_name: 'test-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: new Date(),
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'test user message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
return Promise.resolve(createQueryResult([]));
|
|
});
|
|
});
|
|
|
|
it('should fallback to started_at when last_activity_at is null', async () => {
|
|
// Mock getActiveWorkflowRun to return a workflow with null last_activity_at but old started_at
|
|
const staleTime = new Date(Date.now() - 20 * 60 * 1000); // 20 minutes ago
|
|
mockQuery.mockImplementation((query: string) => {
|
|
if (query.includes("status = 'running'")) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'stale-workflow-id',
|
|
workflow_name: 'old-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: staleTime,
|
|
last_activity_at: null, // null - should fallback to started_at
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'old message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'new-workflow-run-id',
|
|
workflow_name: 'test-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: new Date(),
|
|
last_activity_at: new Date(),
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'test user message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
return Promise.resolve(createQueryResult([]));
|
|
});
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{ name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Verify stale workflow was marked as failed (fallback to started_at worked)
|
|
const failCalls = mockQuery.mock.calls.filter(
|
|
(call: unknown[]) =>
|
|
(call[0] as string).includes('UPDATE') &&
|
|
(call[0] as string).includes("'failed'") &&
|
|
(call[1] as string[])?.includes('stale-workflow-id')
|
|
);
|
|
expect(failCalls.length).toBeGreaterThan(0);
|
|
|
|
// Reset mock
|
|
mockQuery.mockImplementation((query: string) => {
|
|
if (query.includes("status = 'running'")) {
|
|
return Promise.resolve(createQueryResult([]));
|
|
}
|
|
if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'test-workflow-run-id',
|
|
workflow_name: 'test-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: new Date(),
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'test user message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
return Promise.resolve(createQueryResult([]));
|
|
});
|
|
});
|
|
|
|
it('should show cleanup error message when failWorkflowRun fails for stale workflow', async () => {
|
|
// Mock getActiveWorkflowRun to return a stale workflow
|
|
const staleTime = new Date(Date.now() - 20 * 60 * 1000);
|
|
let failWorkflowCallCount = 0;
|
|
mockQuery.mockImplementation((query: string) => {
|
|
if (query.includes("status = 'running'")) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'stale-workflow-id',
|
|
workflow_name: 'old-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: staleTime,
|
|
last_activity_at: staleTime,
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'old message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
// Fail the staleness cleanup query
|
|
if (query.includes('UPDATE') && query.includes("'failed'")) {
|
|
failWorkflowCallCount++;
|
|
if (failWorkflowCallCount === 1) {
|
|
// First fail call is for staleness cleanup
|
|
return Promise.reject(new Error('Database connection lost'));
|
|
}
|
|
}
|
|
return Promise.resolve(createQueryResult([]));
|
|
});
|
|
|
|
await executeWorkflow(
|
|
mockPlatform,
|
|
'conv-123',
|
|
testDir,
|
|
{ name: 'test-workflow', description: 'Test', steps: [{ command: 'command-one' }] },
|
|
'User message',
|
|
'db-conv-id'
|
|
);
|
|
|
|
// Verify user received cleanup error message
|
|
const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
|
|
const calls = sendMessage.mock.calls;
|
|
const cleanupErrorMessages = calls.filter((call: unknown[]) =>
|
|
(call[1] as string).includes('Workflow blocked') &&
|
|
(call[1] as string).includes('/workflow cancel')
|
|
);
|
|
expect(cleanupErrorMessages.length).toBe(1);
|
|
|
|
// Verify no new workflow was created
|
|
const insertCalls = mockQuery.mock.calls.filter((call: unknown[]) =>
|
|
(call[0] as string).includes('INSERT INTO remote_agent_workflow_runs')
|
|
);
|
|
expect(insertCalls.length).toBe(0);
|
|
|
|
// Reset mock
|
|
mockQuery.mockImplementation((query: string) => {
|
|
if (query.includes("status = 'running'")) {
|
|
return Promise.resolve(createQueryResult([]));
|
|
}
|
|
if (query.includes('INSERT INTO remote_agent_workflow_runs')) {
|
|
return Promise.resolve(
|
|
createQueryResult([
|
|
{
|
|
id: 'test-workflow-run-id',
|
|
workflow_name: 'test-workflow',
|
|
conversation_id: 'conv-123',
|
|
status: 'running' as const,
|
|
started_at: new Date(),
|
|
completed_at: null,
|
|
current_step_index: 0,
|
|
user_message: 'test user message',
|
|
metadata: {},
|
|
},
|
|
])
|
|
);
|
|
}
|
|
return Promise.resolve(createQueryResult([]));
|
|
});
|
|
});
|
|
});
|
|
});
|
|
|
|
  describe('loop workflow execution', () => {
    it('should execute loop and complete on signal', async () => {
      // Mock AI to return COMPLETE on 3rd iteration
      let callCount = 0;
      mockSendQuery.mockImplementation(function* () {
        callCount++;
        if (callCount >= 3) {
          yield { type: 'assistant', content: 'All done! <promise>COMPLETE</promise>' };
        } else {
          yield { type: 'assistant', content: `Working on iteration ${String(callCount)}...` };
        }
        yield { type: 'result', sessionId: `session-${String(callCount)}` };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'test-loop',
        description: 'Test loop workflow',
        loop: { until: 'COMPLETE', max_iterations: 10, fresh_context: false },
        prompt: 'Do the thing. Output <promise>COMPLETE</promise> when done.',
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        loopWorkflow,
        'Implement everything',
        'db-conv-id'
      );

      // Should have run 3 iterations
      expect(mockSendQuery).toHaveBeenCalledTimes(3);

      // Should complete successfully
      const completeCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
      );
      expect(completeCalls.length).toBeGreaterThan(0);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should fail when max iterations reached without completion', async () => {
      // Mock AI to never return completion signal
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'Still working...' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'endless-loop',
        description: 'Never completes',
        loop: { until: 'COMPLETE', max_iterations: 3, fresh_context: false },
        prompt: 'Do something that never finishes.',
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        loopWorkflow,
        'Try forever',
        'db-conv-id'
      );

      // Should have run exactly max_iterations times
      expect(mockSendQuery).toHaveBeenCalledTimes(3);

      // Should fail
      const failCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
      );
      expect(failCalls.length).toBeGreaterThan(0);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should detect completion signal in <promise> tags', async () => {
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'Done! <promise>DONE</promise>' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'promise-test',
        description: 'Test promise tag detection',
        loop: { until: 'DONE', max_iterations: 5, fresh_context: false },
        prompt: 'Output <promise>DONE</promise> when finished.',
      };

      await executeWorkflow(mockPlatform, 'conv-123', testDir, loopWorkflow, 'Test', 'db-conv-id');

      // Should complete on first iteration
      expect(mockSendQuery).toHaveBeenCalledTimes(1);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should update metadata with iteration count', async () => {
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: '<promise>COMPLETE</promise>' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'metadata-test',
        description: 'Test metadata updates',
        loop: { until: 'COMPLETE', max_iterations: 10, fresh_context: false },
        prompt: 'Complete immediately.',
      };

      await executeWorkflow(mockPlatform, 'conv-123', testDir, loopWorkflow, 'Test', 'db-conv-id');

      // Should have UPDATE with metadata
      const metadataCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes('metadata')
      );
      expect(metadataCalls.length).toBeGreaterThan(0);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should handle single iteration loop (max_iterations = 1)', async () => {
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'No completion signal here' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'single-iteration',
        description: 'Single iteration limit',
        loop: { until: 'COMPLETE', max_iterations: 1, fresh_context: false },
        prompt: 'Try once.',
      };

      await executeWorkflow(mockPlatform, 'conv-123', testDir, loopWorkflow, 'Test', 'db-conv-id');

      // Should have run exactly 1 time
      expect(mockSendQuery).toHaveBeenCalledTimes(1);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should detect plain completion signal (backwards compatibility)', async () => {
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'All tasks done! COMPLETE' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'plain-signal-test',
        description: 'Test plain signal detection',
        loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: false },
        prompt: 'Output COMPLETE when finished.',
      };

      await executeWorkflow(mockPlatform, 'conv-123', testDir, loopWorkflow, 'Test', 'db-conv-id');

      // Should complete on first iteration (plain signal detected)
      expect(mockSendQuery).toHaveBeenCalledTimes(1);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should handle AI error during iteration', async () => {
      let callCount = 0;
      mockSendQuery.mockImplementation(function* () {
        callCount++;
        if (callCount === 2) {
          throw new Error('AI service unavailable');
        }
        yield { type: 'assistant', content: `Iteration ${String(callCount)}` };
        yield { type: 'result', sessionId: `session-${String(callCount)}` };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'error-test',
        description: 'Test error handling',
        loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: false },
        prompt: 'Work until done.',
      };

      await executeWorkflow(mockPlatform, 'conv-123', testDir, loopWorkflow, 'Test', 'db-conv-id');

      // Should have run 2 iterations (failed on 2nd)
      expect(mockSendQuery).toHaveBeenCalledTimes(2);

      // Should fail
      const failCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
      );
      expect(failCalls.length).toBeGreaterThan(0);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should substitute $USER_MESSAGE in loop prompt', async () => {
      // Track the prompt passed to sendQuery
      let receivedPrompt = '';
      mockSendQuery.mockImplementation(function* (prompt: string) {
        receivedPrompt = prompt;
        yield { type: 'assistant', content: '<promise>COMPLETE</promise>' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'substitution-test',
        description: 'Test variable substitution',
        loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: false },
        prompt: 'User wants: $USER_MESSAGE',
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        loopWorkflow,
        'build a feature',
        'db-conv-id'
      );

      // User message from the mock database row is 'test user message'
      expect(receivedPrompt).toContain('test user message');
      expect(receivedPrompt).not.toContain('$USER_MESSAGE');

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should start fresh session each iteration when fresh_context is true', async () => {
      // Track session IDs passed to each iteration
      const receivedSessionIds: (string | undefined)[] = [];
      let callCount = 0;

      mockSendQuery.mockImplementation(function* (
        _prompt: string,
        _cwd: string,
        sessionId?: string
      ) {
        receivedSessionIds.push(sessionId);
        callCount++;
        if (callCount >= 3) {
          yield { type: 'assistant', content: '<promise>COMPLETE</promise>' };
        } else {
          yield { type: 'assistant', content: `Iteration ${String(callCount)}` };
        }
        yield { type: 'result', sessionId: `session-${String(callCount)}` };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'fresh-context-loop',
        description: 'Test fresh_context: true',
        loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: true },
        prompt: 'Do work with fresh context each time.',
      };

      await executeWorkflow(mockPlatform, 'conv-123', testDir, loopWorkflow, 'Test', 'db-conv-id');

      // Should have run 3 iterations
      expect(mockSendQuery).toHaveBeenCalledTimes(3);

      // ALL iterations should have undefined session ID (fresh context)
      expect(receivedSessionIds).toEqual([undefined, undefined, undefined]);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should detect completion signal split across multiple chunks', async () => {
      // Simulate AI returning completion signal across multiple yield statements
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'Processing complete. ' };
        yield { type: 'assistant', content: '<prom' };
        yield { type: 'assistant', content: 'ise>COMPLETE</promise>' };
        yield { type: 'assistant', content: ' Done!' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'split-signal-test',
        description: 'Test signal detection across chunks',
        loop: { until: 'COMPLETE', max_iterations: 5, fresh_context: false },
        prompt: 'Output completion signal.',
      };

      await executeWorkflow(mockPlatform, 'conv-123', testDir, loopWorkflow, 'Test', 'db-conv-id');

      // Should complete on first iteration (signal accumulated across chunks)
      expect(mockSendQuery).toHaveBeenCalledTimes(1);

      // Should have marked as completed
      const completeCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
      );
      expect(completeCalls.length).toBeGreaterThan(0);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should NOT detect false positive plain signal in middle of text', async () => {
      // This tests that "not COMPLETE yet" doesn't match "COMPLETE"
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'The task is not COMPLETE yet, more work needed.' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const loopWorkflow: WorkflowDefinition = {
        name: 'false-positive-test',
        description: 'Test false positive prevention',
        loop: { until: 'COMPLETE', max_iterations: 2, fresh_context: false },
        prompt: 'Work until done.',
      };

      await executeWorkflow(mockPlatform, 'conv-123', testDir, loopWorkflow, 'Test', 'db-conv-id');

      // Should have run max_iterations times (NOT detected as complete)
      expect(mockSendQuery).toHaveBeenCalledTimes(2);

      // Should have FAILED (not completed)
      const failCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
      );
      expect(failCalls.length).toBeGreaterThan(0);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });
  });

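  // issueContext threading, as exercised below: GitHub issue/PR context is
  // substituted into $CONTEXT / $EXTERNAL_CONTEXT / $ISSUE_CONTEXT variables when
  // a prompt declares them, appended to the prompt otherwise, and persisted in
  // the workflow run metadata as github_context.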
  describe('issueContext handling', () => {
    describe('step workflow with context', () => {
      it('should pass issueContext to workflow step', async () => {
        const workflow: WorkflowDefinition = {
          name: 'context-workflow',
          description: 'Test workflow with context',
          provider: 'claude',
          steps: [{ command: 'command-one' }],
        };

        const issueContext =
          '[GitHub Issue Context]\nIssue #42: "Test Issue"\nAuthor: testuser\n\nDescription:\nTest issue body';

        await executeWorkflow(
          mockPlatform,
          'conv-123',
          testDir,
          workflow,
          'test user message',
          'db-conv-id',
          'codebase-id',
          issueContext
        );

        // Verify AI client received the context appended to prompt
        expect(mockSendQuery.mock.calls.length).toBeGreaterThan(0);
        const promptArg = mockSendQuery.mock.calls[0][0] as string;
        expect(promptArg).toContain('[GitHub Issue Context]');
        expect(promptArg).toContain('Issue #42');
      });

      it('should substitute $CONTEXT variable in step workflow', async () => {
        // Create command file that uses $CONTEXT variable
        const commandsDir = join(testDir, '.archon', 'commands');
        await writeFile(
          join(commandsDir, 'context-command.md'),
          'Process the following context:\n\n$CONTEXT\n\nNow execute the task.'
        );

        const workflow: WorkflowDefinition = {
          name: 'context-var-workflow',
          description: 'Test workflow with $CONTEXT variable',
          provider: 'claude',
          steps: [{ command: 'context-command' }],
        };

        const issueContext = 'GitHub Issue #123 content here';

        await executeWorkflow(
          mockPlatform,
          'conv-123',
          testDir,
          workflow,
          'test message',
          'db-conv-id',
          'codebase-id',
          issueContext
        );

        const promptArg = mockSendQuery.mock.calls[0][0] as string;
        // Should have substituted $CONTEXT but NOT appended again (to avoid duplication)
        expect(promptArg).toContain('Process the following context:');
        expect(promptArg).toContain('GitHub Issue #123 content here');
        // Count occurrences - should appear only once (substituted, not appended)
        const matches = promptArg.match(/GitHub Issue #123 content here/g);
        expect(matches?.length).toBe(1);
      });

      it('should clear $CONTEXT variable when issueContext is undefined', async () => {
        // Create command file that uses $CONTEXT variable
        const commandsDir = join(testDir, '.archon', 'commands');
        await writeFile(
          join(commandsDir, 'context-command.md'),
          'Process: $CONTEXT and $EXTERNAL_CONTEXT then continue'
        );

        const workflow: WorkflowDefinition = {
          name: 'context-var-workflow',
          description: 'Test workflow with $CONTEXT variable',
          provider: 'claude',
          steps: [{ command: 'context-command' }],
        };

        // No issueContext provided
        await executeWorkflow(
          mockPlatform,
          'conv-123',
          testDir,
          workflow,
          'test message',
          'db-conv-id',
          'codebase-id'
        );

        const promptArg = mockSendQuery.mock.calls[0][0] as string;
        // Variables should be cleared (replaced with empty string)
        expect(promptArg).not.toContain('$CONTEXT');
        expect(promptArg).not.toContain('$EXTERNAL_CONTEXT');
        expect(promptArg).toContain('Process: and then continue');
      });

      it('should handle context with special regex characters', async () => {
        // Create command file that uses $CONTEXT variable
        const commandsDir = join(testDir, '.archon', 'commands');
        await writeFile(join(commandsDir, 'context-command.md'), 'Context: $CONTEXT');

        const workflow: WorkflowDefinition = {
          name: 'regex-test-workflow',
          description: 'Test special characters in context',
          provider: 'claude',
          steps: [{ command: 'context-command' }],
        };

        // Context with regex special characters that could break naive substitution
        const issueContext =
          'Issue: Add dark mode with $20 budget & (regex) patterns like .* and [a-z]+ and $CONTEXT literal';

        await executeWorkflow(
          mockPlatform,
          'conv-123',
          testDir,
          workflow,
          'test message',
          'db-conv-id',
          'codebase-id',
          issueContext
        );

        const promptArg = mockSendQuery.mock.calls[0][0] as string;
        // All special characters should be preserved exactly
        expect(promptArg).toContain('$20 budget');
        expect(promptArg).toContain('(regex)');
        expect(promptArg).toContain('.*');
        expect(promptArg).toContain('[a-z]+');
        expect(promptArg).toContain('$CONTEXT literal');
      });

      it('should handle multiple context variables in same prompt', async () => {
        // Create command file with multiple context variables
        const commandsDir = join(testDir, '.archon', 'commands');
        await writeFile(
          join(commandsDir, 'multi-context.md'),
          'First: $CONTEXT\n\nSecond: $EXTERNAL_CONTEXT\n\nThird: $ISSUE_CONTEXT'
        );

        const workflow: WorkflowDefinition = {
          name: 'multi-var-workflow',
          description: 'Test multiple context variables',
          provider: 'claude',
          steps: [{ command: 'multi-context' }],
        };

        const issueContext = 'Shared context value';

        await executeWorkflow(
          mockPlatform,
          'conv-123',
          testDir,
          workflow,
          'test message',
          'db-conv-id',
          'codebase-id',
          issueContext
        );

        const promptArg = mockSendQuery.mock.calls[0][0] as string;
        // All three variables should be substituted with the same context
        expect(promptArg).toContain('First: Shared context value');
        expect(promptArg).toContain('Second: Shared context value');
        expect(promptArg).toContain('Third: Shared context value');
        // Context should NOT be appended since variables were substituted
        expect(promptArg).not.toContain('---');
        // Should appear exactly 3 times (once per variable)
        const matches = promptArg.match(/Shared context value/g);
        expect(matches?.length).toBe(3);
      });
    });

    describe('loop workflow with context', () => {
      it('should pass issueContext to loop workflow iterations', async () => {
        // Override mock to return exit phrase
        mockSendQuery.mockImplementation(function* () {
          yield { type: 'assistant', content: '<promise>LOOP_COMPLETE</promise>' };
          yield { type: 'result', sessionId: 'session-id' };
        });

        const loopWorkflow: WorkflowDefinition = {
          name: 'loop-context-workflow',
          description: 'Test loop workflow with context',
          provider: 'claude',
          loop: {
            until: 'LOOP_COMPLETE',
            max_iterations: 2,
            fresh_context: false,
          },
          prompt: 'Process the task based on the provided context. User message: $USER_MESSAGE',
        };

        const issueContext =
          '[GitHub Issue Context]\nIssue #99: "Loop Test"\nAuthor: loopuser\n\nBody content';

        await executeWorkflow(
          mockPlatform,
          'conv-123',
          testDir,
          loopWorkflow,
          'test trigger',
          'db-conv-id',
          'codebase-id',
          issueContext
        );

        // Verify AI received context in the prompt
        expect(mockSendQuery.mock.calls.length).toBeGreaterThan(0);
        const promptArg = mockSendQuery.mock.calls[0][0] as string;
        expect(promptArg).toContain('[GitHub Issue Context]');
        expect(promptArg).toContain('Issue #99');
      });

      it('should substitute $ISSUE_CONTEXT in loop workflow', async () => {
        mockSendQuery.mockImplementation(function* () {
          yield { type: 'assistant', content: '<promise>DONE</promise>' };
          yield { type: 'result', sessionId: 'session-id' };
        });

        const loopWorkflow: WorkflowDefinition = {
          name: 'loop-var-workflow',
          description: 'Test loop with $ISSUE_CONTEXT',
          provider: 'claude',
          loop: {
            until: 'DONE',
            max_iterations: 1,
            fresh_context: false,
          },
          prompt: 'Given this context:\n$ISSUE_CONTEXT\n\nExecute: $USER_MESSAGE',
        };

        const issueContext = 'PR #555 details here';

        await executeWorkflow(
          mockPlatform,
          'conv-123',
          testDir,
          loopWorkflow,
          'implement feature',
          'db-conv-id',
          'codebase-id',
          issueContext
        );

        const promptArg = mockSendQuery.mock.calls[0][0] as string;
        expect(promptArg).toContain('Given this context:');
        expect(promptArg).toContain('PR #555 details here');
        // Should appear only once (substituted, not appended)
        const matches = promptArg.match(/PR #555 details here/g);
        expect(matches?.length).toBe(1);
      });
    });

    describe('metadata storage', () => {
      it('should store issueContext in workflow run metadata', async () => {
        const workflow: WorkflowDefinition = {
          name: 'metadata-workflow',
          description: 'Test metadata storage',
          provider: 'claude',
          steps: [{ command: 'command-one' }],
        };

        const issueContext = 'Issue #77 context';

        await executeWorkflow(
          mockPlatform,
          'conv-123',
          testDir,
          workflow,
          'test message',
          'db-conv-id',
          'codebase-id',
          issueContext
        );

        // Check that createWorkflowRun was called with metadata containing github_context
        const insertCalls = mockQuery.mock.calls.filter(
          call => typeof call[0] === 'string' && call[0].includes('INSERT')
        );
        expect(insertCalls.length).toBeGreaterThan(0);
        const insertParams = insertCalls[0][1] as string[];
        // The 5th parameter should be the metadata JSON
        expect(insertParams[4]).toBe(JSON.stringify({ github_context: 'Issue #77 context' }));
      });

      it('should store empty metadata when issueContext is undefined', async () => {
        const workflow: WorkflowDefinition = {
          name: 'no-metadata-workflow',
          description: 'Test without context',
          provider: 'claude',
          steps: [{ command: 'command-one' }],
        };

        await executeWorkflow(
          mockPlatform,
          'conv-123',
          testDir,
          workflow,
          'test message',
          'db-conv-id',
          'codebase-id'
        );

        const insertCalls = mockQuery.mock.calls.filter(
          call => typeof call[0] === 'string' && call[0].includes('INSERT')
        );
        expect(insertCalls.length).toBeGreaterThan(0);
        const insertParams = insertCalls[0][1] as string[];
        expect(insertParams[4]).toBe('{}');
      });
    });
  });

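  // Parallel-block semantics exercised below: every step in a parallel block runs
  // with a fresh session, a failure in any step fails the whole workflow (and all
  // failures are reported together), and the next sequential step after the block
  // also starts from a fresh session.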
  describe('parallel block execution', () => {
    beforeEach(async () => {
      // Create command files for parallel block tests
      const commandsDir = join(testDir, '.archon', 'commands');
      await writeFile(join(commandsDir, 'parallel-a.md'), 'Parallel step A prompt');
      await writeFile(join(commandsDir, 'parallel-b.md'), 'Parallel step B prompt');
      await writeFile(join(commandsDir, 'parallel-c.md'), 'Parallel step C prompt');
      await writeFile(join(commandsDir, 'step-before.md'), 'Step before parallel');
      await writeFile(join(commandsDir, 'step-after.md'), 'Step after parallel');

      // Reset mock to default behavior
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should execute all parallel steps concurrently', async () => {
      const parallelWorkflow: WorkflowDefinition = {
        name: 'parallel-test',
        description: 'Test workflow with parallel block',
        steps: [
          {
            parallel: [
              { command: 'parallel-a' },
              { command: 'parallel-b' },
              { command: 'parallel-c' },
            ],
          },
        ],
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        parallelWorkflow,
        'Run parallel',
        'db-conv-id'
      );

      // AI client should be called 3 times (once for each parallel step)
      expect(mockSendQuery).toHaveBeenCalledTimes(3);

      // Workflow should complete successfully
      const completeCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
      );
      expect(completeCalls.length).toBeGreaterThan(0);
    });

    it('should fail workflow if any parallel step fails', async () => {
      // Mock AI client to fail on 'parallel-b'
      let callCount = 0;
      mockSendQuery.mockImplementation(function* (prompt: string) {
        callCount++;
        // Fail the second call (parallel-b)
        if (prompt.includes('Parallel step B')) {
          throw new Error('Parallel step B failed unexpectedly');
        }
        yield { type: 'assistant', content: `Response ${String(callCount)}` };
        yield { type: 'result', sessionId: `session-${String(callCount)}` };
      });

      const parallelWorkflow: WorkflowDefinition = {
        name: 'parallel-fail-test',
        description: 'Test parallel failure handling',
        steps: [
          {
            parallel: [
              { command: 'parallel-a' },
              { command: 'parallel-b' },
              { command: 'parallel-c' },
            ],
          },
        ],
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        parallelWorkflow,
        'Run parallel',
        'db-conv-id'
      );

      // Workflow should fail
      const failCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
      );
      expect(failCalls.length).toBeGreaterThan(0);

      // Should send failure message to user
      const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
      const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1]);
      expect(
        messages.some((m: string) => m.includes('**Workflow failed** in parallel block'))
      ).toBe(true);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should execute sequential step, then parallel block, then sequential step', async () => {
      const mixedWorkflow: WorkflowDefinition = {
        name: 'mixed-workflow',
        description: 'Test sequential and parallel mix',
        steps: [
          { command: 'step-before' },
          {
            parallel: [{ command: 'parallel-a' }, { command: 'parallel-b' }],
          },
          { command: 'step-after' },
        ],
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        mixedWorkflow,
        'Run mixed',
        'db-conv-id'
      );

      // AI client should be called 4 times total:
      // 1 (step-before) + 2 (parallel) + 1 (step-after)
      expect(mockSendQuery).toHaveBeenCalledTimes(4);

      // Workflow should complete successfully
      const completeCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
      );
      expect(completeCalls.length).toBeGreaterThan(0);

      // Verify step notifications were sent for all steps
      const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
      const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1]);

      // Sequential step notifications
      expect(messages.some((m: string) => m.includes('**Step 1/3**: `step-before`'))).toBe(true);
      expect(messages.some((m: string) => m.includes('**Step 3/3**: `step-after`'))).toBe(true);

      // Parallel block notification
      expect(
        messages.some(
          (m: string) =>
            m.includes('**Parallel block**') &&
            m.includes('`parallel-a`') &&
            m.includes('`parallel-b`')
        )
      ).toBe(true);
    });

    it('should send correct notification format for parallel blocks', async () => {
      const parallelWorkflow: WorkflowDefinition = {
        name: 'parallel-notification-test',
        description: 'Test parallel block notification',
        steps: [
          {
            parallel: [{ command: 'parallel-a' }, { command: 'parallel-b' }],
          },
        ],
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        parallelWorkflow,
        'Run parallel',
        'db-conv-id'
      );

      const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
      const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1]);

      // Should have parallel block notification with correct format
      const parallelNotification = messages.find(
        (m: string) => typeof m === 'string' && m.includes('**Parallel block**')
      );
      expect(parallelNotification).toBeDefined();
      expect(parallelNotification).toContain('(2 steps)');
      expect(parallelNotification).toContain('`parallel-a`');
      expect(parallelNotification).toContain('`parallel-b`');
    });

    it('should give each parallel step a fresh session (no resume)', async () => {
      // Track session IDs passed to each parallel step
      const receivedSessionIds: (string | undefined)[] = [];

      mockSendQuery.mockImplementation(function* (
        _prompt: string,
        _cwd: string,
        sessionId?: string
      ) {
        receivedSessionIds.push(sessionId);
        yield { type: 'assistant', content: 'Response' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const parallelWorkflow: WorkflowDefinition = {
        name: 'parallel-session-test',
        description: 'Test parallel session handling',
        steps: [
          {
            parallel: [{ command: 'parallel-a' }, { command: 'parallel-b' }],
          },
        ],
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        parallelWorkflow,
        'Run parallel',
        'db-conv-id'
      );

      // All parallel steps should have undefined session ID (fresh session)
      expect(receivedSessionIds).toEqual([undefined, undefined]);

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should execute workflow with multiple parallel blocks', async () => {
      // Create additional command files
      const commandsDir = join(testDir, '.archon', 'commands');
      await writeFile(join(commandsDir, 'parallel-d.md'), 'Parallel step D prompt');

      const multiParallelWorkflow: WorkflowDefinition = {
        name: 'multi-parallel-test',
        description: 'Test multiple parallel blocks',
        steps: [
          { command: 'step-before' },
          {
            parallel: [{ command: 'parallel-a' }, { command: 'parallel-b' }],
          },
          { command: 'step-after' },
          {
            parallel: [{ command: 'parallel-c' }, { command: 'parallel-d' }],
          },
        ],
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        multiParallelWorkflow,
        'Run multi-parallel',
        'db-conv-id'
      );

      // AI client should be called 6 times total:
      // 1 (step-before) + 2 (first parallel) + 1 (step-after) + 2 (second parallel)
      expect(mockSendQuery).toHaveBeenCalledTimes(6);

      // Workflow should complete successfully
      const completeCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
      );
      expect(completeCalls.length).toBeGreaterThan(0);
    });

    it('should report all failures when multiple parallel steps fail', async () => {
      // Mock AI client to fail on all parallel steps
      mockSendQuery.mockImplementation(function* (prompt: string) {
        if (prompt.includes('Parallel step A')) {
          throw new Error('Step A: Connection timeout');
        }
        if (prompt.includes('Parallel step B')) {
          throw new Error('Step B: Rate limit exceeded');
        }
        if (prompt.includes('Parallel step C')) {
          throw new Error('Step C: Authentication failed');
        }
        yield { type: 'assistant', content: 'Response' };
        yield { type: 'result', sessionId: 'session-id' };
      });

      const parallelWorkflow: WorkflowDefinition = {
        name: 'all-fail-test',
        description: 'Test all parallel steps failing',
        steps: [
          {
            parallel: [
              { command: 'parallel-a' },
              { command: 'parallel-b' },
              { command: 'parallel-c' },
            ],
          },
        ],
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        parallelWorkflow,
        'Run parallel',
        'db-conv-id'
      );

      // Workflow should fail
      const failCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'failed'")
      );
      expect(failCalls.length).toBeGreaterThan(0);

      // Should send failure message containing ALL errors
      const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
      const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1]);
      const failureMessage = messages.find(
        (m: string) => typeof m === 'string' && m.includes('**Workflow failed** in parallel block')
      );

      expect(failureMessage).toBeDefined();
      // All three errors should be reported
      expect(failureMessage).toContain('parallel-a');
      expect(failureMessage).toContain('parallel-b');
      expect(failureMessage).toContain('parallel-c');

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });

    it('should execute step-only workflow unchanged (backward compatibility)', async () => {
      // Create sequential-only workflow (no parallel blocks)
      const sequentialWorkflow: WorkflowDefinition = {
        name: 'sequential-only',
        description: 'Test backward compatibility with sequential workflows',
        steps: [{ command: 'step-before' }, { command: 'step-after' }],
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        sequentialWorkflow,
        'Run sequential',
        'db-conv-id'
      );

      // AI client should be called 2 times (once per step)
      expect(mockSendQuery).toHaveBeenCalledTimes(2);

      // Workflow should complete successfully
      const completeCalls = mockQuery.mock.calls.filter(
        (call: unknown[]) =>
          (call[0] as string).includes('UPDATE') && (call[0] as string).includes("'completed'")
      );
      expect(completeCalls.length).toBeGreaterThan(0);

      // Should have step notifications (not parallel block notifications)
      const sendMessage = mockPlatform.sendMessage as ReturnType<typeof mock>;
      const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1]);
      expect(messages.some((m: string) => m.includes('**Step 1/2**'))).toBe(true);
      expect(messages.some((m: string) => m.includes('**Step 2/2**'))).toBe(true);
      // No parallel block notifications
      expect(messages.some((m: string) => m.includes('**Parallel block**'))).toBe(false);
    });

    it('should reset session after parallel block (next sequential step gets fresh session)', async () => {
      // Track session IDs passed to each step
      const receivedSessionIds: (string | undefined)[] = [];

      mockSendQuery.mockImplementation(function* (
        _prompt: string,
        _cwd: string,
        sessionId?: string
      ) {
        receivedSessionIds.push(sessionId);
        yield { type: 'assistant', content: 'Response' };
        yield { type: 'result', sessionId: 'step-session-id' };
      });

      // Workflow: step-before -> [parallel-a, parallel-b] -> step-after
      const mixedWorkflow: WorkflowDefinition = {
        name: 'session-reset-test',
        description: 'Test session reset after parallel block',
        steps: [
          { command: 'step-before' },
          {
            parallel: [{ command: 'parallel-a' }, { command: 'parallel-b' }],
          },
          { command: 'step-after' },
        ],
      };

      await executeWorkflow(
        mockPlatform,
        'conv-123',
        testDir,
        mixedWorkflow,
        'Run mixed',
        'db-conv-id'
      );

      // Should have 4 calls: step-before, parallel-a, parallel-b, step-after
      expect(receivedSessionIds).toHaveLength(4);

      // step-before: first step always gets fresh session (undefined)
      expect(receivedSessionIds[0]).toBeUndefined();

      // parallel-a and parallel-b: always get fresh sessions (undefined)
      expect(receivedSessionIds[1]).toBeUndefined();
      expect(receivedSessionIds[2]).toBeUndefined();

      // step-after: should get fresh session after parallel block (undefined, not step-before's session)
      // This verifies that currentSessionId is reset to undefined after parallel block
      expect(receivedSessionIds[3]).toBeUndefined();

      // Reset mock
      mockSendQuery.mockImplementation(function* () {
        yield { type: 'assistant', content: 'AI response' };
        yield { type: 'result', sessionId: 'new-session-id' };
      });
    });
  });
});
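
// isValidCommandName guards the command names that workflow steps resolve to
// files under .archon/commands/<name>.md (see the step tests above). The exact
// implementation lives in the executor module; a minimal sketch consistent with
// the cases below (an assumption, not the actual source) would be:
//
//   const isValidCommandName = (name: string): boolean =>
//     name.length > 0 &&
//     !name.includes('/') &&
//     !name.includes('\\') &&
//     !name.includes('..') &&
//     !name.startsWith('.');
//
// The tests below cover path-traversal rejection and the allowed character set.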
describe('isValidCommandName', () => {
  it('should reject empty string', () => {
    expect(isValidCommandName('')).toBe(false);
  });

  it('should reject forward slashes (path traversal)', () => {
    expect(isValidCommandName('foo/bar')).toBe(false);
    expect(isValidCommandName('../etc/passwd')).toBe(false);
  });

  it('should reject backslashes (Windows path separator)', () => {
    expect(isValidCommandName('foo\\bar')).toBe(false);
  });

  it('should reject double dots (parent directory reference)', () => {
    expect(isValidCommandName('..')).toBe(false);
    expect(isValidCommandName('..test')).toBe(false);
  });

  it('should reject names starting with dot (hidden files)', () => {
    expect(isValidCommandName('.hidden')).toBe(false);
    expect(isValidCommandName('.gitignore')).toBe(false);
  });

  it('should accept valid names with hyphens', () => {
    expect(isValidCommandName('my-command')).toBe(true);
    expect(isValidCommandName('review-pr')).toBe(true);
  });

  it('should accept valid names with underscores', () => {
    expect(isValidCommandName('my_command')).toBe(true);
    expect(isValidCommandName('my_command_123')).toBe(true);
  });

  it('should accept simple alphanumeric names', () => {
    expect(isValidCommandName('plan')).toBe(true);
    expect(isValidCommandName('execute')).toBe(true);
    expect(isValidCommandName('commit')).toBe(true);
  });

  it('should accept names with numbers', () => {
    expect(isValidCommandName('step1')).toBe(true);
    expect(isValidCommandName('v2release')).toBe(true);
  });
});