diff --git a/.archon/test-fixtures/mcp/e2e-filesystem.json b/.archon/test-fixtures/mcp/e2e-filesystem.json deleted file mode 100644 index 57e9fad3..00000000 --- a/.archon/test-fixtures/mcp/e2e-filesystem.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "filesystem": { - "command": "npx", - "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"] - } -} diff --git a/.archon/workflows/e2e-all-nodes.yaml b/.archon/workflows/e2e-all-nodes.yaml deleted file mode 100644 index 92820458..00000000 --- a/.archon/workflows/e2e-all-nodes.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# E2E smoke test — all node types -# Verifies: bash, prompt, script (bun), structured output, effort control, $nodeId.output refs -# NOTE: AI nodes run sequentially to avoid concurrent Claude CLI subprocess issues in CI -name: e2e-all-nodes -description: "Comprehensive E2E test exercising bash, prompt, script, and structured output nodes." -provider: claude -model: haiku - -nodes: - # 1. Bash node — no AI, runs shell, stdout captured as output - - id: bash-check - bash: "echo '{\"status\":\"ok\",\"cwd\":\"'$(pwd)'\"}'" - - # 2. Script node (bun runtime) — verifies script execution - - id: script-bun - script: echo-args - runtime: bun - depends_on: [bash-check] - timeout: 30000 - - # 3. Prompt node — simple AI call, verifies sendQuery works - - id: prompt-simple - prompt: "The bash node returned: $bash-check.output — confirm you received it by saying 'received'. Say nothing else." - depends_on: [script-bun] - allowed_tools: [] - idle_timeout: 120000 - - # 4. Structured output node — verifies output_format translation - - id: structured - prompt: "Classify the text 'hello world' as either 'greeting' or 'math'." - output_format: - type: object - properties: - category: - type: string - enum: ["greeting", "math"] - required: ["category"] - additionalProperties: false - depends_on: [prompt-simple] - allowed_tools: [] - idle_timeout: 120000 - - # 5. Bash node using $nodeId.output from structured node - - id: bash-read-output - bash: "echo 'Structured output category: $structured.output'" - depends_on: [structured] - - # 6. Prompt with effort control — verifies effort passes through to SDK - - id: prompt-effort - prompt: "Say 'effort-ok' and nothing else." - effort: low - depends_on: [structured] - allowed_tools: [] - idle_timeout: 120000 diff --git a/.archon/workflows/e2e-claude-smoke.yaml b/.archon/workflows/e2e-claude-smoke.yaml index 36ddd6c9..5f30253a 100644 --- a/.archon/workflows/e2e-claude-smoke.yaml +++ b/.archon/workflows/e2e-claude-smoke.yaml @@ -1,48 +1,26 @@ # E2E smoke test — Claude provider -# Verifies: provider selection, sendQuery, structured output, tool use, -# command node, workflow-level model, node-level model override -# NOTE: Nodes run sequentially to avoid concurrent Claude CLI subprocess issues in CI +# Verifies: Claude connectivity (sendQuery), command node loading, $nodeId.output refs +# Design: Only uses allowed_tools: [] (no tool use) and no output_format (no structured output) +# because the Claude CLI subprocess is extremely slow with those features in CI. name: e2e-claude-smoke -description: "E2E smoke test for Claude provider. Tests prompt, structured output, tool use, command node, and model overrides." +description: "Smoke test for Claude provider. Verifies prompt response and command node loading." provider: claude model: haiku nodes: - # 1. Simple prompt — verifies basic sendQuery + # 1. Simple prompt — verifies Claude API connectivity via sendQuery - id: simple prompt: "What is 2+2? Answer with just the number, nothing else." allowed_tools: [] - idle_timeout: 120000 + idle_timeout: 30000 - # 2. Structured output — verifies output_format translation - - id: structured - prompt: "Classify this input as 'math' or 'text': '2+2=4'" - output_format: - type: object - properties: - category: - type: string - enum: ["math", "text"] - required: ["category"] - additionalProperties: false - allowed_tools: [] - idle_timeout: 120000 - depends_on: [simple] - - # 3. Tool use — verifies agent can use tools - - id: tool-use - prompt: "Read the file package.json and tell me the 'name' field value. Answer with just the name, nothing else." - allowed_tools: [Read] - idle_timeout: 120000 - depends_on: [structured] - - # 4. Command node — verifies command file loading + # 2. Command node — verifies command file discovery and loading - id: command-test command: e2e-echo-command - idle_timeout: 120000 - depends_on: [tool-use] + idle_timeout: 30000 + depends_on: [simple] - # 5. Bash node reads structured output field - - id: verify-structured - bash: "echo 'category=$structured.output.category'" - depends_on: [structured] + # 3. Bash node — verifies $nodeId.output substitution from AI node + - id: verify-output + bash: "echo 'simple=$simple.output command=$command-test.output'" + depends_on: [simple, command-test] diff --git a/.archon/workflows/e2e-codex-smoke.yaml b/.archon/workflows/e2e-codex-smoke.yaml index b8d20253..05bfb3c1 100644 --- a/.archon/workflows/e2e-codex-smoke.yaml +++ b/.archon/workflows/e2e-codex-smoke.yaml @@ -8,7 +8,7 @@ model: gpt-5.1-codex-mini nodes: - id: simple prompt: "What is 2+2? Answer with just the number, nothing else." - idle_timeout: 60000 + idle_timeout: 30000 - id: structured prompt: "Classify this input as 'math' or 'text': '2+2=4'. Return JSON only." @@ -20,5 +20,5 @@ nodes: enum: ["math", "text"] required: ["category"] additionalProperties: false - idle_timeout: 60000 + idle_timeout: 30000 depends_on: [simple] diff --git a/.archon/workflows/e2e-mixed-providers.yaml b/.archon/workflows/e2e-mixed-providers.yaml index 2b2a86ec..db454b1e 100644 --- a/.archon/workflows/e2e-mixed-providers.yaml +++ b/.archon/workflows/e2e-mixed-providers.yaml @@ -12,23 +12,16 @@ nodes: - id: claude-node prompt: "Say 'claude-ok' and nothing else." allowed_tools: [] - idle_timeout: 60000 + idle_timeout: 30000 - # 2. Codex node — provider override + # 2. Codex node — provider override (runs parallel with claude-node, different providers) - id: codex-node prompt: "Say 'codex-ok' and nothing else." provider: codex model: gpt-5.1-codex-mini - idle_timeout: 60000 + idle_timeout: 30000 - # 3. Claude node reads Codex output — cross-provider ref - - id: claude-reads-codex - prompt: "The codex node said: '$codex-node.output'. Confirm you received it by saying 'cross-provider-ok'. Say nothing else." - allowed_tools: [] - idle_timeout: 60000 - depends_on: [codex-node] - - # 4. Bash node verifies both outputs + # 3. Bash node verifies both outputs — cross-provider ref - id: verify - bash: "echo 'claude=$claude-node.output codex=$codex-node.output cross=$claude-reads-codex.output'" - depends_on: [claude-node, codex-node, claude-reads-codex] + bash: "echo 'claude=$claude-node.output codex=$codex-node.output'" + depends_on: [claude-node, codex-node] diff --git a/.archon/workflows/e2e-skills-mcp.yaml b/.archon/workflows/e2e-skills-mcp.yaml deleted file mode 100644 index 29a166f6..00000000 --- a/.archon/workflows/e2e-skills-mcp.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# E2E smoke test — Claude advanced features (skills, MCP, effort, systemPrompt) -# Verifies: skills injection, MCP server loading, effort control, custom system prompt -# NOTE: AI nodes run sequentially to avoid concurrent Claude CLI subprocess issues in CI -# NOTE: MCP test uses model: sonnet because Haiku does not support MCP tool search -name: e2e-skills-mcp -description: "Tests Claude-specific advanced features: skills injection, MCP server, effort control, and systemPrompt." -provider: claude -model: haiku - -nodes: - # 1. Skills injection — verifies AgentDefinition wrapping - - id: skill-test - prompt: "Confirm your skill loading status. If the E2E test skill is loaded, follow its instructions." - skills: - - e2e-test-skill - allowed_tools: [Read] - idle_timeout: 120000 - - # 2. MCP server — verifies MCP config loading and tool availability - # Uses sonnet because Haiku does not support MCP tool search - - id: mcp-test - prompt: "You have a filesystem MCP server available. Use it to list the contents of /tmp. Report what you find briefly." - model: sonnet - mcp: .archon/test-fixtures/mcp/e2e-filesystem.json - idle_timeout: 120000 - depends_on: [skill-test] - - # 3. Effort control — verifies effort passes through to SDK - - id: effort-test - prompt: "Say 'effort-ok' and nothing else." - effort: low - allowed_tools: [] - idle_timeout: 120000 - depends_on: [mcp-test] - - # 4. Custom system prompt — verifies systemPrompt injection - - id: system-prompt-test - prompt: "What is your role? Answer in 5 words or fewer." - systemPrompt: "You are a smoke test validator. Always start your response with 'VALIDATOR:'" - allowed_tools: [] - idle_timeout: 120000 - depends_on: [effort-test] - - # 5. Context shared — verifies session continuity - - id: context-shared-setup - prompt: "Remember the secret code: ORANGE-42. Say 'stored' and nothing else." - allowed_tools: [] - idle_timeout: 120000 - depends_on: [system-prompt-test] - - - id: context-shared-verify - prompt: "What was the secret code I told you to remember? Say just the code, nothing else." - context: shared - allowed_tools: [] - idle_timeout: 120000 - depends_on: [context-shared-setup] diff --git a/.claude/skills/e2e-test-skill/SKILL.md b/.claude/skills/e2e-test-skill/SKILL.md deleted file mode 100644 index 1d128dc6..00000000 --- a/.claude/skills/e2e-test-skill/SKILL.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -name: E2E Test Skill -description: Minimal skill for smoke testing skill injection in CI ---- - -# E2E Test Skill - -You have the E2E test skill loaded. When asked to confirm skill loading, respond with exactly: "skill-loaded-ok" diff --git a/.github/workflows/e2e-smoke.yml b/.github/workflows/e2e-smoke.yml index abd8a262..80ca966f 100644 --- a/.github/workflows/e2e-smoke.yml +++ b/.github/workflows/e2e-smoke.yml @@ -34,7 +34,7 @@ jobs: # ─── Tier 2a: Claude provider ────────────────────────────────────────── e2e-claude: runs-on: ubuntu-latest - timeout-minutes: 20 + timeout-minutes: 5 steps: - uses: actions/checkout@v4 @@ -43,11 +43,6 @@ jobs: with: bun-version: 1.3.11 - - name: Setup Node.js (for npx/MCP servers) - uses: actions/setup-node@v4 - with: - node-version: 22 - - name: Install Claude Code CLI run: | curl -fsSL https://claude.ai/install.sh | bash @@ -63,24 +58,10 @@ jobs: run: | bun run cli workflow run e2e-claude-smoke --no-worktree "smoke test" - - name: Run all-nodes test - env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - CLAUDE_BIN_PATH: ~/.local/bin/claude - run: | - bun run cli workflow run e2e-all-nodes --no-worktree "smoke test" - - - name: Run skills + MCP test - env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - CLAUDE_BIN_PATH: ~/.local/bin/claude - run: | - bun run cli workflow run e2e-skills-mcp --no-worktree "smoke test" - # ─── Tier 2b: Codex provider ─────────────────────────────────────────── e2e-codex: runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 5 steps: - uses: actions/checkout@v4 @@ -110,7 +91,7 @@ jobs: # ─── Tier 3: Mixed providers ─────────────────────────────────────────── e2e-mixed: runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 5 needs: [e2e-claude, e2e-codex] steps: - uses: actions/checkout@v4