refactor(ci): strip E2E smoke tests to bare minimum for speed

Claude CLI is extremely slow with structured output (~4 min) and tool use (~2 min) in CI, making the previous multi-workflow approach take 10+ min.

Radical simplification:
- Remove e2e-all-nodes (redundant with deterministic + claude-smoke)
- Remove e2e-skills-mcp (advanced features too slow for per-commit smoke)
- Remove structured output and tool use from Claude smoke test (too slow)
- Strip Claude smoke to: 1 prompt + 1 command + 1 bash verify node
- Keep mixed providers (simplified: 1 Claude + 1 Codex + bash verify)
- All timeouts reduced to 30s, all job timeouts to 5 min
- Remove MCP test fixtures and e2e-test-skill (no longer needed)

Expected: Claude job ~15s of AI time, Codex ~5s, mixed ~10s

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-21 13:37:41 +00:00 · 2026-04-16 10:50:11 -05:00 · 2026-04-16 10:50:11 -05:00 · bf9091159c
commit bf9091159c
parent 4c259e7a0a
8 changed files with 24 additions and 196 deletions
--- a/.archon/test-fixtures/mcp/e2e-filesystem.json
+++ b/.archon/test-fixtures/mcp/e2e-filesystem.json
@@ -1,6 +0,0 @@
-{
-  "filesystem": {
-    "command": "npx",
-    "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"]
-  }
-}
--- a/.archon/workflows/e2e-all-nodes.yaml
+++ b/.archon/workflows/e2e-all-nodes.yaml
@@ -1,54 +0,0 @@
-# E2E smoke test — all node types
-# Verifies: bash, prompt, script (bun), structured output, effort control, $nodeId.output refs
-# NOTE: AI nodes run sequentially to avoid concurrent Claude CLI subprocess issues in CI
-name: e2e-all-nodes
-description: "Comprehensive E2E test exercising bash, prompt, script, and structured output nodes."
-provider: claude
-model: haiku
-
-nodes:
-  # 1. Bash node — no AI, runs shell, stdout captured as output
-  - id: bash-check
-    bash: "echo '{\"status\":\"ok\",\"cwd\":\"'$(pwd)'\"}'"
-
-  # 2. Script node (bun runtime) — verifies script execution
-  - id: script-bun
-    script: echo-args
-    runtime: bun
-    depends_on: [bash-check]
-    timeout: 30000
-
-  # 3. Prompt node — simple AI call, verifies sendQuery works
-  - id: prompt-simple
-    prompt: "The bash node returned: $bash-check.output — confirm you received it by saying 'received'. Say nothing else."
-    depends_on: [script-bun]
-    allowed_tools: []
-    idle_timeout: 120000
-
-  # 4. Structured output node — verifies output_format translation
-  - id: structured
-    prompt: "Classify the text 'hello world' as either 'greeting' or 'math'."
-    output_format:
-      type: object
-      properties:
-        category:
-          type: string
-          enum: ["greeting", "math"]
-      required: ["category"]
-      additionalProperties: false
-    depends_on: [prompt-simple]
-    allowed_tools: []
-    idle_timeout: 120000
-
-  # 5. Bash node using $nodeId.output from structured node
-  - id: bash-read-output
-    bash: "echo 'Structured output category: $structured.output'"
-    depends_on: [structured]
-
-  # 6. Prompt with effort control — verifies effort passes through to SDK
-  - id: prompt-effort
-    prompt: "Say 'effort-ok' and nothing else."
-    effort: low
-    depends_on: [structured]
-    allowed_tools: []
-    idle_timeout: 120000
--- a/.archon/workflows/e2e-claude-smoke.yaml
+++ b/.archon/workflows/e2e-claude-smoke.yaml
@@ -1,48 +1,26 @@
 # E2E smoke test — Claude provider
-# Verifies: provider selection, sendQuery, structured output, tool use,
-#           command node, workflow-level model, node-level model override
-# NOTE: Nodes run sequentially to avoid concurrent Claude CLI subprocess issues in CI
+# Verifies: Claude connectivity (sendQuery), command node loading, $nodeId.output refs
+# Design: Only uses allowed_tools: [] (no tool use) and no output_format (no structured output)
+# because the Claude CLI subprocess is extremely slow with those features in CI.
 name: e2e-claude-smoke
-description: "E2E smoke test for Claude provider. Tests prompt, structured output, tool use, command node, and model overrides."
+description: "Smoke test for Claude provider. Verifies prompt response and command node loading."
 provider: claude
 model: haiku

 nodes:
-  # 1. Simple prompt — verifies basic sendQuery
+  # 1. Simple prompt — verifies Claude API connectivity via sendQuery
  - id: simple
    prompt: "What is 2+2? Answer with just the number, nothing else."
    allowed_tools: []
-    idle_timeout: 120000
+    idle_timeout: 30000

-  # 2. Structured output — verifies output_format translation
-  - id: structured
-    prompt: "Classify this input as 'math' or 'text': '2+2=4'"
-    output_format:
-      type: object
-      properties:
-        category:
-          type: string
-          enum: ["math", "text"]
-      required: ["category"]
-      additionalProperties: false
-    allowed_tools: []
-    idle_timeout: 120000
-    depends_on: [simple]
-
-  # 3. Tool use — verifies agent can use tools
-  - id: tool-use
-    prompt: "Read the file package.json and tell me the 'name' field value. Answer with just the name, nothing else."
-    allowed_tools: [Read]
-    idle_timeout: 120000
-    depends_on: [structured]
-
-  # 4. Command node — verifies command file loading
+  # 2. Command node — verifies command file discovery and loading
  - id: command-test
    command: e2e-echo-command
-    idle_timeout: 120000
-    depends_on: [tool-use]
+    idle_timeout: 30000
+    depends_on: [simple]

-  # 5. Bash node reads structured output field
-  - id: verify-structured
-    bash: "echo 'category=$structured.output.category'"
-    depends_on: [structured]
+  # 3. Bash node — verifies $nodeId.output substitution from AI node
+  - id: verify-output
+    bash: "echo 'simple=$simple.output command=$command-test.output'"
+    depends_on: [simple, command-test]
--- a/.archon/workflows/e2e-codex-smoke.yaml
+++ b/.archon/workflows/e2e-codex-smoke.yaml
@@ -8,7 +8,7 @@ model: gpt-5.1-codex-mini
 nodes:
  - id: simple
    prompt: "What is 2+2? Answer with just the number, nothing else."
-    idle_timeout: 60000
+    idle_timeout: 30000

  - id: structured
    prompt: "Classify this input as 'math' or 'text': '2+2=4'. Return JSON only."
@@ -20,5 +20,5 @@ nodes:
          enum: ["math", "text"]
      required: ["category"]
      additionalProperties: false
-    idle_timeout: 60000
+    idle_timeout: 30000
    depends_on: [simple]
--- a/.archon/workflows/e2e-mixed-providers.yaml
+++ b/.archon/workflows/e2e-mixed-providers.yaml
@@ -12,23 +12,16 @@ nodes:
  - id: claude-node
    prompt: "Say 'claude-ok' and nothing else."
    allowed_tools: []
-    idle_timeout: 60000
+    idle_timeout: 30000

-  # 2. Codex node — provider override
+  # 2. Codex node — provider override (runs parallel with claude-node, different providers)
  - id: codex-node
    prompt: "Say 'codex-ok' and nothing else."
    provider: codex
    model: gpt-5.1-codex-mini
-    idle_timeout: 60000
+    idle_timeout: 30000

-  # 3. Claude node reads Codex output — cross-provider ref
-  - id: claude-reads-codex
-    prompt: "The codex node said: '$codex-node.output'. Confirm you received it by saying 'cross-provider-ok'. Say nothing else."
-    allowed_tools: []
-    idle_timeout: 60000
-    depends_on: [codex-node]
-
-  # 4. Bash node verifies both outputs
+  # 3. Bash node verifies both outputs — cross-provider ref
  - id: verify
-    bash: "echo 'claude=$claude-node.output codex=$codex-node.output cross=$claude-reads-codex.output'"
-    depends_on: [claude-node, codex-node, claude-reads-codex]
+    bash: "echo 'claude=$claude-node.output codex=$codex-node.output'"
+    depends_on: [claude-node, codex-node]
--- a/.archon/workflows/e2e-skills-mcp.yaml
+++ b/.archon/workflows/e2e-skills-mcp.yaml
@@ -1,56 +0,0 @@
-# E2E smoke test — Claude advanced features (skills, MCP, effort, systemPrompt)
-# Verifies: skills injection, MCP server loading, effort control, custom system prompt
-# NOTE: AI nodes run sequentially to avoid concurrent Claude CLI subprocess issues in CI
-# NOTE: MCP test uses model: sonnet because Haiku does not support MCP tool search
-name: e2e-skills-mcp
-description: "Tests Claude-specific advanced features: skills injection, MCP server, effort control, and systemPrompt."
-provider: claude
-model: haiku
-
-nodes:
-  # 1. Skills injection — verifies AgentDefinition wrapping
-  - id: skill-test
-    prompt: "Confirm your skill loading status. If the E2E test skill is loaded, follow its instructions."
-    skills:
-      - e2e-test-skill
-    allowed_tools: [Read]
-    idle_timeout: 120000
-
-  # 2. MCP server — verifies MCP config loading and tool availability
-  #    Uses sonnet because Haiku does not support MCP tool search
-  - id: mcp-test
-    prompt: "You have a filesystem MCP server available. Use it to list the contents of /tmp. Report what you find briefly."
-    model: sonnet
-    mcp: .archon/test-fixtures/mcp/e2e-filesystem.json
-    idle_timeout: 120000
-    depends_on: [skill-test]
-
-  # 3. Effort control — verifies effort passes through to SDK
-  - id: effort-test
-    prompt: "Say 'effort-ok' and nothing else."
-    effort: low
-    allowed_tools: []
-    idle_timeout: 120000
-    depends_on: [mcp-test]
-
-  # 4. Custom system prompt — verifies systemPrompt injection
-  - id: system-prompt-test
-    prompt: "What is your role? Answer in 5 words or fewer."
-    systemPrompt: "You are a smoke test validator. Always start your response with 'VALIDATOR:'"
-    allowed_tools: []
-    idle_timeout: 120000
-    depends_on: [effort-test]
-
-  # 5. Context shared — verifies session continuity
-  - id: context-shared-setup
-    prompt: "Remember the secret code: ORANGE-42. Say 'stored' and nothing else."
-    allowed_tools: []
-    idle_timeout: 120000
-    depends_on: [system-prompt-test]
-
-  - id: context-shared-verify
-    prompt: "What was the secret code I told you to remember? Say just the code, nothing else."
-    context: shared
-    allowed_tools: []
-    idle_timeout: 120000
-    depends_on: [context-shared-setup]
--- a/.claude/skills/e2e-test-skill/SKILL.md
+++ b/.claude/skills/e2e-test-skill/SKILL.md
@@ -1,8 +0,0 @@
----
-name: E2E Test Skill
-description: Minimal skill for smoke testing skill injection in CI
----
-
-# E2E Test Skill
-
-You have the E2E test skill loaded. When asked to confirm skill loading, respond with exactly: "skill-loaded-ok"
--- a/.github/workflows/e2e-smoke.yml
+++ b/.github/workflows/e2e-smoke.yml
@@ -34,7 +34,7 @@ jobs:
  # ─── Tier 2a: Claude provider ──────────────────────────────────────────
  e2e-claude:
    runs-on: ubuntu-latest
-    timeout-minutes: 20
+    timeout-minutes: 5
    steps:
      - uses: actions/checkout@v4

@@ -43,11 +43,6 @@ jobs:
        with:
          bun-version: 1.3.11

-      - name: Setup Node.js (for npx/MCP servers)
-        uses: actions/setup-node@v4
-        with:
-          node-version: 22
-
      - name: Install Claude Code CLI
        run: |
          curl -fsSL https://claude.ai/install.sh | bash
@@ -63,24 +58,10 @@ jobs:
        run: |
          bun run cli workflow run e2e-claude-smoke --no-worktree "smoke test"

-      - name: Run all-nodes test
-        env:
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          CLAUDE_BIN_PATH: ~/.local/bin/claude
-        run: |
-          bun run cli workflow run e2e-all-nodes --no-worktree "smoke test"
-
-      - name: Run skills + MCP test
-        env:
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          CLAUDE_BIN_PATH: ~/.local/bin/claude
-        run: |
-          bun run cli workflow run e2e-skills-mcp --no-worktree "smoke test"
-
  # ─── Tier 2b: Codex provider ───────────────────────────────────────────
  e2e-codex:
    runs-on: ubuntu-latest
-    timeout-minutes: 10
+    timeout-minutes: 5
    steps:
      - uses: actions/checkout@v4

@@ -110,7 +91,7 @@ jobs:
  # ─── Tier 3: Mixed providers ───────────────────────────────────────────
  e2e-mixed:
    runs-on: ubuntu-latest
-    timeout-minutes: 10
+    timeout-minutes: 5
    needs: [e2e-claude, e2e-codex]
    steps:
      - uses: actions/checkout@v4