mirror of
https://github.com/coleam00/Archon
synced 2026-04-21 13:37:41 +00:00
refactor(ci): strip E2E smoke tests to bare minimum for speed
Claude CLI is extremely slow with structured output (~4 min) and tool use (~2 min) in CI, making the previous multi-workflow approach take 10+ min.

Radical simplification:
- Remove e2e-all-nodes (redundant with deterministic + claude-smoke)
- Remove e2e-skills-mcp (advanced features too slow for per-commit smoke)
- Remove structured output and tool use from Claude smoke test (too slow)
- Strip Claude smoke to: 1 prompt + 1 command + 1 bash verify node
- Keep mixed providers (simplified: 1 Claude + 1 Codex + bash verify)
- All timeouts reduced to 30s, all job timeouts to 5 min
- Remove MCP test fixtures and e2e-test-skill (no longer needed)

Expected: Claude job ~15s of AI time, Codex ~5s, mixed ~10s

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4c259e7a0a
commit
bf9091159c
8 changed files with 24 additions and 196 deletions
|
|
@ -1,6 +0,0 @@
|
|||
{
|
||||
"filesystem": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"]
|
||||
}
|
||||
}
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
# E2E smoke test — all node types
|
||||
# Verifies: bash, prompt, script (bun), structured output, effort control, $nodeId.output refs
|
||||
# NOTE: AI nodes run sequentially to avoid concurrent Claude CLI subprocess issues in CI
|
||||
name: e2e-all-nodes
|
||||
description: "Comprehensive E2E test exercising bash, prompt, script, and structured output nodes."
|
||||
provider: claude
|
||||
model: haiku
|
||||
|
||||
nodes:
|
||||
# 1. Bash node — no AI, runs shell, stdout captured as output
|
||||
- id: bash-check
|
||||
bash: "echo '{\"status\":\"ok\",\"cwd\":\"'$(pwd)'\"}'"
|
||||
|
||||
# 2. Script node (bun runtime) — verifies script execution
|
||||
- id: script-bun
|
||||
script: echo-args
|
||||
runtime: bun
|
||||
depends_on: [bash-check]
|
||||
timeout: 30000
|
||||
|
||||
# 3. Prompt node — simple AI call, verifies sendQuery works
|
||||
- id: prompt-simple
|
||||
prompt: "The bash node returned: $bash-check.output — confirm you received it by saying 'received'. Say nothing else."
|
||||
depends_on: [script-bun]
|
||||
allowed_tools: []
|
||||
idle_timeout: 120000
|
||||
|
||||
# 4. Structured output node — verifies output_format translation
|
||||
- id: structured
|
||||
prompt: "Classify the text 'hello world' as either 'greeting' or 'math'."
|
||||
output_format:
|
||||
type: object
|
||||
properties:
|
||||
category:
|
||||
type: string
|
||||
enum: ["greeting", "math"]
|
||||
required: ["category"]
|
||||
additionalProperties: false
|
||||
depends_on: [prompt-simple]
|
||||
allowed_tools: []
|
||||
idle_timeout: 120000
|
||||
|
||||
# 5. Bash node using $nodeId.output from structured node
|
||||
- id: bash-read-output
|
||||
bash: "echo 'Structured output category: $structured.output'"
|
||||
depends_on: [structured]
|
||||
|
||||
# 6. Prompt with effort control — verifies effort passes through to SDK
|
||||
- id: prompt-effort
|
||||
prompt: "Say 'effort-ok' and nothing else."
|
||||
effort: low
|
||||
depends_on: [structured]
|
||||
allowed_tools: []
|
||||
idle_timeout: 120000
|
||||
|
|
@ -1,48 +1,26 @@
|
|||
# E2E smoke test — Claude provider
|
||||
# Verifies: provider selection, sendQuery, structured output, tool use,
|
||||
# command node, workflow-level model, node-level model override
|
||||
# NOTE: Nodes run sequentially to avoid concurrent Claude CLI subprocess issues in CI
|
||||
# Verifies: Claude connectivity (sendQuery), command node loading, $nodeId.output refs
|
||||
# Design: Only uses allowed_tools: [] (no tool use) and no output_format (no structured output)
|
||||
# because the Claude CLI subprocess is extremely slow with those features in CI.
|
||||
name: e2e-claude-smoke
|
||||
description: "E2E smoke test for Claude provider. Tests prompt, structured output, tool use, command node, and model overrides."
|
||||
description: "Smoke test for Claude provider. Verifies prompt response and command node loading."
|
||||
provider: claude
|
||||
model: haiku
|
||||
|
||||
nodes:
|
||||
# 1. Simple prompt — verifies basic sendQuery
|
||||
# 1. Simple prompt — verifies Claude API connectivity via sendQuery
|
||||
- id: simple
|
||||
prompt: "What is 2+2? Answer with just the number, nothing else."
|
||||
allowed_tools: []
|
||||
idle_timeout: 120000
|
||||
idle_timeout: 30000
|
||||
|
||||
# 2. Structured output — verifies output_format translation
|
||||
- id: structured
|
||||
prompt: "Classify this input as 'math' or 'text': '2+2=4'"
|
||||
output_format:
|
||||
type: object
|
||||
properties:
|
||||
category:
|
||||
type: string
|
||||
enum: ["math", "text"]
|
||||
required: ["category"]
|
||||
additionalProperties: false
|
||||
allowed_tools: []
|
||||
idle_timeout: 120000
|
||||
depends_on: [simple]
|
||||
|
||||
# 3. Tool use — verifies agent can use tools
|
||||
- id: tool-use
|
||||
prompt: "Read the file package.json and tell me the 'name' field value. Answer with just the name, nothing else."
|
||||
allowed_tools: [Read]
|
||||
idle_timeout: 120000
|
||||
depends_on: [structured]
|
||||
|
||||
# 4. Command node — verifies command file loading
|
||||
# 2. Command node — verifies command file discovery and loading
|
||||
- id: command-test
|
||||
command: e2e-echo-command
|
||||
idle_timeout: 120000
|
||||
depends_on: [tool-use]
|
||||
idle_timeout: 30000
|
||||
depends_on: [simple]
|
||||
|
||||
# 5. Bash node reads structured output field
|
||||
- id: verify-structured
|
||||
bash: "echo 'category=$structured.output.category'"
|
||||
depends_on: [structured]
|
||||
# 3. Bash node — verifies $nodeId.output substitution from AI node
|
||||
- id: verify-output
|
||||
bash: "echo 'simple=$simple.output command=$command-test.output'"
|
||||
depends_on: [simple, command-test]
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ model: gpt-5.1-codex-mini
|
|||
nodes:
|
||||
- id: simple
|
||||
prompt: "What is 2+2? Answer with just the number, nothing else."
|
||||
idle_timeout: 60000
|
||||
idle_timeout: 30000
|
||||
|
||||
- id: structured
|
||||
prompt: "Classify this input as 'math' or 'text': '2+2=4'. Return JSON only."
|
||||
|
|
@ -20,5 +20,5 @@ nodes:
|
|||
enum: ["math", "text"]
|
||||
required: ["category"]
|
||||
additionalProperties: false
|
||||
idle_timeout: 60000
|
||||
idle_timeout: 30000
|
||||
depends_on: [simple]
|
||||
|
|
|
|||
|
|
@ -12,23 +12,16 @@ nodes:
|
|||
- id: claude-node
|
||||
prompt: "Say 'claude-ok' and nothing else."
|
||||
allowed_tools: []
|
||||
idle_timeout: 60000
|
||||
idle_timeout: 30000
|
||||
|
||||
# 2. Codex node — provider override
|
||||
# 2. Codex node — provider override (runs parallel with claude-node, different providers)
|
||||
- id: codex-node
|
||||
prompt: "Say 'codex-ok' and nothing else."
|
||||
provider: codex
|
||||
model: gpt-5.1-codex-mini
|
||||
idle_timeout: 60000
|
||||
idle_timeout: 30000
|
||||
|
||||
# 3. Claude node reads Codex output — cross-provider ref
|
||||
- id: claude-reads-codex
|
||||
prompt: "The codex node said: '$codex-node.output'. Confirm you received it by saying 'cross-provider-ok'. Say nothing else."
|
||||
allowed_tools: []
|
||||
idle_timeout: 60000
|
||||
depends_on: [codex-node]
|
||||
|
||||
# 4. Bash node verifies both outputs
|
||||
# 3. Bash node verifies both outputs — cross-provider ref
|
||||
- id: verify
|
||||
bash: "echo 'claude=$claude-node.output codex=$codex-node.output cross=$claude-reads-codex.output'"
|
||||
depends_on: [claude-node, codex-node, claude-reads-codex]
|
||||
bash: "echo 'claude=$claude-node.output codex=$codex-node.output'"
|
||||
depends_on: [claude-node, codex-node]
|
||||
|
|
|
|||
|
|
@ -1,56 +0,0 @@
|
|||
# E2E smoke test — Claude advanced features (skills, MCP, effort, systemPrompt)
|
||||
# Verifies: skills injection, MCP server loading, effort control, custom system prompt
|
||||
# NOTE: AI nodes run sequentially to avoid concurrent Claude CLI subprocess issues in CI
|
||||
# NOTE: MCP test uses model: sonnet because Haiku does not support MCP tool search
|
||||
name: e2e-skills-mcp
|
||||
description: "Tests Claude-specific advanced features: skills injection, MCP server, effort control, and systemPrompt."
|
||||
provider: claude
|
||||
model: haiku
|
||||
|
||||
nodes:
|
||||
# 1. Skills injection — verifies AgentDefinition wrapping
|
||||
- id: skill-test
|
||||
prompt: "Confirm your skill loading status. If the E2E test skill is loaded, follow its instructions."
|
||||
skills:
|
||||
- e2e-test-skill
|
||||
allowed_tools: [Read]
|
||||
idle_timeout: 120000
|
||||
|
||||
# 2. MCP server — verifies MCP config loading and tool availability
|
||||
# Uses sonnet because Haiku does not support MCP tool search
|
||||
- id: mcp-test
|
||||
prompt: "You have a filesystem MCP server available. Use it to list the contents of /tmp. Report what you find briefly."
|
||||
model: sonnet
|
||||
mcp: .archon/test-fixtures/mcp/e2e-filesystem.json
|
||||
idle_timeout: 120000
|
||||
depends_on: [skill-test]
|
||||
|
||||
# 3. Effort control — verifies effort passes through to SDK
|
||||
- id: effort-test
|
||||
prompt: "Say 'effort-ok' and nothing else."
|
||||
effort: low
|
||||
allowed_tools: []
|
||||
idle_timeout: 120000
|
||||
depends_on: [mcp-test]
|
||||
|
||||
# 4. Custom system prompt — verifies systemPrompt injection
|
||||
- id: system-prompt-test
|
||||
prompt: "What is your role? Answer in 5 words or fewer."
|
||||
systemPrompt: "You are a smoke test validator. Always start your response with 'VALIDATOR:'"
|
||||
allowed_tools: []
|
||||
idle_timeout: 120000
|
||||
depends_on: [effort-test]
|
||||
|
||||
# 5. Context shared — verifies session continuity
|
||||
- id: context-shared-setup
|
||||
prompt: "Remember the secret code: ORANGE-42. Say 'stored' and nothing else."
|
||||
allowed_tools: []
|
||||
idle_timeout: 120000
|
||||
depends_on: [system-prompt-test]
|
||||
|
||||
- id: context-shared-verify
|
||||
prompt: "What was the secret code I told you to remember? Say just the code, nothing else."
|
||||
context: shared
|
||||
allowed_tools: []
|
||||
idle_timeout: 120000
|
||||
depends_on: [context-shared-setup]
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
---
|
||||
name: E2E Test Skill
|
||||
description: Minimal skill for smoke testing skill injection in CI
|
||||
---
|
||||
|
||||
# E2E Test Skill
|
||||
|
||||
You have the E2E test skill loaded. When asked to confirm skill loading, respond with exactly: "skill-loaded-ok"
|
||||
25
.github/workflows/e2e-smoke.yml
vendored
25
.github/workflows/e2e-smoke.yml
vendored
|
|
@ -34,7 +34,7 @@ jobs:
|
|||
# ─── Tier 2a: Claude provider ──────────────────────────────────────────
|
||||
e2e-claude:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
|
|
@ -43,11 +43,6 @@ jobs:
|
|||
with:
|
||||
bun-version: 1.3.11
|
||||
|
||||
- name: Setup Node.js (for npx/MCP servers)
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- name: Install Claude Code CLI
|
||||
run: |
|
||||
curl -fsSL https://claude.ai/install.sh | bash
|
||||
|
|
@ -63,24 +58,10 @@ jobs:
|
|||
run: |
|
||||
bun run cli workflow run e2e-claude-smoke --no-worktree "smoke test"
|
||||
|
||||
- name: Run all-nodes test
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
CLAUDE_BIN_PATH: ~/.local/bin/claude
|
||||
run: |
|
||||
bun run cli workflow run e2e-all-nodes --no-worktree "smoke test"
|
||||
|
||||
- name: Run skills + MCP test
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
CLAUDE_BIN_PATH: ~/.local/bin/claude
|
||||
run: |
|
||||
bun run cli workflow run e2e-skills-mcp --no-worktree "smoke test"
|
||||
|
||||
# ─── Tier 2b: Codex provider ───────────────────────────────────────────
|
||||
e2e-codex:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
|
|
@ -110,7 +91,7 @@ jobs:
|
|||
# ─── Tier 3: Mixed providers ───────────────────────────────────────────
|
||||
e2e-mixed:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
timeout-minutes: 5
|
||||
needs: [e2e-claude, e2e-codex]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
|
|
|||
Loading…
Reference in a new issue