n8n/packages/@n8n/instance-ai/evaluations/checklist/verifier.ts
José Braulio González Valido 560f300716
test: Add Instance AI workflow evals CI pipeline (no-changelog) (#28366)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 14:15:41 +00:00

88 lines
2.4 KiB
TypeScript

import { z } from 'zod';
import { createEvalAgent } from '../../src/utils/eval-agents';
import type { WorkflowResponse } from '../clients/n8n-client';
import { MOCK_EXECUTION_VERIFY_PROMPT } from '../system-prompts/mock-execution-verify';
import type { ChecklistItem, ChecklistResult } from '../types';
// ---------------------------------------------------------------------------
// Structured output schema
// ---------------------------------------------------------------------------
/**
 * A single verdict inside the verifier's structured output.
 *
 * `id` is matched against the ids of the checklist items that were sent to the
 * model; `failureCategory` and `rootCause` are optional.
 */
const checklistEntrySchema = z.object({
	id: z.number(),
	pass: z.boolean(),
	reasoning: z.string(),
	failureCategory: z.string().optional(),
	rootCause: z.string().optional(),
});

/** Structured output requested from the verifier agent: a `results` list of verdicts. */
const checklistResultSchema = z.object({
	results: z.array(checklistEntrySchema),
});
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/** Upper bound on verifier-agent calls made by a single `verifyChecklist` invocation. */
const MAX_VERIFY_ATTEMPTS = 2;

/**
 * Asks the checklist-verifier agent to judge every checklist item whose
 * `strategy` is `'llm'` against the supplied verification artifact.
 *
 * Items with any other strategy are ignored. When no `'llm'` items exist the
 * function returns `[]` without creating an agent.
 *
 * Each attempt creates a new agent and sends the same prompt. The first attempt
 * that yields at least one usable verdict wins; an attempt that yields none
 * triggers another try, up to `MAX_VERIFY_ATTEMPTS` in total. A verdict is
 * usable when its `id` is a number, its `pass` is a boolean and its `id`
 * belongs to one of the items that were sent. If the model repeats an id, only
 * the first verdict for that id is kept, so the output never contains more
 * than one result per checklist item.
 *
 * @param checklist - All checklist items; only those with `strategy === 'llm'` are sent.
 * @param verificationArtifact - Text embedded verbatim into the prompt.
 * @param _workflowJsons - Currently not read by this function.
 * @returns Verdicts sorted by ascending `id`, or `[]` when every attempt
 *   produced no usable verdict. Items the model did not answer are simply absent.
 * @throws Anything thrown while creating the agent or generating a response;
 *   such errors are not caught here and do not trigger a retry.
 */
export async function verifyChecklist(
	checklist: ChecklistItem[],
	verificationArtifact: string,
	_workflowJsons: WorkflowResponse[],
): Promise<ChecklistResult[]> {
	const llmItems = checklist.filter((item) => item.strategy === 'llm');
	if (llmItems.length === 0) return [];

	const userMessage = `## Checklist
${JSON.stringify(llmItems, null, 2)}
## Verification Artifact
${verificationArtifact}
Verify each checklist item against the artifact above.`;

	// Ids the model is allowed to answer for; anything else is discarded.
	const validIds = new Set(llmItems.map((item) => item.id));

	for (let attempt = 0; attempt < MAX_VERIFY_ATTEMPTS; attempt++) {
		const agent = createEvalAgent('eval-checklist-verifier', {
			instructions: MOCK_EXECUTION_VERIFY_PROMPT,
			cache: true,
		}).structuredOutput(checklistResultSchema);

		const response = await agent.generate(userMessage);
		// The structured output is only cast, not validated here, so every field
		// that matters is re-checked at runtime below.
		const parsed = response.structuredOutput as
			| z.infer<typeof checklistResultSchema>
			| undefined;
		const rawEntries = parsed?.results;
		const entries = Array.isArray(rawEntries) ? rawEntries : [];

		// Keyed by checklist id so a repeated id cannot produce a second result.
		const verdicts = new Map<number, ChecklistResult>();
		for (const entry of entries) {
			const wellFormed = typeof entry.id === 'number' && typeof entry.pass === 'boolean';
			if (!wellFormed || !validIds.has(entry.id) || verdicts.has(entry.id)) continue;

			verdicts.set(entry.id, {
				id: entry.id,
				pass: entry.pass,
				reasoning: entry.reasoning ?? '',
				strategy: 'llm',
				// A failing verdict always carries a category, even if the model omitted one.
				failureCategory:
					entry.failureCategory ?? (entry.pass ? undefined : 'verification_failure'),
				rootCause: entry.rootCause,
			});
		}

		if (verdicts.size > 0) {
			return [...verdicts.values()].sort((a, b) => a.id - b.id);
		}
	}

	return [];
}