n8n/packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/programmatic-evaluation.ts
oleg f880a74d99
refactor(ai-builder): Implement unified evaluations harness (#23955)
Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-01-13 12:11:13 +00:00

89 lines
2.8 KiB
TypeScript

import type { INodeTypeDescription } from 'n8n-workflow';
import type { ProgrammaticEvaluationInput, ProgrammaticViolation } from '@/validation/types';
import {
evaluateConnections,
evaluateCredentials,
evaluateNodes,
evaluateTools,
evaluateAgentPrompt,
evaluateFromAi,
evaluateTrigger,
} from './evaluators';
import {
evaluateWorkflowSimilarity,
evaluateWorkflowSimilarityMultiple,
} from './evaluators/workflow-similarity';
import { calculateOverallScore } from './score';
/**
 * Runs every programmatic evaluator against a generated workflow and
 * aggregates the per-category results into a single overall score.
 *
 * The connection, node, trigger, agent-prompt, tools, fromAi and credentials
 * evaluators always run. The similarity evaluator runs only when at least one
 * reference workflow is supplied; otherwise `similarity` is `null`.
 *
 * @param input - Evaluation input holding the generated workflow, optional
 *   reference workflows, and the similarity preset (defaults to `'standard'`).
 * @param nodeTypes - Node type descriptions passed to the evaluators that
 *   accept them.
 * @returns The overall score together with each individual evaluator result.
 *   If the similarity evaluation throws, `similarity` holds a single
 *   `workflow-similarity-evaluation-failed` critical violation and a score of 0
 *   instead of propagating the error.
 */
export async function programmaticEvaluation(
	input: ProgrammaticEvaluationInput,
	nodeTypes: INodeTypeDescription[],
) {
	const { generatedWorkflow, referenceWorkflows, preset = 'standard' } = input;

	const connectionsEvaluationResult = evaluateConnections(generatedWorkflow, nodeTypes);
	const nodesEvaluationResult = evaluateNodes(generatedWorkflow, nodeTypes);
	const triggerEvaluationResult = evaluateTrigger(generatedWorkflow, nodeTypes);
	const agentPromptEvaluationResult = evaluateAgentPrompt(generatedWorkflow);
	const toolsEvaluationResult = evaluateTools(generatedWorkflow, nodeTypes);
	const fromAiEvaluationResult = evaluateFromAi(generatedWorkflow, nodeTypes);
	const credentialsEvaluationResult = evaluateCredentials(generatedWorkflow);

	// Workflow similarity evaluation (skipped when there is nothing to compare against)
	let similarityEvaluationResult = null;

	if (referenceWorkflows && referenceWorkflows.length > 0) {
		try {
			if (referenceWorkflows.length === 1) {
				similarityEvaluationResult = await evaluateWorkflowSimilarity(
					generatedWorkflow,
					referenceWorkflows[0],
					preset,
				);
			} else {
				similarityEvaluationResult = await evaluateWorkflowSimilarityMultiple(
					generatedWorkflow,
					referenceWorkflows,
					preset,
				);
			}
		} catch (error) {
			// Anything can be thrown in JavaScript, so narrow before reading `.message`;
			// otherwise a non-Error throw would be reported as "undefined".
			const errorMessage = error instanceof Error ? error.message : String(error);

			// Do not abort the whole evaluation: record the failure as a critical
			// violation with a zero similarity score so it is visible in the result.
			const violation: ProgrammaticViolation = {
				name: 'workflow-similarity-evaluation-failed',
				type: 'critical',
				description: `Similarity evaluation failed: ${errorMessage}`,
				pointsDeducted: 0,
			};

			similarityEvaluationResult = {
				violations: [violation],
				score: 0,
			};
		}
	}

	const overallScore = calculateOverallScore({
		connections: connectionsEvaluationResult,
		nodes: nodesEvaluationResult,
		trigger: triggerEvaluationResult,
		agentPrompt: agentPromptEvaluationResult,
		tools: toolsEvaluationResult,
		fromAi: fromAiEvaluationResult,
		credentials: credentialsEvaluationResult,
		similarity: similarityEvaluationResult,
	});

	return {
		overallScore,
		connections: connectionsEvaluationResult,
		nodes: nodesEvaluationResult,
		trigger: triggerEvaluationResult,
		agentPrompt: agentPromptEvaluationResult,
		tools: toolsEvaluationResult,
		fromAi: fromAiEvaluationResult,
		credentials: credentialsEvaluationResult,
		similarity: similarityEvaluationResult,
	};
}