n8n/.github/workflows/test-evals-ai-reusable.yml
Declan Carroll e4dbe0db6b
ci: Update GitHub Actions to latest versions for Node.js 24 compatibility (#26949)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 14:01:53 +00:00

108 lines
3.8 KiB
YAML

# Reusable workflow: it is only triggered through `workflow_call`, and the
# caller supplies the inputs declared below.
name: "Test: Evals AI"

on:
  workflow_call:
    inputs:
      # Ref that the job checks out before running the evaluations.
      branch:
        type: string
        description: GitHub branch to test.
        default: master

      # Forwarded to the evaluation command as `--suite`.
      suite:
        type: string
        description: Evaluation suite to run (pairwise or llm-judge).
        default: pairwise

      # The only mandatory input; forwarded as `--dataset`.
      dataset:
        type: string
        description: LangSmith dataset to use.
        required: true

      # Forwarded as `--repetitions`.
      repetitions:
        type: number
        description: Number of repetitions to run.
        default: 1

      # Forwarded as `--judges`.
      judges:
        type: number
        description: Number of judges to use.
        default: 1

      # Forwarded as `--concurrency`.
      concurrency:
        type: number
        description: Max concurrent evaluations.
        default: 10

      # When left empty, the job derives an experiment name from the branch.
      experiment_name_prefix:
        type: string
        description: Prefix for the experiment name. If empty, will be auto-generated from branch name.
        default: ""
jobs:
  # Single job: check out the requested branch, prepare the Node.js and Python
  # toolchains, export the node type catalogue, then run the evaluation suite.
  evals:
    name: Run ${{ inputs.suite }} Evaluations
    runs-on: blacksmith-2vcpu-ubuntu-2204
    # Job-level environment, visible to every step below.
    env:
      N8N_AI_ANTHROPIC_KEY: ${{ secrets.EVALS_ANTHROPIC_KEY }}
      # Quoted so the value is an explicit string rather than a YAML boolean.
      LANGSMITH_TRACING: 'true'
      LANGSMITH_ENDPOINT: ${{ secrets.EVALS_LANGSMITH_ENDPOINT }}
      LANGSMITH_API_KEY: ${{ secrets.EVALS_LANGSMITH_API_KEY }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          ref: ${{ inputs.branch }}

      # Builds the experiment name exposed as `steps.experiment.outputs.name`:
      #   1. "<prefix>_<date>"            when a prefix was supplied,
      #   2. "<TICKET>_<date>"            when the branch starts with ai-<digits>,
      #   3. "CI_<sanitized-branch>_<date>" otherwise.
      - name: Generate experiment name
        id: experiment
        # Caller-controlled values are handed over as environment variables
        # instead of being expanded into the script text, so quotes or `$(...)`
        # inside a branch name or prefix cannot be executed by the shell.
        env:
          EXPERIMENT_PREFIX: ${{ inputs.experiment_name_prefix }}
          TARGET_BRANCH: ${{ inputs.branch }}
        run: |
          DATE=$(date +%Y_%m_%d)
          if [ -n "$EXPERIMENT_PREFIX" ]; then
            NAME="${EXPERIMENT_PREFIX}_${DATE}"
          else
            # Extract ticket ID from branch name (e.g., AI-1234 from ai-1234-feature-name).
            # `|| true` keeps the step alive when grep finds no ticket prefix.
            TICKET=$(echo "$TARGET_BRANCH" | grep -oE '^[Aa][Ii]-[0-9]+' | tr '[:lower:]' '[:upper:]' || true)
            if [ -n "$TICKET" ]; then
              NAME="${TICKET}_${DATE}"
            else
              # Sanitize branch name for experiment name: replace every character
              # outside [a-zA-Z0-9_.-] with "_" and collapse runs of "_".
              SANITIZED_BRANCH=$(echo "$TARGET_BRANCH" | sed 's/[^a-zA-Z0-9_.-]/_/g' | sed 's/__*/_/g')
              NAME="CI_${SANITIZED_BRANCH}_${DATE}"
            fi
          fi
          echo "name=$NAME" >> "$GITHUB_OUTPUT"
          echo "Generated experiment name: $NAME"

      # Repository-local action; its behaviour is defined outside this file.
      - name: Setup and Build
        uses: ./.github/actions/setup-nodejs

      - name: Install uv
        uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0
        with:
          enable-cache: true

      - name: Install just
        uses: extractions/setup-just@e33e0265a09d6d736e2ee1e0eb685ef1de4669ff # v3.0.0

      - name: Install Python
        working-directory: packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/python
        run: uv python install 3.11

      # Runs the `sync-all` recipe from the justfile in the working directory.
      - name: Install workflow comparison dependencies
        working-directory: packages/@n8n/ai-workflow-builder.ee/evaluations/programmatic/python
        run: just sync-all

      # Writes nodes.json into the evaluations directory before the run.
      - name: Export Node Types
        run: |
          ./packages/cli/bin/n8n export:nodes --output ./packages/@n8n/ai-workflow-builder.ee/evaluations/nodes.json

      - name: Run Evaluations
        working-directory: packages/@n8n/ai-workflow-builder.ee/evaluations
        # Inputs, the generated experiment name and the optional webhook secrets
        # are passed as environment variables rather than rendered into the
        # script, so their content is never interpreted as shell syntax.
        env:
          EVAL_INPUT_SUITE: ${{ inputs.suite }}
          EVAL_INPUT_DATASET: ${{ inputs.dataset }}
          EVAL_INPUT_REPETITIONS: ${{ inputs.repetitions }}
          EVAL_INPUT_JUDGES: ${{ inputs.judges }}
          EVAL_INPUT_CONCURRENCY: ${{ inputs.concurrency }}
          EVAL_INPUT_NAME: ${{ steps.experiment.outputs.name }}
          EVAL_WEBHOOK_URL: ${{ secrets.EVALS_WEBHOOK_URL }}
          EVAL_WEBHOOK_SECRET: ${{ secrets.EVALS_WEBHOOK_SECRET }}
        run: |
          args=(
            --suite "$EVAL_INPUT_SUITE"
            --backend langsmith
            --dataset "$EVAL_INPUT_DATASET"
            --repetitions "$EVAL_INPUT_REPETITIONS"
            --judges "$EVAL_INPUT_JUDGES"
            --concurrency "$EVAL_INPUT_CONCURRENCY"
            --name "$EVAL_INPUT_NAME"
          )
          # The webhook flags are optional: add each one only when its secret
          # is configured (an unset secret expands to an empty string).
          if [ -n "$EVAL_WEBHOOK_URL" ]; then
            args+=(--webhook-url "$EVAL_WEBHOOK_URL")
          fi
          if [ -n "$EVAL_WEBHOOK_SECRET" ]; then
            args+=(--webhook-secret "$EVAL_WEBHOOK_SECRET")
          fi
          pnpm eval "${args[@]}"