chore: add pr classification bot (#2055)

This commit is contained in:
Tom Alexander 2026-04-06 10:12:10 -04:00 committed by GitHub
parent 8125d249d9
commit bcb02f3364
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 282 additions and 0 deletions

261
.github/workflows/pr-triage.yml vendored Normal file
View file

@ -0,0 +1,261 @@
# Assigns each pull request a review/tier-* risk label and posts a triage
# comment; all logic lives in the inline github-script step below.
name: PR Triage
on:
# Re-run whenever a PR is opened, receives new commits, is reopened, or leaves draft
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
# Manual run: classify one PR by number, or every open PR when the input is blank
workflow_dispatch:
inputs:
pr_number:
description:
PR number to classify (leave blank to classify all open PRs)
required: false
type: string
# Write scopes are required because the script creates repo labels, adds/removes
# labels on PRs, and creates/updates PR comments
permissions:
contents: read
pull-requests: write
issues: write
jobs:
classify:
name: Classify PR risk tier
runs-on: ubuntu-24.04
# For pull_request events skip drafts; workflow_dispatch always runs
# (the script itself skips draft PRs individually — it checks pr.draft per PR)
if:
${{ github.event_name == 'workflow_dispatch' ||
!github.event.pull_request.draft }}
steps:
# Single step: the whole classifier is the JavaScript passed to github-script
- name: Classify and label PR(s)
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
// Repository coordinates shared by every API call in this script.
const { owner, repo } = context.repo;
// ── Determine which PRs to process ──────────────────────────────
// pull_request events target the PR that triggered the run.
// workflow_dispatch targets either the single PR given in `pr_number`
// or, when that input is blank, every open PR in the repository.
let prNumbers;
if (context.eventName === 'workflow_dispatch') {
  // Use context.payload.inputs to avoid script-injection via template interpolation
  const input = String(context.payload.inputs?.pr_number ?? '').trim();
  if (input === '') {
    const openPRs = await github.paginate(
      github.rest.pulls.list,
      { owner, repo, state: 'open', per_page: 100 }
    );
    prNumbers = openPRs.map(pr => pr.number);
    console.log(`Bulk triage: found ${prNumbers.length} open PRs`);
  } else {
    // Number() would silently accept 'abc' (NaN), '1e3', '0x10' or '12.5';
    // fail fast instead of sending a bogus PR number to the API.
    if (!/^[1-9]\d*$/.test(input)) {
      throw new Error(`Invalid pr_number input "${input}": expected a positive integer`);
    }
    prNumbers = [Number(input)];
  }
} else {
  prNumbers = [context.payload.pull_request.number];
}
// ── Shared constants ─────────────────────────────────────────────
// Paths whose modification always forces Tier 4 (critical).
const TIER4_PATTERNS = [
  /^packages\/api\/src\/middleware\/auth/,
  /^packages\/api\/src\/routers\/api\/me\./,
  /^packages\/api\/src\/routers\/api\/team\./,
  /^packages\/api\/src\/routers\/external-api\//,
  /^packages\/api\/src\/models\/(user|team|teamInvite)\./,
  /^packages\/api\/src\/config\./,
  /^packages\/api\/src\/tasks\//,
  /^packages\/otel-collector\//,
  /^docker\/otel-collector\//,
  /^docker\/clickhouse\//,
  /^\.github\/workflows\//,
];
// Paths considered trivial; a PR is Tier 1 when every changed file matches one.
const TIER1_PATTERNS = [
  /\.(md|txt|png|jpg|jpeg|gif|svg|ico)$/i,
  /^yarn\.lock$/,
  /^package-lock\.json$/,
  /^\.yarnrc\.yml$/,
  /^\.github\/images\//,
  /^\.env\.example$/,
];
// PR author logins treated as trivial (dependency-bump bots).
const BOT_AUTHORS = ['dependabot', 'dependabot[bot]'];
// Head-branch prefixes that mark a PR as agent-generated.
const AGENT_BRANCH_PREFIXES = ['claude/', 'agent/', 'ai/'];
// Label definition per tier; each object is spread into the createLabel call.
const TIER_LABELS = {
  1: { name: 'review/tier-1', color: '0E8A16', description: 'Trivial — auto-merge candidate once CI passes' },
  2: { name: 'review/tier-2', color: '1D76DB', description: 'Low risk — AI review + quick human skim' },
  3: { name: 'review/tier-3', color: 'E4E669', description: 'Standard — full human review required' },
  4: { name: 'review/tier-4', color: 'B60205', description: 'Critical — deep review + domain expert sign-off' },
};
// Human-readable text per tier, rendered into the triage comment.
const TIER_INFO = {
  1: {
    emoji: '🟢',
    headline: 'Tier 1 — Trivial',
    detail: 'Docs, images, lock files, or a dependency bump. No functional code changes detected.',
    process: 'Auto-merge once CI passes. No human review required.',
    sla: 'Resolves automatically.',
  },
  2: {
    emoji: '🔵',
    headline: 'Tier 2 — Low Risk',
    detail: 'Small, isolated change with no API route or data model modifications.',
    // Range restored: the text previously read "515 min" (the dash had been lost).
    process: 'AI review + quick human skim (target: 5–15 min). Reviewer validates AI assessment and checks for domain-specific concerns.',
    sla: 'Resolve within 4 business hours.',
  },
  3: {
    emoji: '🟡',
    headline: 'Tier 3 — Standard',
    detail: 'Introduces new logic, modifies core functionality, or touches areas with non-trivial risk.',
    process: 'Full human review — logic, architecture, edge cases.',
    sla: 'First-pass feedback within 1 business day.',
  },
  4: {
    emoji: '🔴',
    headline: 'Tier 4 — Critical',
    detail: 'Touches auth, data models, config, tasks, OTel pipeline, ClickHouse, or CI/CD.',
    process: 'Deep review from a domain expert. Synchronous walkthrough may be required.',
    sla: 'Schedule synchronous review within 2 business days.',
  },
};
// ── Ensure tier labels exist (once, before the loop) ─────────────
// List every label currently defined on the repo, then create whichever
// review/tier-* labels are still missing.
const repoLabels = await github.paginate(github.rest.issues.listLabelsForRepo, {
  owner,
  repo,
  per_page: 100,
});
const repoLabelNames = new Set();
for (const { name } of repoLabels) {
  repoLabelNames.add(name);
}
for (const tierKey of Object.keys(TIER_LABELS)) {
  const wanted = TIER_LABELS[tierKey];
  if (repoLabelNames.has(wanted.name)) {
    continue;
  }
  // The label definition (name, color, description) is passed through as-is.
  await github.rest.issues.createLabel({ owner, repo, ...wanted });
  repoLabelNames.add(wanted.name);
}
// ── Classify a single PR ─────────────────────────────────────────
/**
 * Classify one pull request into a review tier (1–4), sync its
 * `review/tier-*` label, and create or update the single triage comment.
 *
 * Returns early, without labelling or commenting, when the PR is a draft
 * or when the most recent event that applied the current tier label was
 * not performed by a Bot actor (treated as a manual override).
 *
 * @param {number} prNumber - Number of the pull request to classify.
 * @returns {Promise<void>}
 */
async function classifyPR(prNumber) {
  // Fetch PR metadata first so drafts are skipped before any paginated calls
  const { data: pr } = await github.rest.pulls.get({ owner, repo, pull_number: prNumber });
  // Skip drafts (applies to every invocation, not just bulk runs)
  if (pr.draft) {
    console.log(`Skipping PR #${prNumber}: draft`);
    return;
  }
  // `user` can be null when the author's account was deleted
  const author = pr.user?.login ?? 'ghost';
  const branchName = pr.head.ref;

  // Check for manual tier override — if a human last applied the label, respect it
  const { data: currentLabels } = await github.rest.issues.listLabelsOnIssue({ owner, repo, issue_number: prNumber });
  const existingTierLabel = currentLabels.find(l => l.name.startsWith('review/tier-'));
  if (existingTierLabel) {
    const events = await github.paginate(
      github.rest.issues.listEvents,
      { owner, repo, issue_number: prNumber, per_page: 100 }
    );
    const lastLabelEvent = events
      .filter(e => e.event === 'labeled' && e.label?.name === existingTierLabel.name)
      .pop();
    // `actor` can be null (deleted account); anything that is not a Bot counts as manual
    if (lastLabelEvent && lastLabelEvent.actor?.type !== 'Bot') {
      console.log(`PR #${prNumber}: tier manually set to ${existingTierLabel.name} by ${lastLabelEvent.actor?.login ?? 'unknown'} — skipping`);
      return;
    }
  }

  // Fetch changed files (only once we know the PR will actually be classified)
  const filesRes = await github.paginate(
    github.rest.pulls.listFiles,
    { owner, repo, pull_number: prNumber, per_page: 100 }
  );
  const files = filesRes.map(f => f.filename);
  const linesChanged = filesRes.reduce((sum, f) => sum + f.additions + f.deletions, 0);

  // Classify
  const isTier4 = files.some(f => TIER4_PATTERNS.some(p => p.test(f)));
  const isTrivialAuthor = BOT_AUTHORS.includes(author);
  const allFilesTrivial = files.length > 0 && files.every(f => TIER1_PATTERNS.some(p => p.test(f)));
  const isTier1 = isTrivialAuthor || allFilesTrivial;
  const isAgentBranch = AGENT_BRANCH_PREFIXES.some(p => branchName.startsWith(p));
  const touchesApiModels = files.some(f =>
    f.startsWith('packages/api/src/models/') || f.startsWith('packages/api/src/routers/')
  );
  const isSmallDiff = linesChanged < 100;
  // Agent branches are bumped to Tier 3 regardless of size to ensure human review
  const isTier2 = !isTier4 && !isTier1 && isSmallDiff && !touchesApiModels && !isAgentBranch;
  // Precedence: critical paths win over trivial signals, which win over "small diff"
  let tier;
  if (isTier4) tier = 4;
  else if (isTier1) tier = 1;
  else if (isTier2) tier = 2;
  else tier = 3;
  // Escalate very large non-critical PRs to Tier 4; this also applies to agent
  // branches that were bumped to Tier 3 above — a 400+ line agent-generated change
  // warrants deep review regardless of which files it touches.
  if (tier === 3 && linesChanged > 400) tier = 4;

  // Apply label: drop any other tier label, then add the computed one if absent
  const targetLabel = TIER_LABELS[tier].name;
  for (const existing of currentLabels) {
    if (existing.name.startsWith('review/tier-') && existing.name !== targetLabel) {
      await github.rest.issues.removeLabel({ owner, repo, issue_number: prNumber, name: existing.name });
    }
  }
  if (!currentLabels.some(l => l.name === targetLabel)) {
    await github.rest.issues.addLabels({ owner, repo, issue_number: prNumber, labels: [targetLabel] });
  }

  // Build comment body
  const info = TIER_INFO[tier];
  const signals = [];
  if (isTier4) signals.push('critical-path files detected');
  if (isAgentBranch) signals.push(`agent branch (\`${branchName}\`)`);
  if (linesChanged > 400) signals.push(`large diff (${linesChanged} lines changed)`);
  if (isTrivialAuthor) signals.push(`bot author (${author})`);
  if (allFilesTrivial && !isTrivialAuthor) signals.push('all files are docs/images/lock files');
  if (touchesApiModels) signals.push('API routes or data models changed');
  const signalList = signals.length > 0 ? `\n**Signals**: ${signals.join(', ')}` : '';
  const body = [
    // Hidden marker used below to find the previously posted triage comment
    '<!-- pr-triage -->',
    `## ${info.emoji} ${info.headline}`,
    '',
    info.detail,
    signalList,
    '',
    `**Review process**: ${info.process}`,
    `**SLA**: ${info.sla}`,
    '',
    `<details><summary>Stats</summary>`,
    '',
    `- Files changed: ${files.length}`,
    `- Lines changed: ${linesChanged}`,
    `- Branch: \`${branchName}\``,
    `- Author: ${author}`,
    '',
    '</details>',
    '',
    `> To override this classification, remove the \`${targetLabel}\` label and apply a different \`review/tier-*\` label. Manual overrides are preserved on subsequent pushes.`,
  ].join('\n');

  // Post or update the single triage comment
  const comments = await github.paginate(
    github.rest.issues.listComments,
    { owner, repo, issue_number: prNumber, per_page: 100 }
  );
  // `user` may be null (deleted account) and `body` may be absent; guard both
  const existing = comments.find(
    c => c.user?.login === 'github-actions[bot]' && c.body?.includes('<!-- pr-triage -->')
  );
  if (existing) {
    await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
  } else {
    await github.rest.issues.createComment({ owner, repo, issue_number: prNumber, body });
  }
  console.log(`PR #${prNumber}: Tier ${tier} (${linesChanged} lines, ${files.length} files)`);
}
// ── Process all target PRs ───────────────────────────────────────
// PRs are handled strictly one after another. A failure on one PR is
// logged and does not stop the remaining PRs from being classified.
for (let idx = 0; idx < prNumbers.length; idx += 1) {
  const target = prNumbers[idx];
  try {
    await classifyPR(target);
  } catch (failure) {
    console.error(`PR #${target}: classification failed — ${failure.message}`);
  }
}

View file

@ -137,6 +137,27 @@ make dev-e2e-clean # Remove test artifacts
- **Database patterns**: MongoDB for metadata with Mongoose, ClickHouse for
telemetry queries
## PR Hygiene for Agent-Generated Code
When using agentic tools to generate PRs, follow these practices to keep reviews
efficient and accurate:
1. **Scope PRs to a single logical change**, even if the agent can produce more
in one session. Smaller, focused PRs move through the review pipeline faster
and are easier to classify accurately.
2. **Write the PR description to explain intent (the "why"), not just what
changed.** Reviewers need to understand the goal to catch cases where the
agent solved the wrong problem or made a plausible-but-wrong trade-off.
3. **Name agent-generated branches with a `claude/`, `agent/`, or `ai/` prefix**
(e.g., `claude/add-rate-limiting`). This allows the PR triage classifier to
apply appropriate scrutiny and lets reviewers calibrate their attention.
4. **Write or update tests alongside the implementation**, not after. Configure
your agent to produce tests before writing implementation code. See the
Testing section below for the commands to use.
## GitHub Action Workflow (when invoked via @claude)
When working on issues or PRs through the GitHub Action: