chore: add pr classification bot (#2055)

2026-04-21 13:37:15 +00:00 · 2026-04-06 10:12:10 -04:00 · 2026-04-06 10:12:10 -04:00 · bcb02f3364
commit bcb02f3364
parent 8125d249d9
2 changed files with 282 additions and 0 deletions
--- a/.github/workflows/pr-triage.yml
+++ b/.github/workflows/pr-triage.yml
@ -0,0 +1,261 @@
+name: PR Triage
+
+on:
+  pull_request: # NOTE(review): on fork PRs GITHUB_TOKEN is normally read-only for this event, so the label/comment writes in the script would fail — confirm this is acceptable
+    types: [opened, synchronize, reopened, ready_for_review]
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description:
+          PR number to classify (leave blank to classify all open PRs)
+        required: false
+        type: string
+
+permissions:
+  contents: read
+  pull-requests: write
+  issues: write # the script creates labels and posts comments through github.rest.issues.*
+
+jobs:
+  classify:
+    name: Classify PR risk tier
+    runs-on: ubuntu-24.04
+    # Draft PRs are skipped for pull_request events; workflow_dispatch runs always start (the script then skips drafts per PR)
+    if:
+      ${{ github.event_name == 'workflow_dispatch' ||
+      !github.event.pull_request.draft }}
+    steps:
+      - name: Classify and label PR(s)
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+
+            // ── Determine which PRs to process ──────────────────────────────
+            let prNumbers;
+            if (context.eventName === 'workflow_dispatch') {
+              // Use context.payload.inputs to avoid script-injection via template interpolation
+              const input = (context.payload.inputs?.pr_number ?? '').trim();
+              if (input && input !== '') {
+                prNumbers = [Number(input)];
+              } else {
+                const openPRs = await github.paginate(
+                  github.rest.pulls.list,
+                  { owner, repo, state: 'open', per_page: 100 }
+                );
+                prNumbers = openPRs.map(pr => pr.number);
+                console.log(`Bulk triage: found ${prNumbers.length} open PRs`);
+              }
+            } else {
+              prNumbers = [context.payload.pull_request.number];
+            }
+
+            // ── Shared constants ─────────────────────────────────────────────
+            const TIER4_PATTERNS = [ // critical paths: a PR touching any matching file is classified Tier 4
+              /^packages\/api\/src\/middleware\/auth/,
+              /^packages\/api\/src\/routers\/api\/me\./,
+              /^packages\/api\/src\/routers\/api\/team\./,
+              /^packages\/api\/src\/routers\/external-api\//,
+              /^packages\/api\/src\/models\/(user|team|teamInvite)\./,
+              /^packages\/api\/src\/config\./,
+              /^packages\/api\/src\/tasks\//,
+              /^packages\/otel-collector\//,
+              /^docker\/otel-collector\//,
+              /^docker\/clickhouse\//,
+              /^\.github\/workflows\//,
+            ];
+
+            const TIER1_PATTERNS = [ // trivial files: a PR whose files ALL match is Tier 1 (a Tier 4 match still wins)
+              /\.(md|txt|png|jpg|jpeg|gif|svg|ico)$/i,
+              /^yarn\.lock$/,
+              /^package-lock\.json$/,
+              /^\.yarnrc\.yml$/,
+              /^\.github\/images\//,
+              /^\.env\.example$/,
+            ];
+
+            const BOT_AUTHORS = ['dependabot', 'dependabot[bot]']; // PRs by these logins are Tier 1 whatever files they touch (a Tier 4 match still wins)
+            const AGENT_BRANCH_PREFIXES = ['claude/', 'agent/', 'ai/']; // head-branch prefixes that make a PR ineligible for Tier 2
+
+            const TIER_LABELS = { // keyed by tier number; each value is spread as-is into issues.createLabel
+              1: { name: 'review/tier-1', color: '0E8A16', description: 'Trivial — auto-merge candidate once CI passes' },
+              2: { name: 'review/tier-2', color: '1D76DB', description: 'Low risk — AI review + quick human skim' },
+              3: { name: 'review/tier-3', color: 'E4E669', description: 'Standard — full human review required' },
+              4: { name: 'review/tier-4', color: 'B60205', description: 'Critical — deep review + domain expert sign-off' },
+            };
+
+            const TIER_INFO = { // keyed by tier number; text rendered into the triage comment body
+              1: {
+                emoji: '🟢',
+                headline: 'Tier 1 — Trivial',
+                detail: 'Docs, images, lock files, or a dependency bump. No functional code changes detected.',
+                process: 'Auto-merge once CI passes. No human review required.',
+                sla: 'Resolves automatically.',
+              },
+              2: {
+                emoji: '🔵',
+                headline: 'Tier 2 — Low Risk',
+                detail: 'Small, isolated change with no API route or data model modifications.',
+                process: 'AI review + quick human skim (target: 5–15 min). Reviewer validates AI assessment and checks for domain-specific concerns.',
+                sla: 'Resolve within 4 business hours.',
+              },
+              3: {
+                emoji: '🟡',
+                headline: 'Tier 3 — Standard',
+                detail: 'Introduces new logic, modifies core functionality, or touches areas with non-trivial risk.',
+                process: 'Full human review — logic, architecture, edge cases.',
+                sla: 'First-pass feedback within 1 business day.',
+              },
+              4: {
+                emoji: '🔴',
+                headline: 'Tier 4 — Critical',
+                detail: 'Touches auth, data models, config, tasks, OTel pipeline, ClickHouse, or CI/CD.',
+                process: 'Deep review from a domain expert. Synchronous walkthrough may be required.',
+                sla: 'Schedule synchronous review within 2 business days.',
+              },
+            };
+
+            // ── Ensure tier labels exist (once, before the loop) ─────────────
+            const repoLabels = await github.paginate(
+              github.rest.issues.listLabelsForRepo,
+              { owner, repo, per_page: 100 }
+            );
+            const repoLabelNames = new Set(repoLabels.map(l => l.name));
+            for (const label of Object.values(TIER_LABELS)) {
+              if (!repoLabelNames.has(label.name)) {
+                await github.rest.issues.createLabel({ owner, repo, ...label });
+                repoLabelNames.add(label.name);
+              }
+            }
+
+            // ── Classify a single PR ─────────────────────────────────────────
+            async function classifyPR(prNumber) {
+              // Fetch changed files
+              const filesRes = await github.paginate(
+                github.rest.pulls.listFiles,
+                { owner, repo, pull_number: prNumber, per_page: 100 }
+              );
+              const files = filesRes.map(f => f.filename);
+              const linesChanged = filesRes.reduce((sum, f) => sum + f.additions + f.deletions, 0);
+
+              // Fetch PR metadata
+              const { data: pr } = await github.rest.pulls.get({ owner, repo, pull_number: prNumber });
+              const author = pr.user.login;
+              const branchName = pr.head.ref;
+
+              // Skip drafts when running in bulk mode
+              if (pr.draft) {
+                console.log(`Skipping PR #${prNumber}: draft`);
+                return;
+              }
+
+              // Check for manual tier override — if a human last applied the label, respect it
+              const { data: currentLabels } = await github.rest.issues.listLabelsOnIssue({ owner, repo, issue_number: prNumber });
+              const existingTierLabel = currentLabels.find(l => l.name.startsWith('review/tier-'));
+              if (existingTierLabel) {
+                const events = await github.paginate(
+                  github.rest.issues.listEvents,
+                  { owner, repo, issue_number: prNumber, per_page: 100 }
+                );
+                const lastLabelEvent = events
+                  .filter(e => e.event === 'labeled' && e.label?.name === existingTierLabel.name)
+                  .pop();
+                if (lastLabelEvent && lastLabelEvent.actor.type !== 'Bot') {
+                  console.log(`PR #${prNumber}: tier manually set to ${existingTierLabel.name} by ${lastLabelEvent.actor.login} — skipping`);
+                  return;
+                }
+              }
+
+              // Classify
+              const isTier4 = files.some(f => TIER4_PATTERNS.some(p => p.test(f)));
+              const isTrivialAuthor = BOT_AUTHORS.includes(author);
+              const allFilesTrivial = files.length > 0 && files.every(f => TIER1_PATTERNS.some(p => p.test(f)));
+              const isTier1 = isTrivialAuthor || allFilesTrivial;
+              const isAgentBranch = AGENT_BRANCH_PREFIXES.some(p => branchName.startsWith(p));
+              const touchesApiModels = files.some(f =>
+                f.startsWith('packages/api/src/models/') || f.startsWith('packages/api/src/routers/')
+              );
+              const isSmallDiff = linesChanged < 100;
+              // Agent branches are bumped to Tier 3 regardless of size to ensure human review
+              const isTier2 = !isTier4 && !isTier1 && isSmallDiff && !touchesApiModels && !isAgentBranch;
+
+              let tier;
+              if (isTier4) tier = 4;
+              else if (isTier1) tier = 1;
+              else if (isTier2) tier = 2;
+              else tier = 3;
+
+              // Escalate very large non-critical PRs to Tier 4; this also applies to agent
+              // branches that were bumped to Tier 3 above — a 400+ line agent-generated change
+              // warrants deep review regardless of which files it touches.
+              if (tier === 3 && linesChanged > 400) tier = 4;
+
+              // Apply label
+              for (const existing of currentLabels) {
+                if (existing.name.startsWith('review/tier-') && existing.name !== TIER_LABELS[tier].name) {
+                  await github.rest.issues.removeLabel({ owner, repo, issue_number: prNumber, name: existing.name });
+                }
+              }
+              if (!currentLabels.find(l => l.name === TIER_LABELS[tier].name)) {
+                await github.rest.issues.addLabels({ owner, repo, issue_number: prNumber, labels: [TIER_LABELS[tier].name] });
+              }
+
+              // Build comment body
+              const info = TIER_INFO[tier];
+              const signals = [];
+              if (isTier4) signals.push('critical-path files detected');
+              if (isAgentBranch) signals.push(`agent branch (\`${branchName}\`)`);
+              if (linesChanged > 400) signals.push(`large diff (${linesChanged} lines changed)`);
+              if (isTrivialAuthor) signals.push(`bot author (${author})`);
+              if (allFilesTrivial && !isTrivialAuthor) signals.push('all files are docs/images/lock files');
+              if (touchesApiModels) signals.push('API routes or data models changed');
+
+              const signalList = signals.length > 0 ? `\n**Signals**: ${signals.join(', ')}` : '';
+
+              const body = [
+                '<!-- pr-triage -->',
+                `## ${info.emoji} ${info.headline}`,
+                '',
+                info.detail,
+                signalList,
+                '',
+                `**Review process**: ${info.process}`,
+                `**SLA**: ${info.sla}`,
+                '',
+                `<details><summary>Stats</summary>`,
+                '',
+                `- Files changed: ${files.length}`,
+                `- Lines changed: ${linesChanged}`,
+                `- Branch: \`${branchName}\``,
+                `- Author: ${author}`,
+                '',
+                '</details>',
+                '',
+                `> To override this classification, remove the \`${TIER_LABELS[tier].name}\` label and apply a different \`review/tier-*\` label. Manual overrides are preserved on subsequent pushes.`,
+              ].join('\n');
+
+              // Post or update the single triage comment
+              const comments = await github.paginate(
+                github.rest.issues.listComments,
+                { owner, repo, issue_number: prNumber, per_page: 100 }
+              );
+              const existing = comments.find(c => c.user.login === 'github-actions[bot]' && c.body.includes('<!-- pr-triage -->'));
+              if (existing) {
+                await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
+              } else {
+                await github.rest.issues.createComment({ owner, repo, issue_number: prNumber, body });
+              }
+
+              console.log(`PR #${prNumber}: Tier ${tier} (${linesChanged} lines, ${files.length} files)`);
+            }
+
+            // ── Process all target PRs ───────────────────────────────────────
+            for (const prNumber of prNumbers) {
+              try {
+                await classifyPR(prNumber);
+              } catch (err) {
+                console.error(`PR #${prNumber}: classification failed — ${err.message}`);
+              }
+            }
--- a/AGENTS.md
+++ b/AGENTS.md
@ -137,6 +137,27 @@ make dev-e2e-clean                               # Remove test artifacts
 - **Database patterns**: MongoDB for metadata with Mongoose, ClickHouse for
  telemetry queries

+## PR Hygiene for Agent-Generated Code
+
+When using agentic tools to generate PRs, follow these practices to keep reviews
+efficient and accurate:
+
+1. **Scope PRs to a single logical change**, even if the agent can produce more
+   in one session. Smaller, focused PRs move through the review pipeline faster
+   and are easier to classify accurately.
+
+2. **Write the PR description to explain intent (the "why"), not just what
+   changed.** Reviewers need to understand the goal to catch cases where the
+   agent solved the wrong problem or made a plausible-but-wrong trade-off.
+
+3. **Name agent-generated branches with a `claude/`, `agent/`, or `ai/` prefix**
+   (e.g., `claude/add-rate-limiting`). This allows the PR triage classifier to
+   apply appropriate scrutiny and lets reviewers calibrate their attention.
+
+4. **Write or update tests alongside the implementation**, not after. Where
+   practical, configure your agent to write the tests first and the
+   implementation second. See the Testing section below for the commands to use.
+
 ## GitHub Action Workflow (when invoked via @claude)

 When working on issues or PRs through the GitHub Action: