Merge remote-tracking branch 'origin/main' into feat/unified-group

This commit is contained in:
Alex Fedotyev 2026-04-20 19:35:45 -07:00
commit 3787fab561
345 changed files with 36213 additions and 6023 deletions

View file

@@ -1,5 +0,0 @@
---
"@hyperdx/app": patch
---
feat: Add alert icons to dashboard list page

View file

@@ -23,6 +23,8 @@ Delegate to the **`playwright-test-generator`** agent (via the Agent tool). Pass
The agent will drive a real browser, execute the steps live, and produce spec code that follows HyperDX conventions. Review the output before proceeding.
NOTE: When there is an existing spec file covering the feature, add new tests to the existing file instead of creating a new one. This keeps related tests together and avoids fragmentation.
### 2. Test Execution
After the generator agent writes the file, run the test:

10
.env
View file

@@ -8,8 +8,8 @@ NEXT_ALL_IN_ONE_IMAGE_NAME_DOCKERHUB=clickhouse/clickstack-all-in-one
ALL_IN_ONE_IMAGE_NAME_DOCKERHUB=hyperdx/hyperdx-all-in-one
NEXT_OTEL_COLLECTOR_IMAGE_NAME_DOCKERHUB=clickhouse/clickstack-otel-collector
OTEL_COLLECTOR_IMAGE_NAME_DOCKERHUB=hyperdx/hyperdx-otel-collector
CODE_VERSION=2.23.0
IMAGE_VERSION_SUB_TAG=.23.0
CODE_VERSION=2.24.0
IMAGE_VERSION_SUB_TAG=.24.0
IMAGE_VERSION=2
IMAGE_NIGHTLY_TAG=2-nightly
IMAGE_LATEST_TAG=latest
@@ -38,5 +38,11 @@ HDX_DEV_OTEL_HTTP_PORT=4318
HDX_DEV_OTEL_METRICS_PORT=8888
HDX_DEV_OTEL_JSON_HTTP_PORT=14318
# Otel Collector version (used as Docker build arg for image tags and component versions)
# When bumping, look up the core version from the upstream manifest:
# https://github.com/open-telemetry/opentelemetry-collector-releases/blob/main/distributions/otelcol-contrib/manifest.yaml
OTEL_COLLECTOR_VERSION=0.149.0
OTEL_COLLECTOR_CORE_VERSION=1.55.0
# Otel/Clickhouse config
HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE=default

View file

@@ -0,0 +1,569 @@
'use strict';
// Tests for the pure classification functions in pr-triage-classify.js.
// Uses Node's built-in test runner (no extra dependencies required).
// Run with: node --test .github/scripts/__tests__/pr-triage-classify.test.js
const { describe, it } = require('node:test');
const assert = require('node:assert/strict');
const {
isTestFile, isTrivialFile, isCriticalFile,
computeSignals, determineTier, buildTierComment,
} = require('../pr-triage-classify');
// ── Test helpers ─────────────────────────────────────────────────────────────
/** Minimal PR object matching the shape returned by the GitHub API */
function makePR(login, ref) {
  const user = { login };
  const head = { ref };
  return { user, head };
}
/** Minimal file entry matching the shape returned by pulls.listFiles */
function makeFile(filename, additions = 10, deletions = 5) {
  const entry = { filename, additions, deletions };
  return entry;
}
/** Classify a PR end-to-end from raw inputs (the common test path) */
function classify(login, ref, files) {
  const pr = makePR(login, ref);
  const signals = computeSignals(pr, files);
  return determineTier(signals);
}
// ── File classification helpers ──────────────────────────────────────────────
// isTestFile: recognizes __tests__/ dirs, *.test.* / *.spec.* files, and packages/app/tests/
describe('isTestFile', () => {
  it('matches __tests__ directory', () => {
    assert.ok(isTestFile('packages/api/src/__tests__/foo.test.ts'));
    assert.ok(isTestFile('packages/app/src/components/__tests__/Foo.test.tsx'));
  });
  it('matches .test.* and .spec.* extensions', () => {
    assert.ok(isTestFile('packages/app/src/Foo.test.tsx'));
    assert.ok(isTestFile('packages/app/src/Foo.spec.js'));
    assert.ok(isTestFile('packages/api/src/bar.test.ts'));
  });
  it('matches packages/app/tests/ prefix', () => {
    // e2e specs live outside src/ and carry no .test/.spec suffix
    assert.ok(isTestFile('packages/app/tests/e2e/navigation.ts'));
  });
  it('does not match regular source files', () => {
    assert.ok(!isTestFile('packages/api/src/routers/foo.ts'));
    assert.ok(!isTestFile('packages/app/src/App.tsx'));
  });
});
// isTrivialFile: non-functional content (docs, images, lock files, CI plumbing)
// that alone qualifies a PR for Tier 1.
describe('isTrivialFile', () => {
  it('matches docs and images', () => {
    assert.ok(isTrivialFile('README.md'));
    assert.ok(isTrivialFile('docs/setup.txt'));
    assert.ok(isTrivialFile('assets/logo.png'));
    assert.ok(isTrivialFile('assets/icon.svg'));
  });
  it('matches lock files and yarn config', () => {
    assert.ok(isTrivialFile('yarn.lock'));
    assert.ok(isTrivialFile('package-lock.json'));
    assert.ok(isTrivialFile('.yarnrc.yml'));
  });
  it('matches .changeset/ files', () => {
    assert.ok(isTrivialFile('.changeset/some-change.md'));
    assert.ok(isTrivialFile('.changeset/fancy-bears-dance.md'));
  });
  it('matches .env.example and .github/images/', () => {
    assert.ok(isTrivialFile('.env.example'));
    assert.ok(isTrivialFile('.github/images/screenshot.png'));
  });
  it('matches .github/scripts/ files', () => {
    assert.ok(isTrivialFile('.github/scripts/pr-triage.js'));
    assert.ok(isTrivialFile('.github/scripts/pr-triage-classify.js'));
  });
  it('matches .github/workflows/ files', () => {
    assert.ok(isTrivialFile('.github/workflows/pr-triage.yml'));
    assert.ok(isTrivialFile('.github/workflows/knip.yml'));
    // main.yml and release.yml are also trivial per isTrivialFile, but they are
    // caught first by isCriticalFile in computeSignals, so they still → Tier 4
    assert.ok(isTrivialFile('.github/workflows/main.yml'));
  });
  it('does not match production source files', () => {
    assert.ok(!isTrivialFile('packages/app/src/App.tsx'));
    assert.ok(!isTrivialFile('packages/api/src/routers/logs.ts'));
    assert.ok(!isTrivialFile('Makefile'));
    assert.ok(!isTrivialFile('knip.json'));
  });
});
// isCriticalFile: paths that force Tier 4 review (auth, sensitive routes,
// core models, config/tasks, telemetry pipeline, docker images, core CI).
describe('isCriticalFile', () => {
  it('matches auth middleware', () => {
    assert.ok(isCriticalFile('packages/api/src/middleware/auth.ts'));
    assert.ok(isCriticalFile('packages/api/src/middleware/auth/index.ts'));
  });
  it('matches sensitive API routes', () => {
    assert.ok(isCriticalFile('packages/api/src/routers/api/me.ts'));
    assert.ok(isCriticalFile('packages/api/src/routers/api/team.ts'));
    assert.ok(isCriticalFile('packages/api/src/routers/external-api/logs.ts'));
  });
  it('matches core data models', () => {
    assert.ok(isCriticalFile('packages/api/src/models/user.ts'));
    assert.ok(isCriticalFile('packages/api/src/models/team.ts'));
    assert.ok(isCriticalFile('packages/api/src/models/teamInvite.ts'));
  });
  it('matches config, tasks, otel, clickhouse, and core CI workflows', () => {
    assert.ok(isCriticalFile('packages/api/src/config.ts'));
    assert.ok(isCriticalFile('packages/api/src/tasks/alertChecker.ts'));
    assert.ok(isCriticalFile('packages/otel-collector/config.yaml'));
    assert.ok(isCriticalFile('docker/clickhouse/config.xml'));
    assert.ok(isCriticalFile('.github/workflows/main.yml'));
    assert.ok(isCriticalFile('.github/workflows/release.yml'));
  });
  it('does NOT flag non-core workflow files as critical', () => {
    assert.ok(!isCriticalFile('.github/workflows/pr-triage.yml'));
    assert.ok(!isCriticalFile('.github/workflows/knip.yml'));
    assert.ok(!isCriticalFile('.github/workflows/claude.yml'));
  });
  it('matches docker/hyperdx/', () => {
    assert.ok(isCriticalFile('docker/hyperdx/Dockerfile'));
  });
  it('does NOT match non-critical API models', () => {
    assert.ok(!isCriticalFile('packages/api/src/models/alert.ts'));
    assert.ok(!isCriticalFile('packages/api/src/models/dashboard.ts'));
  });
  it('does NOT match regular app and API files', () => {
    assert.ok(!isCriticalFile('packages/app/src/App.tsx'));
    assert.ok(!isCriticalFile('packages/api/src/routers/logs.ts'));
  });
  // Note: isCriticalFile DOES return true for test files under critical paths
  // (e.g. packages/api/src/tasks/tests/util.test.ts). The exclusion happens in
  // computeSignals, which filters test files out before building criticalFiles.
  it('returns true for test files under critical paths (exclusion is in computeSignals)', () => {
    assert.ok(isCriticalFile('packages/api/src/tasks/tests/util.test.ts'));
  });
});
// ── computeSignals ───────────────────────────────────────────────────────────
// computeSignals: derives all classification facts (line counts, branch/author
// flags, layer coverage) from the PR object plus its file list — no API calls.
describe('computeSignals', () => {
  it('separates prod, test, and trivial file line counts', () => {
    const pr = makePR('alice', 'feature/foo');
    const files = [
      makeFile('packages/app/src/Foo.tsx', 20, 5), // prod: 25 lines
      makeFile('packages/app/src/__tests__/Foo.test.tsx', 50, 0), // test: 50 lines
      makeFile('README.md', 2, 1), // trivial: excluded
    ];
    const s = computeSignals(pr, files);
    assert.equal(s.prodFiles.length, 1);
    assert.equal(s.prodLines, 25);
    assert.equal(s.testLines, 50);
  });
  it('excludes changeset files from prod counts', () => {
    const pr = makePR('alice', 'feature/foo');
    const files = [
      makeFile('packages/app/src/Foo.tsx', 20, 5),
      makeFile('.changeset/witty-foxes-run.md', 5, 0), // trivial
    ];
    const s = computeSignals(pr, files);
    assert.equal(s.prodFiles.length, 1);
    assert.equal(s.prodLines, 25);
  });
  it('detects agent branches by prefix', () => {
    for (const prefix of ['claude/', 'agent/', 'ai/']) {
      const s = computeSignals(makePR('alice', `${prefix}fix-thing`), []);
      assert.ok(s.isAgentBranch, `expected isAgentBranch for prefix "${prefix}"`);
    }
    assert.ok(!computeSignals(makePR('alice', 'feature/normal'), []).isAgentBranch);
  });
  it('detects bot authors', () => {
    assert.ok(computeSignals(makePR('dependabot[bot]', 'dependabot/npm/foo'), []).isBotAuthor);
    assert.ok(!computeSignals(makePR('alice', 'feature/foo'), []).isBotAuthor);
  });
  it('sets allFilesTrivial when every file is trivial', () => {
    const files = [makeFile('README.md'), makeFile('yarn.lock')];
    assert.ok(computeSignals(makePR('alice', 'docs/update'), files).allFilesTrivial);
  });
  it('does not set allFilesTrivial for mixed files', () => {
    const files = [makeFile('README.md'), makeFile('packages/app/src/Foo.tsx')];
    assert.ok(!computeSignals(makePR('alice', 'feat/foo'), files).allFilesTrivial);
  });
  it('detects cross-layer changes (frontend + backend)', () => {
    const files = [
      makeFile('packages/app/src/NewFeature.tsx'), // frontend
      makeFile('packages/api/src/services/newFeature.ts'), // backend (not models/routers)
    ];
    const s = computeSignals(makePR('alice', 'feat/new'), files);
    assert.ok(s.isCrossLayer);
    assert.ok(s.touchesFrontend);
    assert.ok(s.touchesBackend);
  });
  it('detects cross-layer changes (backend + shared-utils)', () => {
    const files = [
      makeFile('packages/api/src/services/foo.ts'),
      makeFile('packages/common-utils/src/queryParser.ts'),
    ];
    const s = computeSignals(makePR('alice', 'feat/foo'), files);
    assert.ok(s.isCrossLayer);
    assert.ok(s.touchesSharedUtils);
  });
  it('does not flag single-package changes as cross-layer', () => {
    const files = [
      makeFile('packages/app/src/Foo.tsx'),
      makeFile('packages/app/src/Bar.tsx'),
    ];
    assert.ok(!computeSignals(makePR('alice', 'feat/foo'), files).isCrossLayer);
  });
  it('blocks agent branch from Tier 2 when prod lines exceed threshold', () => {
    // 60 prod lines > AGENT_TIER2_MAX_LINES (50)
    const s = computeSignals(makePR('alice', 'claude/feature'), [
      makeFile('packages/app/src/Foo.tsx', 60, 0),
    ]);
    assert.ok(s.agentBlocksTier2);
  });
  it('blocks agent branch from Tier 2 when prod file count exceeds threshold', () => {
    // 5 prod files > AGENT_TIER2_MAX_PROD_FILES (3)
    const files = Array.from({ length: 5 }, (_, i) =>
      makeFile(`packages/app/src/File${i}.tsx`, 5, 2)
    );
    const s = computeSignals(makePR('alice', 'claude/feature'), files);
    assert.ok(s.agentBlocksTier2);
  });
  it('does NOT block agent branch when change is small and focused', () => {
    // 16 prod lines, 1 prod file — well under both thresholds
    const s = computeSignals(makePR('mikeshi', 'claude/fix-mobile-nav'), [
      makeFile('packages/app/src/AppNav.tsx', 11, 5),
    ]);
    assert.ok(!s.agentBlocksTier2);
  });
});
// ── determineTier ────────────────────────────────────────────────────────────
// determineTier: end-to-end tier assignment. Order of precedence under test:
// critical files → Tier 4; bot/trivial → Tier 1; small isolated → Tier 2;
// otherwise Tier 3, escalated to Tier 4 past the size thresholds.
describe('determineTier', () => {
  describe('Tier 1', () => {
    it('bot author', () => {
      assert.equal(classify('dependabot[bot]', 'dependabot/npm/foo', [
        makeFile('package.json', 5, 3),
      ]), 1);
    });
    // package.json is not in TIER1_PATTERNS (it's a production file), but bot
    // author short-circuits to Tier 1 before the trivial-file check fires.
    it('bot author with package.json (non-trivial file) is still Tier 1', () => {
      assert.equal(classify('dependabot[bot]', 'dependabot/npm/lodash', [
        makeFile('package.json', 5, 3),
        makeFile('packages/api/package.json', 2, 2),
      ]), 1);
    });
    it('all trivial files (docs + lock)', () => {
      assert.equal(classify('alice', 'docs/update-readme', [
        makeFile('README.md', 10, 2),
        makeFile('docs/setup.md', 5, 0),
        makeFile('yarn.lock', 100, 80),
      ]), 1);
    });
    it('changeset-only PR', () => {
      assert.equal(classify('alice', 'release/v2.1', [
        makeFile('.changeset/witty-foxes-run.md', 4, 0),
      ]), 1);
    });
  });
  describe('Tier 4', () => {
    it('touches auth middleware', () => {
      assert.equal(classify('alice', 'fix/auth-bug', [
        makeFile('packages/api/src/middleware/auth.ts', 20, 5),
      ]), 4);
    });
    it('touches ClickHouse docker config', () => {
      assert.equal(classify('alice', 'infra/clickhouse-update', [
        makeFile('docker/clickhouse/config.xml', 10, 2),
      ]), 4);
    });
    it('touches main.yml or release.yml', () => {
      assert.equal(classify('alice', 'ci/add-step', [
        makeFile('.github/workflows/main.yml', 15, 3),
      ]), 4);
      assert.equal(classify('alice', 'ci/release-fix', [
        makeFile('.github/workflows/release.yml', 8, 2),
      ]), 4);
    });
    it('non-critical workflow-only changes are Tier 1 (workflow files are trivial)', () => {
      assert.equal(classify('alice', 'ci/add-triage-step', [
        makeFile('.github/workflows/pr-triage.yml', 10, 2),
      ]), 1);
    });
    it('does NOT flag test files under critical paths as Tier 4', () => {
      // e.g. packages/api/src/tasks/tests/util.test.ts should not be critical
      assert.equal(classify('alice', 'feat/alert-tests', [
        makeFile('packages/api/src/tasks/tests/util.test.ts', 40, 0),
        makeFile('packages/api/src/tasks/checkAlerts/tests/checkAlerts.test.ts', 80, 0),
      ]), 2);
    });
    it('touches core user/team models', () => {
      assert.equal(classify('alice', 'feat/user-fields', [
        makeFile('packages/api/src/models/user.ts', 10, 2),
      ]), 4);
    });
    it('escalates Tier 3 human branch past 1000 prod lines', () => {
      assert.equal(classify('alice', 'feat/huge-refactor', [
        makeFile('packages/app/src/BigComponent.tsx', 600, 450), // 1050 lines
      ]), 4);
    });
    it('escalates Tier 3 agent branch past 400 prod lines (stricter threshold)', () => {
      assert.equal(classify('alice', 'claude/large-feature', [
        makeFile('packages/app/src/BigFeature.tsx', 300, 120), // 420 lines
      ]), 4);
    });
  });
  describe('Tier 2', () => {
    it('small single-layer frontend change', () => {
      assert.equal(classify('alice', 'fix/button-style', [
        makeFile('packages/app/src/components/Button.tsx', 20, 10),
      ]), 2);
    });
    it('small single-layer backend change (not models/routers)', () => {
      assert.equal(classify('alice', 'fix/service-bug', [
        makeFile('packages/api/src/services/logs.ts', 30, 15),
      ]), 2);
    });
    it('agent branch small enough to qualify (PR #1431 pattern: 1 file, 16 lines)', () => {
      assert.equal(classify('mikeshi', 'claude/fix-mobile-nav', [
        makeFile('packages/app/src/AppNav.tsx', 11, 5),
      ]), 2);
    });
    it('agent branch exactly at file limit (3 prod files, small lines)', () => {
      const files = Array.from({ length: 3 }, (_, i) =>
        makeFile(`packages/app/src/File${i}.tsx`, 10, 5)
      );
      assert.equal(classify('alice', 'claude/small-multi', files), 2);
    });
    it('human branch at 149 prod lines (just under threshold)', () => {
      assert.equal(classify('alice', 'fix/component', [
        makeFile('packages/app/src/Foo.tsx', 100, 49), // 149 lines
      ]), 2);
    });
    it('agent branch at exactly 49 prod lines qualifies for Tier 2', () => {
      assert.equal(classify('alice', 'claude/fix', [
        makeFile('packages/app/src/Foo.tsx', 49, 0),
      ]), 2);
    });
  });
  describe('Tier 3', () => {
    it('cross-layer change (frontend + backend)', () => {
      assert.equal(classify('alice', 'feat/new-feature', [
        makeFile('packages/app/src/NewFeature.tsx', 30, 5),
        makeFile('packages/api/src/services/newFeature.ts', 40, 10),
      ]), 3);
    });
    it('touches API routes (non-critical)', () => {
      assert.equal(classify('alice', 'feat/new-route', [
        makeFile('packages/api/src/routers/logs.ts', 30, 5),
      ]), 3);
    });
    it('touches API models (non-critical)', () => {
      assert.equal(classify('alice', 'feat/model-field', [
        makeFile('packages/api/src/models/alert.ts', 20, 3),
      ]), 3);
    });
    it('agent branch at exactly 50 prod lines is blocked from Tier 2', () => {
      assert.equal(classify('alice', 'claude/feature', [
        makeFile('packages/app/src/Foo.tsx', 50, 0), // exactly AGENT_TIER2_MAX_LINES — >= blocks it
      ]), 3);
    });
    it('agent branch over prod-line threshold (60 > 50) → Tier 3, not Tier 2', () => {
      assert.equal(classify('alice', 'claude/medium-feature', [
        makeFile('packages/app/src/Foo.tsx', 60, 0),
      ]), 3);
    });
    it('agent branch over file count threshold (4 files) → Tier 3', () => {
      const files = Array.from({ length: 4 }, (_, i) =>
        makeFile(`packages/app/src/File${i}.tsx`, 10, 5)
      );
      assert.equal(classify('alice', 'claude/big-feature', files), 3);
    });
    it('does NOT escalate agent branch at exactly 400 lines (threshold is exclusive)', () => {
      // prodLines > threshold, not >=, so 400 stays at Tier 3
      assert.equal(classify('alice', 'claude/medium-large', [
        makeFile('packages/app/src/Feature.tsx', 200, 200), // exactly 400
      ]), 3);
    });
    it('large test additions with small prod change stay Tier 3 (PR #2122 pattern)', () => {
      // Alert threshold PR: 1300 total adds but ~1100 are tests
      const files = [
        makeFile('packages/api/src/services/checkAlerts.ts', 180, 70), // prod: 250 lines
        makeFile('packages/api/src/__tests__/checkAlerts.test.ts', 1100, 0), // test: excluded
      ];
      // 250 prod lines > TIER2_MAX_LINES (150) → Tier 3, not Tier 4
      assert.equal(classify('alice', 'feat/alert-thresholds', files), 3);
    });
    it('human branch at exactly 150 prod lines is Tier 3, not Tier 2', () => {
      assert.equal(classify('alice', 'fix/component', [
        makeFile('packages/app/src/Foo.tsx', 100, 50), // exactly TIER2_MAX_LINES — < is exclusive
      ]), 3);
    });
    it('does NOT escalate human branch at exactly 1000 prod lines', () => {
      assert.equal(classify('alice', 'feat/medium-large', [
        makeFile('packages/app/src/Feature.tsx', 500, 500), // exactly 1000
      ]), 3);
    });
  });
});
// ── buildTierComment ─────────────────────────────────────────────────────────
// buildTierComment: Markdown body generation. Assertions check for the
// sentinel marker, tier headlines, trigger explanations, and the stats block.
describe('buildTierComment', () => {
  /** Build a signal object with sensible defaults, overrideable per test */
  function makeSignals(overrides = {}) {
    return {
      author: 'alice',
      branchName: 'feature/foo',
      prodFiles: [makeFile('packages/app/src/Foo.tsx')],
      prodLines: 50,
      testLines: 0,
      criticalFiles: [],
      isAgentBranch: false,
      isBotAuthor: false,
      allFilesTrivial: false,
      touchesApiModels: false,
      touchesFrontend: true,
      touchesBackend: false,
      touchesSharedUtils: false,
      isCrossLayer: false,
      agentBlocksTier2: false,
      ...overrides,
    };
  }
  it('always includes the pr-triage sentinel marker', () => {
    // The sentinel lets the orchestration script find and update its own comment
    assert.ok(buildTierComment(2, makeSignals()).includes('<!-- pr-triage -->'));
  });
  it('includes the correct headline for each tier', () => {
    assert.ok(buildTierComment(1, makeSignals()).includes('Tier 1'));
    assert.ok(buildTierComment(2, makeSignals()).includes('Tier 2'));
    assert.ok(buildTierComment(3, makeSignals()).includes('Tier 3'));
    assert.ok(buildTierComment(4, makeSignals()).includes('Tier 4'));
  });
  it('includes override instructions with the correct tier label', () => {
    const body = buildTierComment(3, makeSignals());
    assert.ok(body.includes('review/tier-3'));
    assert.ok(body.includes('Manual overrides are preserved'));
  });
  it('lists critical files when present', () => {
    const signals = makeSignals({
      criticalFiles: [makeFile('packages/api/src/middleware/auth.ts')],
    });
    const body = buildTierComment(4, signals);
    assert.ok(body.includes('Critical-path files'));
    assert.ok(body.includes('auth.ts'));
  });
  it('explains cross-layer trigger with which layers are involved', () => {
    const signals = makeSignals({
      isCrossLayer: true,
      touchesFrontend: true,
      touchesBackend: true,
      touchesSharedUtils: false,
    });
    const body = buildTierComment(3, signals);
    assert.ok(body.includes('Cross-layer change'));
    assert.ok(body.includes('packages/app'));
    assert.ok(body.includes('packages/api'));
  });
  it('explains API model/route trigger', () => {
    const body = buildTierComment(3, makeSignals({ touchesApiModels: true }));
    assert.ok(body.includes('API routes or data models'));
  });
  it('explains agent branch bump to Tier 3', () => {
    const signals = makeSignals({
      isAgentBranch: true,
      agentBlocksTier2: true,
      branchName: 'claude/big-feature',
      prodLines: 80,
      prodFiles: Array.from({ length: 5 }, (_, i) => makeFile(`packages/app/src/File${i}.tsx`)),
    });
    const body = buildTierComment(3, signals);
    assert.ok(body.includes('bumped to Tier 3'));
  });
  it('notes when agent branch is small enough for Tier 2', () => {
    const signals = makeSignals({
      isAgentBranch: true,
      agentBlocksTier2: false,
      branchName: 'claude/tiny-fix',
    });
    const body = buildTierComment(2, signals);
    assert.ok(body.includes('small enough to qualify for Tier 2'));
  });
  it('shows test line count in stats when non-zero', () => {
    const body = buildTierComment(2, makeSignals({ testLines: 200 }));
    assert.ok(body.includes('200 in test files'));
  });
  it('omits test line note when testLines is 0', () => {
    const body = buildTierComment(2, makeSignals({ testLines: 0 }));
    assert.ok(!body.includes('test files'));
  });
  it('includes a catch-all trigger for standard Tier 3 PRs with no specific signals', () => {
    const body = buildTierComment(3, makeSignals());
    assert.ok(body.includes('Standard feature/fix'));
  });
  it('includes bot-author trigger for Tier 1 bot PRs', () => {
    const body = buildTierComment(1, makeSignals({ isBotAuthor: true, author: 'dependabot[bot]' }));
    assert.ok(body.includes('Bot author'));
  });
});

257
.github/scripts/pr-triage-classify.js vendored Normal file
View file

@@ -0,0 +1,257 @@
'use strict';
// ── File classification patterns ─────────────────────────────────────────────
// Paths that always force Tier 4 (critical) review. Note: test files under
// these paths still match here; computeSignals filters them out afterwards.
const TIER4_PATTERNS = [
  /^packages\/api\/src\/middleware\/auth/, // auth middleware (file or directory)
  /^packages\/api\/src\/routers\/api\/me\./, // current-user API surface
  /^packages\/api\/src\/routers\/api\/team\./, // team-management API surface
  /^packages\/api\/src\/routers\/external-api\//, // externally exposed API
  /^packages\/api\/src\/models\/(user|team|teamInvite)\./, // core identity models
  /^packages\/api\/src\/config\./, // application configuration
  /^packages\/api\/src\/tasks\//, // background tasks (alerting, etc.)
  /^packages\/otel-collector\//, // OTel collector package
  /^docker\/otel-collector\//, // OTel collector image
  /^docker\/clickhouse\//, // ClickHouse image/config
  /^docker\/hyperdx\//, // main product image
  /^\.github\/workflows\/(main|release)\.yml$/, // core CI/CD workflows only
];
// Paths considered trivial: a PR touching ONLY these qualifies for Tier 1.
const TIER1_PATTERNS = [
  /\.(md|txt|png|jpg|jpeg|gif|svg|ico)$/i, // docs and images
  /^yarn\.lock$/,
  /^package-lock\.json$/,
  /^\.yarnrc\.yml$/,
  /^\.github\/images\//,
  /^\.env\.example$/,
  /^\.changeset\//, // version-bump config files; no functional code
  /^\.github\/scripts\//, // GitHub Actions scripts; not application code
  /^\.github\/workflows\//, // workflow files (main.yml/release.yml still caught by TIER4_PATTERNS)
];
// Paths whose line counts are excluded from the production-line totals.
const TEST_FILE_PATTERNS = [
  /\/__tests__\//,
  /\.test\.[jt]sx?$/,
  /\.spec\.[jt]sx?$/,
  /^packages\/app\/tests\//, // e2e specs outside src/
];
// ── Thresholds (all line counts exclude test and trivial files) ───────────────
const TIER2_MAX_LINES = 150; // max prod lines eligible for Tier 2 (exclusive: 150 itself is Tier 3)
const TIER4_ESCALATION_HUMAN = 1000; // Tier 3 → 4 for human branches (exclusive)
const TIER4_ESCALATION_AGENT = 400; // Tier 3 → 4 for agent branches (stricter, exclusive)
// Agent branches can reach Tier 2 only for very small, focused changes
const AGENT_TIER2_MAX_LINES = 50; // inclusive block: 50 lines already disqualifies
const AGENT_TIER2_MAX_PROD_FILES = 3; // exclusive block: 3 files still qualifies
// ── Other constants ──────────────────────────────────────────────────────────
const BOT_AUTHORS = ['dependabot', 'dependabot[bot]'];
const AGENT_BRANCH_PREFIXES = ['claude/', 'agent/', 'ai/'];
// GitHub label name/color/description for each tier (created on demand).
const TIER_LABELS = {
  1: { name: 'review/tier-1', color: '0E8A16', description: 'Trivial — auto-merge candidate once CI passes' },
  2: { name: 'review/tier-2', color: '1D76DB', description: 'Low risk — AI review + quick human skim' },
  3: { name: 'review/tier-3', color: 'E4E669', description: 'Standard — full human review required' },
  4: { name: 'review/tier-4', color: 'B60205', description: 'Critical — deep review + domain expert sign-off' },
};
// Human-readable copy rendered into the triage comment for each tier:
// emoji + headline for the title, detail for the summary line, process for
// the expected review flow, and sla for the response-time expectation.
const TIER_INFO = {
  1: {
    emoji: '🟢',
    headline: 'Tier 1 — Trivial',
    detail: 'Docs, images, lock files, or a dependency bump. No functional code changes detected.',
    process: 'Auto-merge once CI passes. No human review required.',
    sla: 'Resolves automatically.',
  },
  2: {
    emoji: '🔵',
    headline: 'Tier 2 — Low Risk',
    detail: 'Small, isolated change with no API route or data model modifications.',
    // "515 min" was a garbled "5–15 min" (dash lost); a quick skim with a
    // 4-business-hour SLA can only mean minutes, not 8+ hours.
    process: 'AI review + quick human skim (target: 5–15 min). Reviewer validates AI assessment and checks for domain-specific concerns.',
    sla: 'Resolve within 4 business hours.',
  },
  3: {
    emoji: '🟡',
    headline: 'Tier 3 — Standard',
    detail: 'Introduces new logic, modifies core functionality, or touches areas with non-trivial risk.',
    process: 'Full human review — logic, architecture, edge cases.',
    sla: 'First-pass feedback within 1 business day.',
  },
  4: {
    emoji: '🔴',
    headline: 'Tier 4 — Critical',
    detail: 'Touches auth, data models, config, tasks, OTel pipeline, ClickHouse, or CI/CD.',
    process: 'Deep review from a domain expert. Synchronous walkthrough may be required.',
    sla: 'Schedule synchronous review within 2 business days.',
  },
};
// ── File classification helpers ──────────────────────────────────────────────
/** True when the path matches any test-file pattern. */
function isTestFile(filename) {
  return TEST_FILE_PATTERNS.some(pattern => pattern.test(filename));
}
/** True when the path is trivial content (docs, images, lock files, CI plumbing). */
function isTrivialFile(filename) {
  return TIER1_PATTERNS.some(pattern => pattern.test(filename));
}
/** True when the path falls under a critical (Tier 4) area. */
function isCriticalFile(filename) {
  return TIER4_PATTERNS.some(pattern => pattern.test(filename));
}
// ── Signal computation ───────────────────────────────────────────────────────
// Derives every fact needed for tier determination and comment generation from
// the PR metadata plus its file list — no GitHub API calls are made here.
//
// @param {object} pr - GitHub PR object: { user: { login }, head: { ref } }
// @param {Array} filesRes - GitHub files array: [{ filename, additions, deletions }]
// @returns {object} flat signals object consumed by determineTier/buildTierComment
function computeSignals(pr, filesRes) {
  const prAuthor = pr.user.login;
  const ref = pr.head.ref;

  // Partition the changed files: tests are counted separately, trivial files
  // are dropped entirely, everything else is "production".
  const testChanges = [];
  const prodChanges = [];
  for (const file of filesRes) {
    if (isTestFile(file.filename)) {
      testChanges.push(file);
    } else if (!isTrivialFile(file.filename)) {
      prodChanges.push(file);
    }
  }
  // Critical paths force Tier 4 — but test files under them are excluded here.
  const critical = filesRes.filter(
    file => !isTestFile(file.filename) && isCriticalFile(file.filename)
  );

  const countLines = files =>
    files.reduce((total, f) => total + f.additions + f.deletions, 0);
  const prodTotal = countLines(prodChanges);
  const testTotal = countLines(testChanges);

  const agentBranch = AGENT_BRANCH_PREFIXES.some(prefix => ref.startsWith(prefix));
  const botAuthor = BOT_AUTHORS.includes(prAuthor);
  const onlyTrivial =
    filesRes.length > 0 && filesRes.every(file => isTrivialFile(file.filename));

  // API models and routes block Tier 2 — they carry implicit cross-cutting risk.
  const modelsOrRoutes = prodChanges.some(
    file =>
      file.filename.startsWith('packages/api/src/models/') ||
      file.filename.startsWith('packages/api/src/routers/')
  );

  // Cross-layer: production changes spanning two or more monorepo packages.
  const frontend = prodChanges.some(file => file.filename.startsWith('packages/app/'));
  const backend = prodChanges.some(file => file.filename.startsWith('packages/api/'));
  const sharedUtils = prodChanges.some(file => file.filename.startsWith('packages/common-utils/'));
  const layerCount = [frontend, backend, sharedUtils].filter(Boolean).length;

  // Agent branches reach Tier 2 only when the change is very small and focused.
  const blocksTier2 =
    agentBranch &&
    (prodTotal >= AGENT_TIER2_MAX_LINES || prodChanges.length > AGENT_TIER2_MAX_PROD_FILES);

  return {
    author: prAuthor,
    branchName: ref,
    prodFiles: prodChanges,
    prodLines: prodTotal,
    testLines: testTotal,
    criticalFiles: critical,
    isAgentBranch: agentBranch,
    isBotAuthor: botAuthor,
    allFilesTrivial: onlyTrivial,
    touchesApiModels: modelsOrRoutes,
    touchesFrontend: frontend,
    touchesBackend: backend,
    touchesSharedUtils: sharedUtils,
    isCrossLayer: layerCount >= 2,
    agentBlocksTier2: blocksTier2,
  };
}
// ── Tier determination ───────────────────────────────────────────────────────
// Maps the computed signals to a review tier, checked in strict priority order.
// @param {object} signals - output of computeSignals()
// @returns {number} tier - 1 | 2 | 3 | 4
function determineTier(signals) {
  // Critical infrastructure (auth, config, pipeline, CI/CD) always wins.
  if (signals.criticalFiles.length > 0) {
    return 4;
  }

  // Bot-authored PRs and docs/images/lock-file-only PRs are trivial.
  if (signals.isBotAuthor || signals.allFilesTrivial) {
    return 1;
  }

  // Tier 2 requires a small, isolated, single-layer change; agent branches
  // additionally must be under the stricter agent size/file limits
  // (agentBlocksTier2 is false only when they are).
  const smallAndIsolated =
    signals.prodLines < TIER2_MAX_LINES &&
    !signals.touchesApiModels &&
    !signals.isCrossLayer &&
    !signals.agentBlocksTier2;
  if (smallAndIsolated) {
    return 2;
  }

  // Everything else is standard review, escalated for very large diffs.
  const limit = signals.isAgentBranch ? TIER4_ESCALATION_AGENT : TIER4_ESCALATION_HUMAN;
  return signals.prodLines > limit ? 4 : 3;
}
// ── Comment generation ───────────────────────────────────────────────────────
// Renders the Markdown triage comment: sentinel marker, tier headline, the
// specific triggers that drove the classification, stats, and override help.
// @param {number} tier - 1 | 2 | 3 | 4
// @param {object} signals - output of computeSignals()
// @returns {string} - Markdown comment body
function buildTierComment(tier, signals) {
  const {
    author, branchName,
    prodFiles, prodLines, testLines, criticalFiles,
    isAgentBranch, isBotAuthor, allFilesTrivial,
    touchesApiModels, touchesFrontend, touchesBackend, touchesSharedUtils,
    isCrossLayer, agentBlocksTier2,
  } = signals;
  const info = TIER_INFO[tier];
  const sizeThreshold = isAgentBranch ? TIER4_ESCALATION_AGENT : TIER4_ESCALATION_HUMAN;
  // Primary triggers — the specific reasons this tier was assigned
  const triggers = [];
  if (criticalFiles.length > 0) {
    triggers.push(`**Critical-path files** (${criticalFiles.length}):\n${criticalFiles.map(f => ` - \`${f.filename}\``).join('\n')}`);
  }
  // Size escalation only matters when it (not a critical file) caused Tier 4
  if (tier === 4 && prodLines > sizeThreshold && criticalFiles.length === 0) {
    triggers.push(`**Large diff**: ${prodLines} production lines changed (threshold: ${sizeThreshold})`);
  }
  if (isBotAuthor) triggers.push(`**Bot author**: \`${author}\``);
  if (allFilesTrivial && !isBotAuthor) triggers.push('**All files are docs / images / lock files**');
  if (isCrossLayer) {
    const layers = [
      touchesFrontend && 'frontend (`packages/app`)',
      touchesBackend && 'backend (`packages/api`)',
      touchesSharedUtils && 'shared utils (`packages/common-utils`)',
    ].filter(Boolean);
    triggers.push(`**Cross-layer change**: touches ${layers.join(' + ')}`);
  }
  // Suppressed when critical files already explain the tier
  if (touchesApiModels && criticalFiles.length === 0) {
    triggers.push('**Touches API routes or data models** — hidden complexity risk');
  }
  if (isAgentBranch && agentBlocksTier2 && tier <= 3) {
    triggers.push(`**Agent-generated branch** (\`${branchName}\`) with ${prodLines} prod lines across ${prodFiles.length} files — bumped to Tier 3 for mandatory human review`);
  }
  // Catch-all so the comment never renders an empty "Why this tier" section
  if (triggers.length === 0) {
    triggers.push('**Standard feature/fix** — introduces new logic or modifies core functionality');
  }
  // Informational context — didn't drive the tier on their own
  const contextSignals = [];
  if (isAgentBranch && !agentBlocksTier2 && tier === 2) {
    contextSignals.push(`agent branch (\`${branchName}\`) — change small enough to qualify for Tier 2`);
  } else if (isAgentBranch && tier === 4) {
    contextSignals.push(`agent branch (\`${branchName}\`)`);
  }
  const triggerSection = `\n**Why this tier:**\n${triggers.map(t => `- ${t}`).join('\n')}`;
  const contextSection = contextSignals.length > 0
    ? `\n**Additional context:** ${contextSignals.join(', ')}`
    : '';
  // Assemble the final Markdown body; '<!-- pr-triage -->' is the sentinel the
  // orchestration script uses to find and update its own comment.
  return [
    '<!-- pr-triage -->',
    `## ${info.emoji} ${info.headline}`,
    '',
    info.detail,
    triggerSection,
    contextSection,
    '',
    `**Review process**: ${info.process}`,
    `**SLA**: ${info.sla}`,
    '',
    '<details><summary>Stats</summary>',
    '',
    `- Production files changed: ${prodFiles.length}`,
    `- Production lines changed: ${prodLines}${testLines > 0 ? ` (+ ${testLines} in test files, excluded from tier calculation)` : ''}`,
    `- Branch: \`${branchName}\``,
    `- Author: ${author}`,
    '',
    '</details>',
    '',
    `> To override this classification, remove the \`${TIER_LABELS[tier].name}\` label and apply a different \`review/tier-*\` label. Manual overrides are preserved on subsequent pushes.`,
  ].join('\n');
}
// Public surface: constants for the orchestration script (pr-triage.js) plus
// the pure functions, exported individually so they can be unit-tested.
module.exports = {
  // Constants needed by the orchestration script
  TIER_LABELS, TIER_INFO,
  // Pure functions
  isTestFile, isTrivialFile, isCriticalFile,
  computeSignals, determineTier, buildTierComment,
};

123
.github/scripts/pr-triage.js vendored Normal file
View file

@@ -0,0 +1,123 @@
'use strict';
// Entry point for actions/github-script@v7 via script-path.
// Pure classification logic lives in pr-triage-classify.js so it can be
// unit-tested without GitHub API machinery.
const {
TIER_LABELS,
computeSignals, determineTier, buildTierComment,
} = require('./pr-triage-classify');
module.exports = async ({ github, context }) => {
const owner = context.repo.owner;
const repo = context.repo.repo;
// ── Determine which PRs to process ──────────────────────────────────────
let prNumbers;
if (context.eventName === 'workflow_dispatch') {
// Use context.payload.inputs to avoid script-injection via template interpolation
const input = (context.payload.inputs?.pr_number ?? '').trim();
if (input !== '') {
const num = Number(input);
if (!Number.isInteger(num) || num <= 0) {
throw new Error(`Invalid PR number: "${input}"`);
}
prNumbers = [num];
} else {
const openPRs = await github.paginate(
github.rest.pulls.list,
{ owner, repo, state: 'open', per_page: 100 }
);
prNumbers = openPRs.map(pr => pr.number);
console.log(`Bulk triage: found ${prNumbers.length} open PRs`);
}
} else {
prNumbers = [context.payload.pull_request.number];
}
// ── Ensure tier labels exist (once, before the loop) ────────────────────
const repoLabels = await github.paginate(
github.rest.issues.listLabelsForRepo,
{ owner, repo, per_page: 100 }
);
const repoLabelNames = new Set(repoLabels.map(l => l.name));
for (const label of Object.values(TIER_LABELS)) {
if (!repoLabelNames.has(label.name)) {
await github.rest.issues.createLabel({ owner, repo, ...label });
repoLabelNames.add(label.name);
}
}
// ── Classify a single PR ─────────────────────────────────────────────────
async function classifyPR(prNumber) {
const filesRes = await github.paginate(
github.rest.pulls.listFiles,
{ owner, repo, pull_number: prNumber, per_page: 100 }
);
const { data: pr } = await github.rest.pulls.get({ owner, repo, pull_number: prNumber });
const { data: currentLabels } = await github.rest.issues.listLabelsOnIssue({ owner, repo, issue_number: prNumber });
const currentLabelNames = new Set(currentLabels.map(l => l.name));
// Skip drafts (bulk mode; PR events already filter these via the job condition)
if (pr.draft) {
console.log(`PR #${prNumber}: skipping draft`);
return;
}
// Respect manual tier overrides — don't overwrite labels applied by humans
const existingTierLabel = currentLabels.find(l => l.name.startsWith('review/tier-'));
if (existingTierLabel) {
const events = await github.paginate(
github.rest.issues.listEvents,
{ owner, repo, issue_number: prNumber, per_page: 100 }
);
const lastLabelEvent = events
.filter(e => e.event === 'labeled' && e.label?.name === existingTierLabel.name)
.pop();
if (lastLabelEvent && lastLabelEvent.actor.type !== 'Bot') {
console.log(`PR #${prNumber}: tier manually set to ${existingTierLabel.name} by ${lastLabelEvent.actor.login} — skipping`);
return;
}
}
const signals = computeSignals(pr, filesRes);
const tier = determineTier(signals);
const body = buildTierComment(tier, signals);
// Apply the tier label (remove any stale tier label first)
for (const label of currentLabels) {
if (label.name.startsWith('review/tier-') && label.name !== TIER_LABELS[tier].name) {
await github.rest.issues.removeLabel({ owner, repo, issue_number: prNumber, name: label.name });
}
}
if (!currentLabelNames.has(TIER_LABELS[tier].name)) {
await github.rest.issues.addLabels({ owner, repo, issue_number: prNumber, labels: [TIER_LABELS[tier].name] });
}
// Post or update the triage comment
const comments = await github.paginate(
github.rest.issues.listComments,
{ owner, repo, issue_number: prNumber, per_page: 100 }
);
const existingComment = comments.find(
c => c.user.login === 'github-actions[bot]' && c.body.includes('<!-- pr-triage -->')
);
if (existingComment) {
await github.rest.issues.updateComment({ owner, repo, comment_id: existingComment.id, body });
} else {
await github.rest.issues.createComment({ owner, repo, issue_number: prNumber, body });
}
console.log(`PR #${prNumber}: Tier ${tier} (${signals.prodLines} prod lines, ${signals.prodFiles.length} prod files, ${signals.testLines} test lines)`);
}
// ── Process all target PRs ───────────────────────────────────────────────
for (const prNumber of prNumbers) {
try {
await classifyPR(prNumber);
} catch (err) {
console.error(`PR #${prNumber}: classification failed — ${err.message}`);
}
}
};

View file

@ -51,4 +51,4 @@ jobs:
github_token: ${{ secrets.GITHUB_TOKEN }}
use_sticky_comment: 'true'
include_fix_links: 'true'
claude_args: '--max-turns 20'
claude_args: '--max-turns 60'

View file

@ -47,7 +47,7 @@ jobs:
- name: Run unit tests
run: make ci-unit
integration:
timeout-minutes: 8
timeout-minutes: 16
runs-on: ubuntu-24.04
steps:
- name: Checkout
@ -93,7 +93,7 @@ jobs:
working-directory: ./packages/otel-collector
run: go test ./...
otel-smoke-test:
timeout-minutes: 8
timeout-minutes: 16
runs-on: ubuntu-24.04
steps:
- name: Checkout

View file

@ -2,6 +2,7 @@ name: PR Triage
on:
pull_request:
branches: [main]
types: [opened, synchronize, reopened, ready_for_review]
workflow_dispatch:
inputs:
@ -16,269 +17,39 @@ permissions:
pull-requests: write
issues: write
concurrency:
group:
${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id
}}
cancel-in-progress: true
jobs:
test:
name: Test triage logic
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version-file: '.nvmrc'
- run: node --test .github/scripts/__tests__/pr-triage-classify.test.js
classify:
name: Classify PR risk tier
needs: test
runs-on: ubuntu-24.04
timeout-minutes: 8
# For pull_request events skip drafts; workflow_dispatch always runs
if:
${{ github.event_name == 'workflow_dispatch' ||
!github.event.pull_request.draft }}
steps:
- uses: actions/checkout@v4
- name: Classify and label PR(s)
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const owner = context.repo.owner;
const repo = context.repo.repo;
// ── Determine which PRs to process ──────────────────────────────
let prNumbers;
if (context.eventName === 'workflow_dispatch') {
// Use context.payload.inputs to avoid script-injection via template interpolation
const input = (context.payload.inputs?.pr_number ?? '').trim();
if (input && input !== '') {
prNumbers = [Number(input)];
} else {
const openPRs = await github.paginate(
github.rest.pulls.list,
{ owner, repo, state: 'open', per_page: 100 }
);
prNumbers = openPRs.map(pr => pr.number);
console.log(`Bulk triage: found ${prNumbers.length} open PRs`);
}
} else {
prNumbers = [context.payload.pull_request.number];
}
// ── Shared constants ─────────────────────────────────────────────
const TIER4_PATTERNS = [
/^packages\/api\/src\/middleware\/auth/,
/^packages\/api\/src\/routers\/api\/me\./,
/^packages\/api\/src\/routers\/api\/team\./,
/^packages\/api\/src\/routers\/external-api\//,
/^packages\/api\/src\/models\/(user|team|teamInvite)\./,
/^packages\/api\/src\/config\./,
/^packages\/api\/src\/tasks\//,
/^packages\/otel-collector\//,
/^docker\/otel-collector\//,
/^docker\/clickhouse\//,
/^\.github\/workflows\//,
];
const TIER1_PATTERNS = [
/\.(md|txt|png|jpg|jpeg|gif|svg|ico)$/i,
/^yarn\.lock$/,
/^package-lock\.json$/,
/^\.yarnrc\.yml$/,
/^\.github\/images\//,
/^\.env\.example$/,
];
const BOT_AUTHORS = ['dependabot', 'dependabot[bot]'];
const AGENT_BRANCH_PREFIXES = ['claude/', 'agent/', 'ai/'];
const TIER_LABELS = {
1: { name: 'review/tier-1', color: '0E8A16', description: 'Trivial — auto-merge candidate once CI passes' },
2: { name: 'review/tier-2', color: '1D76DB', description: 'Low risk — AI review + quick human skim' },
3: { name: 'review/tier-3', color: 'E4E669', description: 'Standard — full human review required' },
4: { name: 'review/tier-4', color: 'B60205', description: 'Critical — deep review + domain expert sign-off' },
};
const TIER_INFO = {
1: {
emoji: '🟢',
headline: 'Tier 1 — Trivial',
detail: 'Docs, images, lock files, or a dependency bump. No functional code changes detected.',
process: 'Auto-merge once CI passes. No human review required.',
sla: 'Resolves automatically.',
},
2: {
emoji: '🔵',
headline: 'Tier 2 — Low Risk',
detail: 'Small, isolated change with no API route or data model modifications.',
process: 'AI review + quick human skim (target: 515 min). Reviewer validates AI assessment and checks for domain-specific concerns.',
sla: 'Resolve within 4 business hours.',
},
3: {
emoji: '🟡',
headline: 'Tier 3 — Standard',
detail: 'Introduces new logic, modifies core functionality, or touches areas with non-trivial risk.',
process: 'Full human review — logic, architecture, edge cases.',
sla: 'First-pass feedback within 1 business day.',
},
4: {
emoji: '🔴',
headline: 'Tier 4 — Critical',
detail: 'Touches auth, data models, config, tasks, OTel pipeline, ClickHouse, or CI/CD.',
process: 'Deep review from a domain expert. Synchronous walkthrough may be required.',
sla: 'Schedule synchronous review within 2 business days.',
},
};
// ── Ensure tier labels exist (once, before the loop) ─────────────
const repoLabels = await github.paginate(
github.rest.issues.listLabelsForRepo,
{ owner, repo, per_page: 100 }
);
const repoLabelNames = new Set(repoLabels.map(l => l.name));
for (const label of Object.values(TIER_LABELS)) {
if (!repoLabelNames.has(label.name)) {
await github.rest.issues.createLabel({ owner, repo, ...label });
repoLabelNames.add(label.name);
}
}
// ── Classify a single PR ─────────────────────────────────────────
async function classifyPR(prNumber) {
// Fetch changed files
const filesRes = await github.paginate(
github.rest.pulls.listFiles,
{ owner, repo, pull_number: prNumber, per_page: 100 }
);
const files = filesRes.map(f => f.filename);
const linesChanged = filesRes.reduce((sum, f) => sum + f.additions + f.deletions, 0);
// Fetch PR metadata
const { data: pr } = await github.rest.pulls.get({ owner, repo, pull_number: prNumber });
const author = pr.user.login;
const branchName = pr.head.ref;
// Skip drafts when running in bulk mode
if (pr.draft) {
console.log(`Skipping PR #${prNumber}: draft`);
return;
}
// Check for manual tier override — if a human last applied the label, respect it
const { data: currentLabels } = await github.rest.issues.listLabelsOnIssue({ owner, repo, issue_number: prNumber });
const existingTierLabel = currentLabels.find(l => l.name.startsWith('review/tier-'));
if (existingTierLabel) {
const events = await github.paginate(
github.rest.issues.listEvents,
{ owner, repo, issue_number: prNumber, per_page: 100 }
);
const lastLabelEvent = events
.filter(e => e.event === 'labeled' && e.label?.name === existingTierLabel.name)
.pop();
if (lastLabelEvent && lastLabelEvent.actor.type !== 'Bot') {
console.log(`PR #${prNumber}: tier manually set to ${existingTierLabel.name} by ${lastLabelEvent.actor.login} — skipping`);
return;
}
}
// Classify
const isTier4 = files.some(f => TIER4_PATTERNS.some(p => p.test(f)));
const isTrivialAuthor = BOT_AUTHORS.includes(author);
const allFilesTrivial = files.length > 0 && files.every(f => TIER1_PATTERNS.some(p => p.test(f)));
const isTier1 = isTrivialAuthor || allFilesTrivial;
const isAgentBranch = AGENT_BRANCH_PREFIXES.some(p => branchName.startsWith(p));
const touchesApiModels = files.some(f =>
f.startsWith('packages/api/src/models/') || f.startsWith('packages/api/src/routers/')
);
const isSmallDiff = linesChanged < 100;
// Agent branches are bumped to Tier 3 regardless of size to ensure human review
const isTier2 = !isTier4 && !isTier1 && isSmallDiff && !touchesApiModels && !isAgentBranch;
let tier;
if (isTier4) tier = 4;
else if (isTier1) tier = 1;
else if (isTier2) tier = 2;
else tier = 3;
// Escalate very large non-critical PRs to Tier 4. Agent branches use a lower
// threshold (400 lines) since they warrant deeper scrutiny. Human-authored PRs
// are only escalated for truly massive diffs (1000+ lines); Tier 3 already
// requires full human review, so smaller large PRs don't need the extra urgency.
const sizeThreshold = isAgentBranch ? 400 : 1000;
if (tier === 3 && linesChanged > sizeThreshold) tier = 4;
// Apply label
for (const existing of currentLabels) {
if (existing.name.startsWith('review/tier-') && existing.name !== TIER_LABELS[tier].name) {
await github.rest.issues.removeLabel({ owner, repo, issue_number: prNumber, name: existing.name });
}
}
if (!currentLabels.find(l => l.name === TIER_LABELS[tier].name)) {
await github.rest.issues.addLabels({ owner, repo, issue_number: prNumber, labels: [TIER_LABELS[tier].name] });
}
// Build comment body
const info = TIER_INFO[tier];
// Primary triggers — what actually determined (or escalated) the tier
const triggers = [];
const criticalFiles = files.filter(f => TIER4_PATTERNS.some(p => p.test(f)));
if (criticalFiles.length > 0) {
triggers.push(`**Critical-path files** (${criticalFiles.length}):\n${criticalFiles.map(f => ` - \`${f}\``).join('\n')}`);
}
if (tier === 4 && linesChanged > sizeThreshold && criticalFiles.length === 0) {
triggers.push(`**Large diff**: ${linesChanged} lines changed (threshold: ${sizeThreshold})`);
}
if (isTrivialAuthor) triggers.push(`**Bot author**: \`${author}\``);
if (allFilesTrivial && !isTrivialAuthor) triggers.push('**All files are docs / images / lock files**');
// Agent branch prevents Tier 2 — it's a cause for Tier 3, not just context
if (isAgentBranch && tier <= 3) triggers.push(`**Agent-generated branch** (\`${branchName}\`) — bumped to Tier 3 for mandatory human review`);
// Catch-all for Tier 3 PRs that don't match any specific trigger above
if (triggers.length === 0 && tier === 3) triggers.push('**Standard feature/fix** — introduces new logic or modifies core functionality');
// Informational signals — didn't drive the tier by themselves
const contextSignals = [];
if (isAgentBranch && tier === 4) contextSignals.push(`agent branch (\`${branchName}\`)`);
if (touchesApiModels && criticalFiles.length === 0) contextSignals.push('touches API routes or data models');
if (linesChanged > sizeThreshold && criticalFiles.length > 0) contextSignals.push(`large diff (${linesChanged} lines)`);
const triggerSection = triggers.length > 0
? `\n**Why this tier:**\n${triggers.map(t => `- ${t}`).join('\n')}`
: '';
const contextSection = contextSignals.length > 0
? `\n**Additional context:** ${contextSignals.join(', ')}`
: '';
const body = [
'<!-- pr-triage -->',
`## ${info.emoji} ${info.headline}`,
'',
info.detail,
triggerSection,
contextSection,
'',
`**Review process**: ${info.process}`,
`**SLA**: ${info.sla}`,
'',
`<details><summary>Stats</summary>`,
'',
`- Files changed: ${files.length}`,
`- Lines changed: ${linesChanged}`,
`- Branch: \`${branchName}\``,
`- Author: ${author}`,
'',
'</details>',
'',
`> To override this classification, remove the \`${TIER_LABELS[tier].name}\` label and apply a different \`review/tier-*\` label. Manual overrides are preserved on subsequent pushes.`,
].join('\n');
// Post or update the single triage comment
const comments = await github.paginate(
github.rest.issues.listComments,
{ owner, repo, issue_number: prNumber, per_page: 100 }
);
const existing = comments.find(c => c.user.login === 'github-actions[bot]' && c.body.includes('<!-- pr-triage -->'));
if (existing) {
await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
} else {
await github.rest.issues.createComment({ owner, repo, issue_number: prNumber, body });
}
console.log(`PR #${prNumber}: Tier ${tier} (${linesChanged} lines, ${files.length} files)`);
}
// ── Process all target PRs ───────────────────────────────────────
for (const prNumber of prNumbers) {
try {
await classifyPR(prNumber);
} catch (err) {
console.error(`PR #${prNumber}: classification failed — ${err.message}`);
}
}
const path = require('path');
const run = require(path.join(process.env.GITHUB_WORKSPACE, '.github/scripts/pr-triage.js'));
await run({ github, context });

View file

@ -25,6 +25,13 @@ jobs:
cache: 'yarn'
- name: Install root dependencies
run: yarn install
# Build @hyperdx/common-utils before changesets publish runs.
# @hyperdx/cli's `prepublishOnly` runs `yarn build` (tsup), which bundles
# imports from `@hyperdx/common-utils/dist/*`. Without this step those
# paths fail to resolve and the npm publish for @hyperdx/cli is skipped
# while the GitHub release/tag still succeeds. See HDX-4075.
- name: Build common-utils
run: make ci-build
- name: Create Release Pull Request or Publish to npm
if: always()
continue-on-error: true
@ -418,6 +425,106 @@ jobs:
"${IMAGE}:${VERSION}-arm64"
done
# ---------------------------------------------------------------------------
# CLI compile standalone binaries and upload as GitHub Release assets
# npm publishing is handled by changesets in the check_changesets job above.
# This job only compiles platform-specific binaries and creates a GH Release.
# ---------------------------------------------------------------------------
release-cli:
name: Release CLI Binaries
needs: [check_changesets, check_version]
if: needs.check_version.outputs.should_release == 'true'
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup node
uses: actions/setup-node@v4
with:
node-version-file: '.nvmrc'
cache-dependency-path: 'yarn.lock'
cache: 'yarn'
- name: Setup Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: '1.3.11'
- name: Install dependencies
run: yarn install
- name: Build common-utils
run: make ci-build
- name: Get CLI version
id: cli_version
run: |
CLI_VERSION=$(node -p "require('./packages/cli/package.json').version")
echo "version=${CLI_VERSION}" >> $GITHUB_OUTPUT
echo "CLI version: ${CLI_VERSION}"
- name: Check if CLI release already exists
id: check_cli_release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
if gh release view "cli-v${{ steps.cli_version.outputs.version }}" > /dev/null 2>&1; then
echo "Release cli-v${{ steps.cli_version.outputs.version }} already exists. Skipping."
echo "exists=true" >> $GITHUB_OUTPUT
else
echo "Release does not exist. Proceeding."
echo "exists=false" >> $GITHUB_OUTPUT
fi
- name: Compile CLI binaries
if: steps.check_cli_release.outputs.exists == 'false'
working-directory: packages/cli
run: |
yarn compile:linux
yarn compile:macos
yarn compile:macos-x64
- name: Create GitHub Release
if: steps.check_cli_release.outputs.exists == 'false'
uses: softprops/action-gh-release@153bb8e04406b158c6c84fc1615b65b24149a1fe # v2.6.1
with:
tag_name: cli-v${{ steps.cli_version.outputs.version }}
name: '@hyperdx/cli v${{ steps.cli_version.outputs.version }}'
body: |
## @hyperdx/cli v${{ steps.cli_version.outputs.version }}
### Installation
**npm (recommended):**
```bash
npm install -g @hyperdx/cli
```
**Or run directly with npx:**
```bash
npx @hyperdx/cli tui -s <your-hyperdx-api-url>
```
**Manual download (standalone binary, no Node.js required):**
```bash
# macOS Apple Silicon
curl -L https://github.com/hyperdxio/hyperdx/releases/download/cli-v${{ steps.cli_version.outputs.version }}/hdx-darwin-arm64 -o hdx
# macOS Intel
curl -L https://github.com/hyperdxio/hyperdx/releases/download/cli-v${{ steps.cli_version.outputs.version }}/hdx-darwin-x64 -o hdx
# Linux x64
curl -L https://github.com/hyperdxio/hyperdx/releases/download/cli-v${{ steps.cli_version.outputs.version }}/hdx-linux-x64 -o hdx
chmod +x hdx && sudo mv hdx /usr/local/bin/
```
### Usage
```bash
hdx auth login -s <your-hyperdx-api-url>
hdx tui
```
draft: false
prerelease: false
files: |
packages/cli/dist/hdx-linux-x64
packages/cli/dist/hdx-darwin-arm64
packages/cli/dist/hdx-darwin-x64
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# ---------------------------------------------------------------------------
# Downstream notifications
# ---------------------------------------------------------------------------
@ -553,6 +660,7 @@ jobs:
publish-otel-collector,
publish-local,
publish-all-in-one,
release-cli,
notify_helm_charts,
notify_ch,
notify_clickhouse_clickstack,

View file

@ -206,6 +206,49 @@ formatting checks pass. Fix any issues before creating the commit.
manual intervention rather than guessing. A wrong guess silently breaks
things; asking is always cheaper than debugging later.
## Cursor Cloud specific instructions
### Docker requirement
Docker must be installed and running before starting the dev stack or running
integration/E2E tests. The VM update script handles `yarn install` and
`yarn build:common-utils`, but Docker daemon startup is a prerequisite that must
already be available.
### Starting the dev stack
`yarn dev` uses `sh -c` to source `scripts/dev-env.sh`, which contains
bash-specific syntax (`BASH_SOURCE`). On systems where `/bin/sh` is `dash`
(e.g. Ubuntu), this fails with "Bad substitution". Work around it by running
with bash directly:
```bash
bash -c 'export PATH="/workspace/node_modules/.bin:$PATH" && source ./scripts/dev-env.sh && yarn build:common-utils && dotenvx run --convention=nextjs -- docker compose -p "$HDX_DEV_PROJECT" -f docker-compose.dev.yml up -d && yarn app:dev'
```
Port isolation assigns a slot based on the worktree directory name. In the
default `/workspace` directory, the slot is **76**, so services are at:
- **App**: http://localhost:30276
- **API**: http://localhost:30176
- **ClickHouse**: http://localhost:30576
- **MongoDB**: localhost:30476
### Key commands reference
See `AGENTS.md` above and `agent_docs/development.md` for the full command
reference. Quick summary:
- `make ci-lint` — lint + TypeScript type check
- `make ci-unit` — unit tests (all packages)
- `make dev-int FILE=<name>` — integration tests (spins up Docker services)
- `make dev-e2e FILE=<name>` — E2E tests (Playwright)
### First-time registration
When the dev stack starts fresh (empty MongoDB), the app shows a registration
page. Create any account to get started — no external auth provider is needed.
---
_Need more details? Check the `agent_docs/` directory or ask which documentation

View file

@ -125,7 +125,11 @@ yarn dev:unit
## AI-Assisted Development
The repo ships with configuration for AI coding assistants that enables interactive browser-based E2E test generation and debugging via the [Playwright MCP server](https://github.com/microsoft/playwright-mcp).
HyperDX includes an [MCP server](https://modelcontextprotocol.io/) that lets AI assistants query observability data, manage dashboards, and
explore data sources. See [MCP.md](/MCP.md) for setup instructions.
The repo also ships with configuration for AI coding assistants that enables interactive browser-based E2E test generation and debugging via
the [Playwright MCP server](https://github.com/microsoft/playwright-mcp).
### Claude Code

89
MCP.md Normal file
View file

@ -0,0 +1,89 @@
# HyperDX MCP Server
HyperDX exposes a [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) server that lets AI assistants query your observability
data, manage dashboards, and explore data sources directly.
## Prerequisites
- A running HyperDX instance (see [CONTRIBUTING.md](/CONTRIBUTING.md) for local development setup, or [DEPLOY.md](/DEPLOY.md) for
self-hosted deployment)
- A **Personal API Access Key** — find yours in the HyperDX UI under **Team Settings > API Keys > Personal API Access Key**
## Endpoint
The MCP server is available at the `/api/mcp` path on your HyperDX instance. For local development this is:
```
http://localhost:8080/api/mcp
```
Replace `localhost:8080` with your instance's host and port if you've customized the defaults.
## Connecting an MCP Client
The MCP server uses the **Streamable HTTP** transport with Bearer token authentication. In the examples below, replace `<your-hyperdx-url>`
with your instance URL (e.g. `http://localhost:8080`).
### Claude Code
```bash
claude mcp add --transport http hyperdx <your-hyperdx-url>/api/mcp \
--header "Authorization: Bearer <your-personal-access-key>"
```
### OpenCode
```bash
opencode mcp add --transport http hyperdx <your-hyperdx-url>/api/mcp \
--header "Authorization: Bearer <your-personal-access-key>"
```
### Cursor
Add the following to `.cursor/mcp.json` in your project (or your global Cursor settings):
```json
{
"mcpServers": {
"hyperdx": {
"url": "<your-hyperdx-url>/api/mcp",
"headers": {
"Authorization": "Bearer <your-personal-access-key>"
}
}
}
}
```
### MCP Inspector
The MCP Inspector is useful for interactively testing and debugging the server.
```bash
cd packages/api && yarn dev:mcp
```
Then configure the inspector:
1. **Transport Type:** Streamable HTTP
2. **URL:** `<your-hyperdx-url>/api/mcp`
3. **Authentication:** Header `Authorization` with value `Bearer <your-personal-access-key>`
4. Click **Connect**
### Other Clients
Any MCP client that supports Streamable HTTP transport can connect. Configure it with:
- **URL:** `<your-hyperdx-url>/api/mcp`
- **Header:** `Authorization: Bearer <your-personal-access-key>`
## Available Tools
| Tool | Description |
| -------------------------- | -------------------------------------------------------------------------------------------- |
| `hyperdx_list_sources` | List all data sources and database connections, including column schemas and attribute keys |
| `hyperdx_query` | Query observability data (logs, metrics, traces) using builder mode, search mode, or raw SQL |
| `hyperdx_get_dashboard` | List all dashboards or get full detail for a specific dashboard |
| `hyperdx_save_dashboard` | Create or update a dashboard with tiles (charts, tables, numbers, search, markdown) |
| `hyperdx_delete_dashboard` | Permanently delete a dashboard and its attached alerts |
| `hyperdx_query_tile` | Execute the query for a specific dashboard tile to validate results |

View file

@ -187,6 +187,10 @@ dev-unit:
ci-unit:
npx nx run-many -t ci:unit
.PHONY: ci-triage
ci-triage:
node --test .github/scripts/__tests__/pr-triage-classify.test.js
# ---------------------------------------------------------------------------
# E2E tests — port isolation is handled by scripts/test-e2e.sh
# ---------------------------------------------------------------------------

View file

@ -93,6 +93,34 @@ The project uses Mantine UI with **custom variants** defined in `packages/app/sr
This pattern cannot be enforced by ESLint and requires manual code review.
### EmptyState Component (REQUIRED)
**Use `EmptyState` (`@/components/EmptyState`) for all empty/no-data states.** Do not create ad-hoc inline empty states.
| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `icon` | `ReactNode` | — | Icon in the theme circle (hidden if not provided) |
| `title` | `string` | — | Heading text (headline style — no trailing period) |
| `description` | `ReactNode` | — | Subtext below the title |
| `children` | `ReactNode` | — | Actions (buttons, links) below description |
| `variant` | `"default" \| "card"` | `"default"` | `"card"` wraps in a bordered Paper |
```tsx
// ❌ BAD - ad-hoc inline empty states
<div className="text-center my-4 fs-8">No data</div>
<Text ta="center" c="dimmed">Nothing here</Text>
// ✅ GOOD - use the EmptyState component
<EmptyState
icon={<IconBell size={32} />}
title="No alerts created yet"
description="Create alerts from dashboard charts or saved searches."
variant="card"
/>
```
**Title copy**: Treat `title` as a short headline (like `Title` in the UI). Do **not** end it with a period. Use `description` for full sentences, which should use normal punctuation including a trailing period when appropriate. Match listing pages (e.g. dashboards and saved searches use parallel phrasing such as “No matching … yet” / “No … yet” without dots).
## Refactoring
- Edit files directly - don't create `component-v2.tsx` copies

View file

@ -5,6 +5,9 @@ services:
context: .
dockerfile: docker/otel-collector/Dockerfile
target: dev
args:
OTEL_COLLECTOR_VERSION: ${OTEL_COLLECTOR_VERSION}
OTEL_COLLECTOR_CORE_VERSION: ${OTEL_COLLECTOR_CORE_VERSION}
environment:
CLICKHOUSE_ENDPOINT: 'tcp://ch-server:9000?dial_timeout=10s'
HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE: ${HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}

View file

@ -26,6 +26,9 @@ services:
context: .
dockerfile: docker/otel-collector/Dockerfile
target: dev
args:
OTEL_COLLECTOR_VERSION: ${OTEL_COLLECTOR_VERSION}
OTEL_COLLECTOR_CORE_VERSION: ${OTEL_COLLECTOR_CORE_VERSION}
environment:
CLICKHOUSE_ENDPOINT: 'tcp://ch-server:9000?dial_timeout=10s'
CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT: 'ch-server:9363'
@ -63,6 +66,9 @@ services:
context: .
dockerfile: docker/otel-collector/Dockerfile
target: dev
args:
OTEL_COLLECTOR_VERSION: ${OTEL_COLLECTOR_VERSION}
OTEL_COLLECTOR_CORE_VERSION: ${OTEL_COLLECTOR_CORE_VERSION}
environment:
CLICKHOUSE_ENDPOINT: 'tcp://ch-server:9000?dial_timeout=10s'
CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT: 'ch-server:9363'
@ -74,9 +80,9 @@ services:
CUSTOM_OTELCOL_CONFIG_FILE: '/etc/otelcol-contrib/custom.config.yaml'
# Uncomment to enable stdout logging for the OTel collector
OTEL_SUPERVISOR_LOGS: 'true'
# Uncomment to enable JSON schema in ClickHouse
# Enable JSON schema in the ClickHouse exporter (per-exporter config)
# Be sure to also set BETA_CH_OTEL_JSON_SCHEMA_ENABLED to 'true' in ch-server
OTEL_AGENT_FEATURE_GATE_ARG: '--feature-gates=clickhouse.json'
HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE: 'true'
volumes:
- ./docker/otel-collector/config.yaml:/etc/otelcol-contrib/config.yaml
- ./docker/otel-collector/supervisor_docker.yaml.tmpl:/etc/otel/supervisor.yaml.tmpl
@ -103,7 +109,7 @@ services:
CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1
HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE: ${HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
# Set to 'true' to allow for proper OTel JSON Schema creation
# Be sure to also set the OTEL_AGENT_FEATURE_GATE_ARG env in otel-collector
# Be sure to also set HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE in otel-collector
# BETA_CH_OTEL_JSON_SCHEMA_ENABLED: 'true'
volumes:
- ./docker/clickhouse/local/config.xml:/etc/clickhouse-server/config.xml

View file

@ -9,15 +9,31 @@
ARG NODE_VERSION=22.22
ARG CLICKHOUSE_VERSION=26.1
ARG OTEL_COLLECTOR_VERSION=0.147.0
ARG OTEL_COLLECTOR_OPAMPSUPERVISOR_VERSION=0.147.0
ARG OTEL_COLLECTOR_VERSION=0.149.0
ARG OTEL_COLLECTOR_CORE_VERSION=1.55.0
# base #############################################################################################
# == Otel Collector Image ==
FROM otel/opentelemetry-collector-contrib:${OTEL_COLLECTOR_VERSION} AS otel_collector_base
FROM otel/opentelemetry-collector-opampsupervisor:${OTEL_COLLECTOR_OPAMPSUPERVISOR_VERSION} AS otel_collector_opampsupervisor_base
FROM otel/opentelemetry-collector-opampsupervisor:${OTEL_COLLECTOR_VERSION} AS otel_collector_opampsupervisor_base
FROM kukymbr/goose-docker@sha256:0cd025636df126e7f66472861ca4db3683bc649be46cd1f6ef1a316209058e23 AS goose
# Build the custom HyperDX collector binary using OCB (OpenTelemetry Collector Builder).
# This replaces the pre-built otel/opentelemetry-collector-contrib image so we can
# include custom receiver/processor components alongside the standard contrib ones.
# Note: The official OCB image may ship an older Go than the contrib modules require,
# so we copy the ocb binary into a golang base with a newer toolchain.
FROM otel/opentelemetry-collector-builder:${OTEL_COLLECTOR_VERSION} AS ocb-bin
FROM golang:1.26-alpine AS ocb-builder
ARG OTEL_COLLECTOR_VERSION
ARG OTEL_COLLECTOR_CORE_VERSION
COPY --from=ocb-bin /usr/local/bin/ocb /usr/local/bin/ocb
WORKDIR /build
COPY packages/otel-collector/builder-config.yaml .
RUN --mount=type=cache,target=/go/pkg/mod,id=ocb-gomod \
--mount=type=cache,target=/root/.cache/go-build,id=ocb-gobuild \
sed -i "s/__OTEL_COLLECTOR_VERSION__/${OTEL_COLLECTOR_VERSION}/g; s/__OTEL_COLLECTOR_CORE_VERSION__/${OTEL_COLLECTOR_CORE_VERSION}/g" builder-config.yaml && \
CGO_ENABLED=0 ocb --config=builder-config.yaml
# Build the Go migration tool with full TLS support for ClickHouse
FROM golang:1.26-alpine AS migrate-builder
WORKDIR /build
@ -132,7 +148,7 @@ ARG USER_GID=10001
ENV CODE_VERSION=$CODE_VERSION
ENV OTEL_RESOURCE_ATTRIBUTES="service.version=$CODE_VERSION"
# Copy from otel collector bases
COPY --from=otel_collector_base --chmod=755 /otelcol-contrib /otelcontribcol
COPY --from=ocb-builder --chmod=755 /build/output/otelcol-hyperdx /otelcontribcol
COPY --from=otel_collector_opampsupervisor_base --chmod=755 /usr/local/bin/opampsupervisor /usr/local/bin/opampsupervisor
# Copy Node.js runtime from node base

View file

@ -1,12 +1,31 @@
## base #############################################################################################
FROM otel/opentelemetry-collector-contrib:0.147.0 AS col
FROM otel/opentelemetry-collector-opampsupervisor:0.147.0 AS supervisor
ARG OTEL_COLLECTOR_VERSION=0.149.0
ARG OTEL_COLLECTOR_CORE_VERSION=1.55.0
FROM otel/opentelemetry-collector-opampsupervisor:${OTEL_COLLECTOR_VERSION} AS supervisor
FROM hairyhenderson/gomplate:v4.3.3-alpine AS gomplate
FROM kukymbr/goose-docker@sha256:0cd025636df126e7f66472861ca4db3683bc649be46cd1f6ef1a316209058e23 AS goose
# Build the Go migration tool with full TLS support for ClickHouse
# Build the custom HyperDX collector binary using OCB (OpenTelemetry Collector Builder).
# This replaces the pre-built otel/opentelemetry-collector-contrib image so we can
# include custom receiver/processor components alongside the standard contrib ones.
# Note: Build context must be repo root (use: docker build -f docker/otel-collector/Dockerfile .)
FROM golang:1.25-alpine AS migrate-builder
# Note: The official OCB image may ship an older Go than the contrib modules require,
# so we copy the ocb binary into a golang base with a newer toolchain.
FROM otel/opentelemetry-collector-builder:${OTEL_COLLECTOR_VERSION} AS ocb-bin
FROM golang:1.26-alpine AS ocb-builder
ARG OTEL_COLLECTOR_VERSION
ARG OTEL_COLLECTOR_CORE_VERSION
COPY --from=ocb-bin /usr/local/bin/ocb /usr/local/bin/ocb
WORKDIR /build
COPY packages/otel-collector/builder-config.yaml .
RUN --mount=type=cache,target=/go/pkg/mod,id=ocb-gomod \
--mount=type=cache,target=/root/.cache/go-build,id=ocb-gobuild \
sed -i "s/__OTEL_COLLECTOR_VERSION__/${OTEL_COLLECTOR_VERSION}/g; s/__OTEL_COLLECTOR_CORE_VERSION__/${OTEL_COLLECTOR_CORE_VERSION}/g" builder-config.yaml && \
CGO_ENABLED=0 ocb --config=builder-config.yaml
# Build the Go migration tool with full TLS support for ClickHouse
FROM golang:1.26-alpine AS migrate-builder
WORKDIR /build
COPY packages/otel-collector/go.mod packages/otel-collector/go.sum ./
RUN go mod download
@ -38,7 +57,7 @@ COPY --from=migrate-builder /migrate /usr/local/bin/migrate
USER ${USER_UID}:${USER_GID}
COPY --from=supervisor --chmod=755 /usr/local/bin/opampsupervisor /opampsupervisor
COPY --from=col --chmod=755 /otelcol-contrib /otelcontribcol
COPY --from=ocb-builder --chmod=755 /build/output/otelcol-hyperdx /otelcontribcol
# Copy entrypoint and log tail wrapper scripts
COPY --chmod=755 docker/otel-collector/entrypoint.sh /entrypoint.sh

View file

@ -33,6 +33,7 @@ exporters:
logs_table_name: hyperdx_sessions
timeout: 5s
create_schema: ${env:HYPERDX_OTEL_EXPORTER_CREATE_LEGACY_SCHEMA:-false}
json: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE:-false}
retry_on_failure:
enabled: true
initial_interval: 5s
@ -46,6 +47,7 @@ exporters:
ttl: 720h
timeout: 5s
create_schema: ${env:HYPERDX_OTEL_EXPORTER_CREATE_LEGACY_SCHEMA:-false}
json: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE:-false}
retry_on_failure:
enabled: true
initial_interval: 5s

View file

@ -1,8 +1,26 @@
#!/bin/sh
set -e
# Fall back to legacy schema when the ClickHouse JSON feature gate is enabled
# DEPRECATED: The clickhouse.json feature gate has been removed upstream.
# When OTEL_AGENT_FEATURE_GATE_ARG contains clickhouse.json, strip it and
# map it to HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE instead. Other feature gates
# are preserved and passed through to the collector.
if echo "$OTEL_AGENT_FEATURE_GATE_ARG" | grep -q "clickhouse.json"; then
echo "WARNING: '--feature-gates=clickhouse.json' is deprecated and no longer supported by the collector."
echo "WARNING: Use HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE=true instead. This flag will be removed in a future release."
export HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE=true
# Strip clickhouse.json from the feature gates, keeping any other gates
REMAINING_GATES=$(echo "$OTEL_AGENT_FEATURE_GATE_ARG" | sed 's/--feature-gates=//' | tr ',' '\n' | grep -v 'clickhouse.json' | tr '\n' ',' | sed 's/,$//')
if [ -n "$REMAINING_GATES" ]; then
export OTEL_AGENT_FEATURE_GATE_ARG="--feature-gates=$REMAINING_GATES"
else
unset OTEL_AGENT_FEATURE_GATE_ARG
fi
fi
# Fall back to legacy schema when ClickHouse JSON exporter mode is enabled
if [ "$HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE" = "true" ]; then
export HYPERDX_OTEL_EXPORTER_CREATE_LEGACY_SCHEMA=true
fi
@ -39,7 +57,7 @@ if [ -z "$OPAMP_SERVER_URL" ]; then
COLLECTOR_ARGS="$COLLECTOR_ARGS --config $CUSTOM_OTELCOL_CONFIG_FILE"
fi
# Pass feature gates to the collector in standalone mode
# Pass remaining feature gates to the collector in standalone mode
if [ -n "$OTEL_AGENT_FEATURE_GATE_ARG" ]; then
COLLECTOR_ARGS="$COLLECTOR_ARGS $OTEL_AGENT_FEATURE_GATE_ARG"
fi

View file

@ -23,8 +23,8 @@ agent:
{{- if getenv "CUSTOM_OTELCOL_CONFIG_FILE" }}
- {{ getenv "CUSTOM_OTELCOL_CONFIG_FILE" }}
{{- end }}
args:
{{- if getenv "OTEL_AGENT_FEATURE_GATE_ARG" }}
args:
- {{ getenv "OTEL_AGENT_FEATURE_GATE_ARG" }}
{{- end }}

View file

@ -27,7 +27,7 @@
"project": ["src/**/*.ts"]
}
},
"ignore": ["scripts/dev-portal/**"],
"ignore": ["scripts/dev-portal/**", ".github/scripts/**"],
"ignoreBinaries": ["make", "migrate", "playwright"],
"ignoreDependencies": [
"@dotenvx/dotenvx",

View file

@ -42,6 +42,7 @@
"run:clickhouse": "nx run @hyperdx/app:run:clickhouse",
"dev": "sh -c '. ./scripts/dev-env.sh && yarn build:common-utils && dotenvx run --convention=nextjs -- docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml up -d && yarn app:dev; dotenvx run --convention=nextjs -- docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml down'",
"dev:local": "IS_LOCAL_APP_MODE='DANGEROUSLY_is_local_app_mode💀' yarn dev",
"cli:dev": "yarn workspace @hyperdx/cli dev",
"dev:down": "sh -c '. ./scripts/dev-env.sh && docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml down && sh ./scripts/dev-kill-ports.sh'",
"dev:compose": "sh -c '. ./scripts/dev-env.sh && docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml'",
"knip": "knip",

View file

@ -12,5 +12,7 @@ MONGO_URI=mongodb://localhost:${HDX_CI_MONGO_PORT:-39999}/hyperdx-test
NODE_ENV=test
PORT=${HDX_CI_API_PORT:-19000}
OPAMP_PORT=${HDX_CI_OPAMP_PORT:-14320}
# Default to only logging warnings/errors. Adjust if you need more verbosity
HYPERDX_LOG_LEVEL=warn
# Default to only logging errors. Adjust if you need more verbosity.
# Note: the logger module is mocked in jest.setup.ts to suppress expected
# operational noise (validation errors, MCP tool errors, etc.) during tests.
HYPERDX_LOG_LEVEL=error

View file

@ -1,5 +1,41 @@
# @hyperdx/api
## 2.24.0
### Minor Changes
- 9781ae63: Add an MCP (Model Context Protocol) server to the HyperDX API, enabling AI assistants (Claude, Cursor, OpenCode, etc.) to query observability data, manage dashboards, and explore data sources directly via standardized tool calls.
- 5885d479: Introduces Shared Filters, enabling teams to pin and surface common filters across all members.
### Patch Changes
- 1fada918: feat: Support alerts on Raw SQL Number Charts
- 7953c028: feat: Add between-type alert thresholds
- d3a61f9b: feat: Add additional alert threshold types
- 085f3074: feat: Implement alerting for Raw SQL-based dashboard tiles
- 3c057720: feat: Show alert execution errors in the UI
- 6ff1ba60: feat: Add alert history + ack to alert editor
- Updated dependencies [418f70c5]
- Updated dependencies [1fada918]
- Updated dependencies [7953c028]
- Updated dependencies [d3a61f9b]
- Updated dependencies [5885d479]
- Updated dependencies [cc714f90]
- Updated dependencies [085f3074]
- Updated dependencies [3c057720]
- Updated dependencies [6ff1ba60]
- @hyperdx/common-utils@0.18.0
## 2.23.2
## 2.23.1
### Patch Changes
- f8d2edde: feat: Show created/updated metadata for saved searches and dashboards
- Updated dependencies [24767c58]
- @hyperdx/common-utils@0.17.1
## 2.23.0
### Minor Changes

View file

@ -1,8 +1,12 @@
/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
const { createJsWithTsPreset } = require('ts-jest');
const tsJestTransformCfg = createJsWithTsPreset();
/** @type {import("jest").Config} **/
module.exports = {
...tsJestTransformCfg,
setupFilesAfterEnv: ['<rootDir>/../jest.setup.ts'],
setupFiles: ['dotenv-expand/config'],
preset: 'ts-jest',
testEnvironment: 'node',
verbose: true,
rootDir: './src',

View file

@ -1,12 +1,11 @@
// @eslint-disable @typescript-eslint/no-var-requires
jest.retryTimes(1, { logErrorsBeforeRetry: true });
global.console = {
...console,
// Turn off noisy console logs in tests
debug: jest.fn(),
info: jest.fn(),
};
// Suppress noisy console output during test runs.
// - debug/info: ClickHouse query logging, server startup messages
// - warn: expected column-not-found warnings from renderChartConfig on CTE tables
jest.spyOn(console, 'debug').mockImplementation(() => {});
jest.spyOn(console, 'info').mockImplementation(() => {});
jest.spyOn(console, 'warn').mockImplementation(() => {});
// Mock alert notification functions to prevent HTTP calls during tests
jest.mock('@/utils/slack', () => ({

View file

@ -70,7 +70,13 @@
"type": "string",
"enum": [
"above",
"below"
"below",
"above_exclusive",
"below_or_equal",
"equal",
"not_equal",
"between",
"not_between"
],
"description": "Threshold comparison direction."
},
@ -99,6 +105,43 @@
],
"description": "Channel type."
},
"AlertErrorType": {
"type": "string",
"enum": [
"QUERY_ERROR",
"WEBHOOK_ERROR",
"INVALID_ALERT",
"UNKNOWN"
],
"description": "Category of error recorded during alert execution."
},
"AlertExecutionError": {
"type": "object",
"description": "An error recorded during a recent alert execution.",
"required": [
"timestamp",
"type",
"message"
],
"properties": {
"timestamp": {
"type": "string",
"format": "date-time",
"description": "When the error occurred.",
"example": "2026-04-17T12:00:00.000Z"
},
"type": {
"$ref": "#/components/schemas/AlertErrorType",
"description": "Category of the error.",
"example": "QUERY_ERROR"
},
"message": {
"type": "string",
"description": "Human-readable error message.",
"example": "Query timed out after 30s"
}
}
},
"AlertSilenced": {
"type": "object",
"description": "Silencing metadata.",
@ -162,7 +205,7 @@
},
"tileId": {
"type": "string",
"description": "Tile ID for tile-based alerts. May not be a Raw-SQL-based tile.",
"description": "Tile ID for tile-based alerts. Must be a line, stacked bar, or number type tile.",
"nullable": true,
"example": "65f5e4a3b9e77c001a901234"
},
@ -180,9 +223,15 @@
},
"threshold": {
"type": "number",
"description": "Threshold value for triggering the alert.",
"description": "Threshold value for triggering the alert. For between and not_between threshold types, this is the lower bound.",
"example": 100
},
"thresholdMax": {
"type": "number",
"nullable": true,
"description": "Upper bound for between and not_between threshold types. Required when thresholdType is between or not_between, must be >= threshold.",
"example": 500
},
"interval": {
"$ref": "#/components/schemas/AlertInterval",
"description": "Evaluation interval for the alert.",
@ -258,6 +307,14 @@
"description": "Silencing metadata.",
"nullable": true
},
"executionErrors": {
"type": "array",
"nullable": true,
"description": "Errors recorded during the most recent alert execution, if any.",
"items": {
"$ref": "#/components/schemas/AlertExecutionError"
}
},
"createdAt": {
"type": "string",
"nullable": true,

View file

@ -1,6 +1,6 @@
{
"name": "@hyperdx/api",
"version": "2.23.0",
"version": "2.24.0",
"license": "MIT",
"private": true,
"engines": {
@ -10,9 +10,10 @@
"@ai-sdk/anthropic": "^3.0.58",
"@ai-sdk/openai": "^3.0.47",
"@esm2cjs/p-queue": "^7.3.0",
"@hyperdx/common-utils": "^0.17.0",
"@hyperdx/common-utils": "^0.18.0",
"@hyperdx/node-opentelemetry": "^0.9.0",
"@hyperdx/passport-local-mongoose": "^9.0.1",
"@modelcontextprotocol/sdk": "^1.27.1",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/host-metrics": "^0.35.5",
"@opentelemetry/sdk-metrics": "^1.30.1",
@ -57,7 +58,7 @@
"@types/cors": "^2.8.14",
"@types/express": "^4.17.13",
"@types/express-session": "^1.17.7",
"@types/jest": "^28.1.1",
"@types/jest": "^29.5.14",
"@types/lodash": "^4.14.198",
"@types/minimist": "^1.2.2",
"@types/ms": "^0.7.31",
@ -65,7 +66,7 @@
"@types/supertest": "^2.0.12",
"@types/swagger-jsdoc": "^6",
"@types/uuid": "^8.3.4",
"jest": "^28.1.3",
"jest": "^30.2.0",
"migrate-mongo": "^11.0.0",
"nodemon": "^2.0.20",
"pino-pretty": "^13.1.1",
@ -82,13 +83,14 @@
"scripts": {
"start": "node ./build/index.js",
"dev": "DOTENV_CONFIG_PATH=.env.development nodemon --exec 'ts-node' --transpile-only -r tsconfig-paths/register -r dotenv-expand/config -r '@hyperdx/node-opentelemetry/build/src/tracing' ./src/index.ts",
"dev:mcp": "npx @modelcontextprotocol/inspector",
"dev-task": "DOTENV_CONFIG_PATH=.env.development nodemon --exec 'ts-node' --transpile-only -r tsconfig-paths/register -r dotenv-expand/config -r '@hyperdx/node-opentelemetry/build/src/tracing' ./src/tasks/index.ts",
"build": "rimraf ./build && tsc && tsc-alias && cp -r ./src/opamp/proto ./build/opamp/",
"lint": "npx eslint --quiet . --ext .ts",
"lint:fix": "npx eslint . --ext .ts --fix",
"ci:lint": "yarn lint && yarn tsc --noEmit && yarn lint:openapi",
"ci:int": "DOTENV_CONFIG_PATH=.env.test DOTENV_CONFIG_OVERRIDE=true jest --runInBand --ci --forceExit --coverage",
"dev:int": "DOTENV_CONFIG_PATH=.env.test DOTENV_CONFIG_OVERRIDE=true jest --runInBand --forceExit --coverage",
"dev:int": "DOTENV_CONFIG_PATH=.env.test DOTENV_CONFIG_OVERRIDE=true jest --runInBand --coverage",
"dev:migrate-db-create": "ts-node node_modules/.bin/migrate-mongo create -f migrate-mongo-config.ts",
"dev:migrate-db": "ts-node node_modules/.bin/migrate-mongo up -f migrate-mongo-config.ts",
"dev:migrate-ch-create": "migrate create -ext sql -dir ./migrations/ch -seq",

View file

@ -5,6 +5,7 @@ import session from 'express-session';
import onHeaders from 'on-headers';
import * as config from './config';
import mcpRouter from './mcp/app';
import { isUserAuthenticated } from './middleware/auth';
import defaultCors from './middleware/cors';
import { appErrorHandler } from './middleware/error';
@ -12,6 +13,7 @@ import routers from './routers/api';
import clickhouseProxyRouter from './routers/api/clickhouseProxy';
import connectionsRouter from './routers/api/connections';
import favoritesRouter from './routers/api/favorites';
import pinnedFiltersRouter from './routers/api/pinnedFilters';
import savedSearchRouter from './routers/api/savedSearch';
import sourcesRouter from './routers/api/sources';
import externalRoutersV2 from './routers/external-api/v2';
@ -79,7 +81,7 @@ app.use(defaultCors);
// ---------------------------------------------------------------------
// ----------------------- Background Jobs -----------------------------
// ---------------------------------------------------------------------
if (config.USAGE_STATS_ENABLED) {
if (config.USAGE_STATS_ENABLED && !config.IS_CI) {
usageStats();
}
// ---------------------------------------------------------------------
@ -90,6 +92,9 @@ if (config.USAGE_STATS_ENABLED) {
// PUBLIC ROUTES
app.use('/', routers.rootRouter);
// SELF-AUTHENTICATED ROUTES (validated via access key, not session middleware)
app.use('/mcp', mcpRouter);
// PRIVATE ROUTES
app.use('/ai', isUserAuthenticated, routers.aiRouter);
app.use('/alerts', isUserAuthenticated, routers.alertsRouter);
@ -101,6 +106,7 @@ app.use('/connections', isUserAuthenticated, connectionsRouter);
app.use('/sources', isUserAuthenticated, sourcesRouter);
app.use('/saved-search', isUserAuthenticated, savedSearchRouter);
app.use('/favorites', isUserAuthenticated, favoritesRouter);
app.use('/pinned-filters', isUserAuthenticated, pinnedFiltersRouter);
app.use('/clickhouse-proxy', isUserAuthenticated, clickhouseProxyRouter);
// ---------------------------------------------------------------------

View file

@ -1,33 +1,33 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
// Jest Snapshot v1, https://jestjs.io/docs/snapshot-testing
exports[`renderChartConfig K8s Semantic Convention Migrations with metricNameSql should handle gauge metric with metricNameSql and groupBy 1`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"arrayElement(ResourceAttributes, 'k8s.pod.name')": "test-pod",
"avg(toFloat64OrDefault(toString(LastValue)))": 45,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
"arrayElement(ResourceAttributes, 'k8s.pod.name')": "test-pod",
"avg(toFloat64OrDefault(toString(LastValue)))": 50,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"arrayElement(ResourceAttributes, 'k8s.pod.name')": "test-pod",
"avg(toFloat64OrDefault(toString(LastValue)))": 55,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:03:00Z",
"arrayElement(ResourceAttributes, 'k8s.pod.name')": "test-pod",
"avg(toFloat64OrDefault(toString(LastValue)))": 60,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:04:00Z",
"arrayElement(ResourceAttributes, 'k8s.pod.name')": "test-pod",
"avg(toFloat64OrDefault(toString(LastValue)))": 65,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"arrayElement(ResourceAttributes, 'k8s.pod.name')": "test-pod",
"avg(toFloat64OrDefault(toString(LastValue)))": 70,
@ -36,16 +36,16 @@ Array [
`;
exports[`renderChartConfig K8s Semantic Convention Migrations with metricNameSql should handle metrics without metricNameSql (backward compatibility) 1`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 45,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 50,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 55,
},
@ -53,28 +53,28 @@ Array [
`;
exports[`renderChartConfig K8s Semantic Convention Migrations with metricNameSql should query k8s.pod.cpu.utilization gauge metric using metricNameSql to handle both old and new conventions 1`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 45,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 50,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 55,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:03:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 60,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:04:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 65,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 70,
},
@ -82,12 +82,12 @@ Array [
`;
exports[`renderChartConfig Query Events - Logs simple select + group by query logs 1`] = `
Array [
Object {
[
{
"ServiceName": "app",
"count": "1",
},
Object {
{
"ServiceName": "api",
"count": "1",
},
@ -95,31 +95,31 @@ Array [
`;
exports[`renderChartConfig Query Events - Logs simple select + where query logs 1`] = `
Array [
Object {
[
{
"Body": "Oh no! Something went wrong!",
},
]
`;
exports[`renderChartConfig Query Metrics - Gauge single avg gauge with group-by 1`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"arrayElement(ResourceAttributes, 'host')": "host2",
"avg(toFloat64OrDefault(toString(LastValue)))": 4,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"arrayElement(ResourceAttributes, 'host')": "host1",
"avg(toFloat64OrDefault(toString(LastValue)))": 6.25,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"arrayElement(ResourceAttributes, 'host')": "host2",
"avg(toFloat64OrDefault(toString(LastValue)))": 4,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"arrayElement(ResourceAttributes, 'host')": "host1",
"avg(toFloat64OrDefault(toString(LastValue)))": 80,
@ -128,12 +128,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Gauge single avg gauge with where 1`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 6.25,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 80,
},
@ -141,12 +141,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Gauge single max gauge with delta 1`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"max(toFloat64OrDefault(toString(LastValue)))": 5,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"max(toFloat64OrDefault(toString(LastValue)))": -1.6666666666666667,
},
@ -154,23 +154,23 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Gauge single max gauge with delta and group by 1`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"arrayElement(ResourceAttributes, 'host')": "host2",
"max(toFloat64OrDefault(toString(LastValue)))": 5,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"arrayElement(ResourceAttributes, 'host')": "host1",
"max(toFloat64OrDefault(toString(LastValue)))": -72.91666666666667,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"arrayElement(ResourceAttributes, 'host')": "host2",
"max(toFloat64OrDefault(toString(LastValue)))": -1.6666666666666667,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"arrayElement(ResourceAttributes, 'host')": "host1",
"max(toFloat64OrDefault(toString(LastValue)))": -33.333333333333336,
@ -179,12 +179,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Gauge single max/avg/sum gauge 1`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 5.125,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"avg(toFloat64OrDefault(toString(LastValue)))": 42,
},
@ -192,12 +192,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Gauge single max/avg/sum gauge 2`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"max(toFloat64OrDefault(toString(LastValue)))": 6.25,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"max(toFloat64OrDefault(toString(LastValue)))": 80,
},
@ -205,12 +205,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Gauge single max/avg/sum gauge 3`] = `
Array [
Object {
[
{
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"sum(toFloat64OrDefault(toString(LastValue)))": 10.25,
},
Object {
{
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"sum(toFloat64OrDefault(toString(LastValue)))": 84,
},
@ -218,32 +218,32 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Histogram should bucket correctly when grouping by a single attribute 1`] = `
Array [
Object {
[
{
"Value": 3.5714285714285716,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"group": Array [
"group": [
"host-b",
],
},
Object {
{
"Value": 8.382352941176471,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"group": Array [
"group": [
"host-a",
],
},
Object {
{
"Value": 3.5,
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"group": Array [
"group": [
"host-b",
],
},
Object {
{
"Value": 4.95,
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"group": Array [
"group": [
"host-a",
],
},
@ -251,99 +251,99 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Histogram should bucket correctly when grouping by multiple attributes 1`] = `
Array [
Object {
[
{
"Value": 2.916666666666667,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"group": Array [
"group": [
"host-b",
"service-2",
],
},
Object {
{
"Value": 4.852941176470588,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"group": Array [
"group": [
"host-a",
"service-2",
],
},
Object {
{
"Value": 8.75,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"group": Array [
"group": [
"host-a",
"service-1",
],
},
Object {
{
"Value": 3.1578947368421053,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"group": Array [
"group": [
"host-b",
"service-1",
],
},
Object {
{
"Value": 58.33333333333333,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"group": Array [
"group": [
"host-a",
"service-3",
],
},
Object {
{
"Value": 6.25,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"group": Array [
"group": [
"host-b",
"service-3",
],
},
Object {
{
"Value": 3.4090909090909087,
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"group": Array [
"group": [
"host-b",
"service-1",
],
},
Object {
{
"Value": 7.916666666666667,
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"group": Array [
"group": [
"host-a",
"service-1",
],
},
Object {
{
"Value": 3.25,
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"group": Array [
"group": [
"host-b",
"service-3",
],
},
Object {
{
"Value": 4.25,
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"group": Array [
"group": [
"host-a",
"service-3",
],
},
Object {
{
"Value": 4.75,
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"group": Array [
"group": [
"host-a",
"service-2",
],
},
Object {
{
"Value": 3.888888888888889,
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
"group": Array [
"group": [
"host-b",
"service-2",
],
@ -352,12 +352,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Histogram should bucket correctly when no grouping is defined 1`] = `
Array [
Object {
[
{
"Value": 5.241935483870968,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
},
Object {
{
"Value": 4.40625,
"__hdx_time_bucket": "2022-01-05T00:02:00Z",
},
@ -365,8 +365,8 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Histogram two_timestamps_bounded histogram (p25) 1`] = `
Array [
Object {
[
{
"Value": 7.5,
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
},
@ -374,8 +374,8 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Histogram two_timestamps_bounded histogram (p50) 1`] = `
Array [
Object {
[
{
"Value": 20,
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
},
@ -383,8 +383,8 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Histogram two_timestamps_bounded histogram (p90) 1`] = `
Array [
Object {
[
{
"Value": 30,
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
},
@ -392,8 +392,8 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Histogram two_timestamps_lower_bound_inf histogram (p50) 1`] = `
Array [
Object {
[
{
"Value": 0.5,
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
},
@ -401,12 +401,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Sum calculates min_rate/max_rate correctly for sum metrics: maxSum 1`] = `
Array [
Object {
[
{
"Value": 24,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
},
Object {
{
"Value": 134,
"__hdx_time_bucket": "2022-01-05T00:10:00Z",
},
@ -414,12 +414,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Sum calculates min_rate/max_rate correctly for sum metrics: minSum 1`] = `
Array [
Object {
[
{
"Value": 15,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
},
Object {
{
"Value": 52,
"__hdx_time_bucket": "2022-01-05T00:10:00Z",
},
@ -427,12 +427,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Sum handles counter resets correctly for sum metrics 1`] = `
Array [
Object {
[
{
"Value": 15,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
},
Object {
{
"Value": 52,
"__hdx_time_bucket": "2022-01-05T00:10:00Z",
},
@ -440,20 +440,20 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Sum single sum rate 1`] = `
Array [
Object {
[
{
"Value": 19,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
},
Object {
{
"Value": 79,
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
},
Object {
{
"Value": 5813,
"__hdx_time_bucket": "2022-01-05T00:10:00Z",
},
Object {
{
"Value": 78754,
"__hdx_time_bucket": "2022-01-05T00:15:00Z",
},
@ -461,12 +461,12 @@ Array [
`;
exports[`renderChartConfig Query Metrics - Sum sum values as without rate computation 1`] = `
Array [
Object {
[
{
"Value": 950400,
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
},
Object {
{
"Value": 1641600,
"__hdx_time_bucket": "2022-01-05T00:10:00Z",
},
@ -474,19 +474,19 @@ Array [
`;
exports[`renderChartConfig Query settings handles the the query settings 1`] = `
Array [
Object {
[
{
"Body": "Oh no! Something went wrong!",
},
Object {
{
"Body": "This is a test message.",
},
]
`;
exports[`renderChartConfig aggFn numeric agg functions should handle numeric values as strings 1`] = `
Array [
Object {
[
{
"AVG(toFloat64OrDefault(toString(strVal)))": 0.5,
"MAX(toFloat64OrDefault(toString(strVal)))": 3,
"MIN(toFloat64OrDefault(toString(strVal)))": -1.1,
@ -497,8 +497,8 @@ Array [
`;
exports[`renderChartConfig aggFn numeric agg functions should use default values for other types 1`] = `
Array [
Object {
[
{
"AVG(toFloat64OrDefault(toString(strVal)))": 0,
"MAX(toFloat64OrDefault(toString(strVal)))": 0,
"MIN(toFloat64OrDefault(toString(strVal)))": 0,

View file

@ -1,3 +1,7 @@
import {
displayTypeSupportsRawSqlAlerts,
validateRawSqlForAlert,
} from '@hyperdx/common-utils/dist/core/utils';
import { isRawSqlSavedChartConfig } from '@hyperdx/common-utils/dist/guards';
import { sign, verify } from 'jsonwebtoken';
import { groupBy } from 'lodash';
@ -5,13 +9,7 @@ import ms from 'ms';
import { z } from 'zod';
import type { ObjectId } from '@/models';
import Alert, {
AlertChannel,
AlertInterval,
AlertSource,
AlertThresholdType,
IAlert,
} from '@/models/alert';
import Alert, { AlertSource, IAlert } from '@/models/alert';
import Dashboard, { IDashboard } from '@/models/dashboard';
import { ISavedSearch, SavedSearch } from '@/models/savedSearch';
import { IUser } from '@/models/user';
@ -20,34 +18,23 @@ import { Api400Error } from '@/utils/errors';
import logger from '@/utils/logger';
import { alertSchema, objectIdSchema } from '@/utils/zod';
export type AlertInput = {
export type AlertInput = Omit<
IAlert,
| 'id'
| 'scheduleStartAt'
| 'savedSearchId'
| 'createdAt'
| 'createdBy'
| 'updatedAt'
| 'team'
| 'state'
> & {
id?: string;
source?: AlertSource;
channel: AlertChannel;
interval: AlertInterval;
scheduleOffsetMinutes?: number;
// Replace the Date-type fields from IAlert
scheduleStartAt?: string | null;
thresholdType: AlertThresholdType;
threshold: number;
// Message template
name?: string | null;
message?: string | null;
// Log alerts
groupBy?: string;
// Replace the ObjectId-type fields from IAlert
savedSearchId?: string;
// Chart alerts
dashboardId?: string;
tileId?: string;
// Silenced
silenced?: {
by?: ObjectId;
at: Date;
until: Date;
};
};
const validateObjectId = (id: string | undefined, message: string) => {
@ -82,7 +69,18 @@ export const validateAlertInput = async (
}
if (tile.config != null && isRawSqlSavedChartConfig(tile.config)) {
throw new Api400Error('Cannot create an alert on a raw SQL tile');
if (!displayTypeSupportsRawSqlAlerts(tile.config.displayType)) {
throw new Api400Error(
'Alerts on Raw SQL tiles are only supported for Line, Stacked Bar, or Number display types',
);
}
const { errors } = validateRawSqlForAlert(tile.config);
if (errors.length > 0) {
throw new Api400Error(
`Raw SQL alert query is invalid: ${errors.join(', ')}`,
);
}
}
}
@ -140,6 +138,7 @@ const makeAlert = (alert: AlertInput, userId?: ObjectId): Partial<IAlert> => {
}),
source: alert.source,
threshold: alert.threshold,
thresholdMax: alert.thresholdMax,
thresholdType: alert.thresholdType,
...(userId && { createdBy: userId }),
@ -287,6 +286,20 @@ export const getAlertsEnhanced = async (teamId: ObjectId) => {
}>(['savedSearch', 'dashboard', 'createdBy', 'silenced.by']);
};
export const getAlertEnhanced = async (
alertId: ObjectId | string,
teamId: ObjectId,
) => {
return Alert.findOne({ _id: alertId, team: teamId }).populate<{
savedSearch: ISavedSearch;
dashboard: IDashboard;
createdBy?: IUser;
silenced?: IAlert['silenced'] & {
by: IUser;
};
}>(['savedSearch', 'dashboard', 'createdBy', 'silenced.by']);
};
export const deleteAlert = async (id: string, teamId: ObjectId) => {
return Alert.deleteOne({
_id: id,

View file

@ -1,7 +1,6 @@
import { isBuilderSavedChartConfig } from '@hyperdx/common-utils/dist/guards';
import {
BuilderSavedChartConfig,
DashboardWithoutIdSchema,
SavedChartConfig,
Tile,
} from '@hyperdx/common-utils/dist/types';
import { map, partition, uniq } from 'lodash';
@ -19,7 +18,7 @@ import Dashboard from '@/models/dashboard';
function pickAlertsByTile(tiles: Tile[]) {
return tiles.reduce((acc, tile) => {
if (isBuilderSavedChartConfig(tile.config) && tile.config.alert) {
if (tile.config.alert) {
acc[tile.id] = tile.config.alert;
}
return acc;
@ -27,9 +26,7 @@ function pickAlertsByTile(tiles: Tile[]) {
}
type TileForAlertSync = Pick<Tile, 'id'> & {
config?:
| Pick<BuilderSavedChartConfig, 'alert'>
| { alert?: IAlert | AlertDocument };
config?: Pick<SavedChartConfig, 'alert'> | { alert?: IAlert | AlertDocument };
};
function extractTileAlertData(tiles: TileForAlertSync[]): {
@ -55,9 +52,7 @@ async function syncDashboardAlerts(
const newTilesForAlertSync: TileForAlertSync[] = newTiles.map(t => ({
id: t.id,
config: isBuilderSavedChartConfig(t.config)
? { alert: t.config.alert }
: {},
config: { alert: t.config.alert },
}));
const { tileIds: newTileIds, tileIdsWithAlerts: newTileIdsWithAlerts } =
extractTileAlertData(newTilesForAlertSync);
@ -95,7 +90,9 @@ async function syncDashboardAlerts(
export async function getDashboards(teamId: ObjectId) {
const [_dashboards, alerts] = await Promise.all([
Dashboard.find({ team: teamId }),
Dashboard.find({ team: teamId })
.populate('createdBy', 'email name')
.populate('updatedBy', 'email name'),
getTeamDashboardAlertsByDashboardAndTile(teamId),
]);
@ -117,12 +114,14 @@ export async function getDashboards(teamId: ObjectId) {
export async function getDashboard(dashboardId: string, teamId: ObjectId) {
const [_dashboard, alerts] = await Promise.all([
Dashboard.findOne({ _id: dashboardId, team: teamId }),
Dashboard.findOne({ _id: dashboardId, team: teamId })
.populate('createdBy', 'email name')
.populate('updatedBy', 'email name'),
getDashboardAlertsByTile(teamId, dashboardId),
]);
return {
..._dashboard,
..._dashboard?.toJSON(),
tiles: _dashboard?.tiles.map(t => ({
...t,
config: { ...t.config, alert: alerts[t.id]?.[0] },
@ -138,6 +137,8 @@ export async function createDashboard(
const newDashboard = await new Dashboard({
...dashboard,
team: teamId,
createdBy: userId,
updatedBy: userId,
}).save();
await createOrUpdateDashboardAlerts(
@ -180,6 +181,7 @@ export async function updateDashboard(
{
...updates,
tags: updates.tags && uniq(updates.tags),
updatedBy: userId,
},
{ new: true },
);

View file

@ -0,0 +1,42 @@
import type { PinnedFiltersValue } from '@hyperdx/common-utils/dist/types';
import mongoose from 'mongoose';
import type { ObjectId } from '@/models';
import PinnedFilterModel from '@/models/pinnedFilter';
/**
* Get team-level pinned filters for a team+source combination.
*/
/**
 * Get team-level pinned filters for a team+source combination.
 * Resolves to the matching document, or null when none exists.
 */
export async function getPinnedFilters(
  teamId: string | ObjectId,
  sourceId: string | ObjectId,
) {
  // Normalize both ids to ObjectId so string and ObjectId callers behave alike.
  const key = {
    team: new mongoose.Types.ObjectId(teamId),
    source: new mongoose.Types.ObjectId(sourceId),
  };
  return PinnedFilterModel.findOne(key);
}
/**
* Upsert team-level pinned filters for a team+source.
*/
/**
 * Upsert team-level pinned filters for a team+source.
 *
 * @param teamId - Owning team id.
 * @param sourceId - Source the filters are pinned for.
 * @param data - New `fields` list and `filters` value to store.
 * @returns The updated (or newly created) document.
 */
export async function updatePinnedFilters(
  teamId: string | ObjectId,
  sourceId: string | ObjectId,
  data: { fields: string[]; filters: PinnedFiltersValue },
) {
  // The (team, source) pair both selects the document and seeds it on insert.
  const key = {
    team: new mongoose.Types.ObjectId(teamId),
    source: new mongoose.Types.ObjectId(sourceId),
  };
  const update = {
    ...key,
    fields: data.fields,
    filters: data.filters,
  };
  // upsert: create when missing; new: resolve with the post-update document.
  return PinnedFilterModel.findOneAndUpdate(key, update, {
    upsert: true,
    new: true,
  });
}

View file

@ -1,16 +1,22 @@
import { SavedSearchSchema } from '@hyperdx/common-utils/dist/types';
import { groupBy, pick } from 'lodash';
import {
SavedSearchListApiResponse,
SavedSearchSchema,
} from '@hyperdx/common-utils/dist/types';
import { groupBy } from 'lodash';
import { z } from 'zod';
import { deleteSavedSearchAlerts } from '@/controllers/alerts';
import Alert from '@/models/alert';
import { SavedSearch } from '@/models/savedSearch';
import type { IUser } from '@/models/user';
type SavedSearchWithoutId = Omit<z.infer<typeof SavedSearchSchema>, 'id'>;
export async function getSavedSearches(teamId: string) {
const savedSearches = await SavedSearch.find({ team: teamId });
export async function getSavedSearches(
teamId: string,
): Promise<SavedSearchListApiResponse[]> {
const savedSearches = await SavedSearch.find({ team: teamId })
.populate('createdBy', 'email name')
.populate('updatedBy', 'email name');
const alerts = await Alert.find(
{ team: teamId, savedSearch: { $exists: true, $ne: null } },
{ __v: 0 },
@ -27,26 +33,36 @@ export async function getSavedSearches(teamId: string) {
}
export function getSavedSearch(teamId: string, savedSearchId: string) {
return SavedSearch.findOne({ _id: savedSearchId, team: teamId });
return SavedSearch.findOne({ _id: savedSearchId, team: teamId })
.populate('createdBy', 'email name')
.populate('updatedBy', 'email name');
}
export function createSavedSearch(
teamId: string,
savedSearch: SavedSearchWithoutId,
userId?: string,
) {
return SavedSearch.create({ ...savedSearch, team: teamId });
return SavedSearch.create({
...savedSearch,
team: teamId,
createdBy: userId,
updatedBy: userId,
});
}
export function updateSavedSearch(
teamId: string,
savedSearchId: string,
savedSearch: SavedSearchWithoutId,
userId?: string,
) {
return SavedSearch.findOneAndUpdate(
{ _id: savedSearchId, team: teamId },
{
...savedSearch,
team: teamId,
updatedBy: userId,
},
{ new: true },
);

View file

@ -1,5 +1,6 @@
import { createNativeClient } from '@hyperdx/common-utils/dist/clickhouse/node';
import {
AlertThresholdType,
BuilderSavedChartConfig,
DisplayType,
RawSqlSavedChartConfig,
@ -15,7 +16,7 @@ import { AlertInput } from '@/controllers/alerts';
import { getTeam } from '@/controllers/team';
import { findUserByEmail } from '@/controllers/user';
import { mongooseConnection } from '@/models';
import { AlertInterval, AlertSource, AlertThresholdType } from '@/models/alert';
import { AlertInterval, AlertSource } from '@/models/alert';
import Server from '@/server';
import logger from '@/utils/logger';
import { MetricModel } from '@/utils/logParser';
@ -63,6 +64,13 @@ export const getTestFixtureClickHouseClient = async () => {
return clickhouseClient;
};
/**
 * Close the shared test-fixture ClickHouse client, if one was created.
 * Resets the module-level handle so a later call can lazily recreate it;
 * calling this when no client exists is a no-op.
 */
export const closeTestFixtureClickHouseClient = async () => {
  if (!clickhouseClient) {
    return;
  }
  await clickhouseClient.close();
  clickhouseClient = null;
};
const healthCheck = async () => {
const client = await getTestFixtureClickHouseClient();
const result = await client.ping();
@ -132,6 +140,7 @@ export const closeDB = async () => {
throw new Error('ONLY execute this in CI env 😈 !!!');
}
await mongooseConnection.dropDatabase();
await mongoose.disconnect();
};
export const clearDBCollections = async () => {
@ -175,8 +184,8 @@ class MockServer extends Server {
}
}
stop() {
return new Promise<void>((resolve, reject) => {
async stop() {
await new Promise<void>((resolve, reject) => {
this.appServer.close(err => {
if (err) {
reject(err);
@ -187,13 +196,12 @@ class MockServer extends Server {
reject(err);
return;
}
super
.shutdown()
.then(() => resolve())
.catch(err => reject(err));
resolve();
});
});
});
await closeTestFixtureClickHouseClient();
await super.shutdown();
}
clearDBs() {
@ -501,7 +509,39 @@ export const makeExternalTile = (opts?: {
},
});
export const makeRawSqlTile = (opts?: { id?: string }): Tile => ({
/**
 * Build a 1x1 raw-SQL dashboard tile for tests.
 * Every field is overridable; defaults are a Line chart running
 * 'SELECT 1' against the 'test-connection' connection.
 */
export const makeRawSqlTile = (opts?: {
  id?: string;
  displayType?: DisplayType;
  sqlTemplate?: string;
  connectionId?: string;
}): Tile => {
  const config = {
    configType: 'sql',
    displayType: opts?.displayType ?? DisplayType.Line,
    sqlTemplate: opts?.sqlTemplate ?? 'SELECT 1',
    connection: opts?.connectionId ?? 'test-connection',
  } satisfies RawSqlSavedChartConfig;
  return {
    id: opts?.id ?? randomMongoId(),
    x: 1,
    y: 1,
    w: 1,
    h: 1,
    config,
  };
};
/**
 * Raw SQL template used by alert tests: counts otel_logs rows bucketed into
 * {intervalSeconds}-second intervals over the parameterized
 * [startDateMilliseconds, endDateMilliseconds) window, ordered by bucket.
 */
export const RAW_SQL_ALERT_TEMPLATE =
  'SELECT toStartOfInterval(Timestamp, INTERVAL {intervalSeconds:Int64} second) AS ts,' +
  ' count() AS cnt' +
  ' FROM default.otel_logs' +
  ' WHERE Timestamp >= fromUnixTimestamp64Milli({startDateMilliseconds:Int64})' +
  ' AND Timestamp < fromUnixTimestamp64Milli({endDateMilliseconds:Int64})' +
  ' GROUP BY ts ORDER BY ts';
export const makeRawSqlAlertTile = (opts?: {
id?: string;
connectionId?: string;
sqlTemplate?: string;
}): Tile => ({
id: opts?.id ?? randomMongoId(),
x: 1,
y: 1,
@ -510,8 +550,33 @@ export const makeRawSqlTile = (opts?: { id?: string }): Tile => ({
config: {
configType: 'sql',
displayType: DisplayType.Line,
sqlTemplate: 'SELECT 1',
connection: 'test-connection',
sqlTemplate: opts?.sqlTemplate ?? RAW_SQL_ALERT_TEMPLATE,
connection: opts?.connectionId ?? 'test-connection',
} satisfies RawSqlSavedChartConfig,
});
/**
 * Raw SQL template for Number-display alert tests: a single scalar count of
 * otel_logs rows in the parameterized
 * [startDateMilliseconds, endDateMilliseconds) window.
 */
export const RAW_SQL_NUMBER_ALERT_TEMPLATE =
  'SELECT count() AS cnt' +
  ' FROM default.otel_logs' +
  ' WHERE Timestamp >= fromUnixTimestamp64Milli({startDateMilliseconds:Int64})' +
  ' AND Timestamp < fromUnixTimestamp64Milli({endDateMilliseconds:Int64})';
/**
 * Build a 1x1 Number-display raw-SQL tile for alert tests.
 * Defaults to RAW_SQL_NUMBER_ALERT_TEMPLATE against 'test-connection';
 * id, connection, and SQL are all overridable.
 */
export const makeRawSqlNumberAlertTile = (opts?: {
  id?: string;
  connectionId?: string;
  sqlTemplate?: string;
}): Tile => {
  const config = {
    configType: 'sql',
    displayType: DisplayType.Number,
    sqlTemplate: opts?.sqlTemplate ?? RAW_SQL_NUMBER_ALERT_TEMPLATE,
    connection: opts?.connectionId ?? 'test-connection',
  } satisfies RawSqlSavedChartConfig;
  return {
    id: opts?.id ?? randomMongoId(),
    x: 1,
    y: 1,
    w: 1,
    h: 1,
    config,
  };
};

View file

@ -0,0 +1,545 @@
import { SourceKind } from '@hyperdx/common-utils/dist/types';
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import * as config from '@/config';
import {
DEFAULT_DATABASE,
DEFAULT_TRACES_TABLE,
getLoggedInAgent,
getServer,
} from '@/fixtures';
import Connection from '@/models/connection';
import Dashboard from '@/models/dashboard';
import { Source } from '@/models/source';
import { McpContext } from '../tools/types';
import { callTool, createTestClient, getFirstText } from './mcpTestUtils';
describe('MCP Dashboard Tools', () => {
const server = getServer();
let team: any;
let user: any;
let traceSource: any;
let connection: any;
let client: Client;
beforeAll(async () => {
await server.start();
});
beforeEach(async () => {
const result = await getLoggedInAgent(server);
team = result.team;
user = result.user;
connection = await Connection.create({
team: team._id,
name: 'Default',
host: config.CLICKHOUSE_HOST,
username: config.CLICKHOUSE_USER,
password: config.CLICKHOUSE_PASSWORD,
});
traceSource = await Source.create({
kind: SourceKind.Trace,
team: team._id,
from: {
databaseName: DEFAULT_DATABASE,
tableName: DEFAULT_TRACES_TABLE,
},
timestampValueExpression: 'Timestamp',
connection: connection._id,
name: 'Traces',
});
const context: McpContext = {
teamId: team._id.toString(),
userId: user._id.toString(),
};
client = await createTestClient(context);
});
afterEach(async () => {
await client.close();
await server.clearDBs();
});
afterAll(async () => {
await server.stop();
});
describe('hyperdx_list_sources', () => {
it('should list available sources and connections', async () => {
const result = await callTool(client, 'hyperdx_list_sources');
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
const output = JSON.parse(getFirstText(result));
expect(output.sources).toHaveLength(1);
expect(output.sources[0]).toMatchObject({
id: traceSource._id.toString(),
name: 'Traces',
kind: SourceKind.Trace,
});
expect(output.connections).toHaveLength(1);
expect(output.connections[0]).toMatchObject({
id: connection._id.toString(),
name: 'Default',
});
expect(output.usage).toBeDefined();
});
it('should include column schema for sources', async () => {
const result = await callTool(client, 'hyperdx_list_sources');
const output = JSON.parse(getFirstText(result));
const source = output.sources[0];
expect(source.columns).toBeDefined();
expect(Array.isArray(source.columns)).toBe(true);
expect(source.columns.length).toBeGreaterThan(0);
// Each column should have name, type, and jsType
expect(source.columns[0]).toHaveProperty('name');
expect(source.columns[0]).toHaveProperty('type');
expect(source.columns[0]).toHaveProperty('jsType');
});
it('should return empty sources for a team with no sources', async () => {
// Clear everything and re-register with new team
await client.close();
await server.clearDBs();
const result2 = await getLoggedInAgent(server);
const context2: McpContext = {
teamId: result2.team._id.toString(),
};
const client2 = await createTestClient(context2);
const result = await callTool(client2, 'hyperdx_list_sources');
const output = JSON.parse(getFirstText(result));
expect(output.sources).toHaveLength(0);
expect(output.connections).toHaveLength(0);
await client2.close();
});
});
describe('hyperdx_get_dashboard', () => {
it('should list all dashboards when no id provided', async () => {
await new Dashboard({
name: 'Dashboard 1',
tiles: [],
team: team._id,
tags: ['tag1'],
}).save();
await new Dashboard({
name: 'Dashboard 2',
tiles: [],
team: team._id,
tags: ['tag2'],
}).save();
const result = await callTool(client, 'hyperdx_get_dashboard', {});
expect(result.isError).toBeFalsy();
const output = JSON.parse(getFirstText(result));
expect(output).toHaveLength(2);
expect(output[0]).toHaveProperty('id');
expect(output[0]).toHaveProperty('name');
expect(output[0]).toHaveProperty('tags');
});
it('should get dashboard detail when id is provided', async () => {
const dashboard = await new Dashboard({
name: 'My Dashboard',
tiles: [],
team: team._id,
tags: ['test'],
}).save();
const result = await callTool(client, 'hyperdx_get_dashboard', {
id: dashboard._id.toString(),
});
expect(result.isError).toBeFalsy();
const output = JSON.parse(getFirstText(result));
expect(output.id).toBe(dashboard._id.toString());
expect(output.name).toBe('My Dashboard');
expect(output.tags).toEqual(['test']);
expect(output.tiles).toEqual([]);
});
it('should return error for non-existent dashboard id', async () => {
const fakeId = '000000000000000000000000';
const result = await callTool(client, 'hyperdx_get_dashboard', {
id: fakeId,
});
expect(result.isError).toBe(true);
expect(getFirstText(result)).toContain('not found');
});
});
describe('hyperdx_save_dashboard', () => {
it('should create a new dashboard with tiles', async () => {
const sourceId = traceSource._id.toString();
const result = await callTool(client, 'hyperdx_save_dashboard', {
name: 'New MCP Dashboard',
tiles: [
{
name: 'Line Chart',
x: 0,
y: 0,
w: 12,
h: 4,
config: {
displayType: 'line',
sourceId,
select: [{ aggFn: 'count', where: '' }],
},
},
],
tags: ['mcp-test'],
});
expect(result.isError).toBeFalsy();
const output = JSON.parse(getFirstText(result));
expect(output.id).toBeDefined();
expect(output.name).toBe('New MCP Dashboard');
expect(output.tiles).toHaveLength(1);
expect(output.tiles[0].config.displayType).toBe('line');
expect(output.tags).toEqual(['mcp-test']);
// Verify in database
const dashboard = await Dashboard.findById(output.id);
expect(dashboard).not.toBeNull();
expect(dashboard?.name).toBe('New MCP Dashboard');
});
it('should create a dashboard with a markdown tile', async () => {
const result = await callTool(client, 'hyperdx_save_dashboard', {
name: 'Markdown Dashboard',
tiles: [
{
name: 'Notes',
config: {
displayType: 'markdown',
markdown: '# Hello World',
},
},
],
});
expect(result.isError).toBeFalsy();
const output = JSON.parse(getFirstText(result));
expect(output.tiles).toHaveLength(1);
expect(output.tiles[0].config.displayType).toBe('markdown');
});
it('should update an existing dashboard', async () => {
const sourceId = traceSource._id.toString();
// Create first
const createResult = await callTool(client, 'hyperdx_save_dashboard', {
name: 'Original Name',
tiles: [
{
name: 'Tile 1',
config: {
displayType: 'number',
sourceId,
select: [{ aggFn: 'count' }],
},
},
],
});
const created = JSON.parse(getFirstText(createResult));
// Update
const updateResult = await callTool(client, 'hyperdx_save_dashboard', {
id: created.id,
name: 'Updated Name',
tiles: [
{
name: 'Updated Tile',
config: {
displayType: 'table',
sourceId,
select: [{ aggFn: 'count' }],
},
},
],
tags: ['updated'],
});
expect(updateResult.isError).toBeFalsy();
const updated = JSON.parse(getFirstText(updateResult));
expect(updated.id).toBe(created.id);
expect(updated.name).toBe('Updated Name');
expect(updated.tiles).toHaveLength(1);
expect(updated.tiles[0].name).toBe('Updated Tile');
expect(updated.tiles[0].config.displayType).toBe('table');
});
it('should return error for missing source ID', async () => {
const fakeSourceId = '000000000000000000000000';
const result = await callTool(client, 'hyperdx_save_dashboard', {
name: 'Bad Dashboard',
tiles: [
{
name: 'Bad Tile',
config: {
displayType: 'line',
sourceId: fakeSourceId,
select: [{ aggFn: 'count' }],
},
},
],
});
expect(result.isError).toBe(true);
expect(getFirstText(result)).toContain('source');
});
it('should return error when updating non-existent dashboard', async () => {
const sourceId = traceSource._id.toString();
const result = await callTool(client, 'hyperdx_save_dashboard', {
id: '000000000000000000000000',
name: 'Ghost Dashboard',
tiles: [
{
name: 'Tile',
config: {
displayType: 'line',
sourceId,
select: [{ aggFn: 'count' }],
},
},
],
});
expect(result.isError).toBe(true);
expect(getFirstText(result)).toContain('not found');
});
it('should create a dashboard with multiple tile types', async () => {
const sourceId = traceSource._id.toString();
const result = await callTool(client, 'hyperdx_save_dashboard', {
name: 'Multi-tile Dashboard',
tiles: [
{
name: 'Line',
x: 0,
y: 0,
w: 12,
h: 4,
config: {
displayType: 'line',
sourceId,
select: [{ aggFn: 'count' }],
},
},
{
name: 'Table',
x: 0,
y: 4,
w: 12,
h: 4,
config: {
displayType: 'table',
sourceId,
select: [{ aggFn: 'count' }],
},
},
{
name: 'Number',
x: 0,
y: 8,
w: 6,
h: 3,
config: {
displayType: 'number',
sourceId,
select: [{ aggFn: 'count' }],
},
},
{
name: 'Pie',
x: 6,
y: 8,
w: 6,
h: 3,
config: {
displayType: 'pie',
sourceId,
select: [{ aggFn: 'count' }],
groupBy: 'SpanName',
},
},
{
name: 'Notes',
x: 0,
y: 11,
w: 12,
h: 2,
config: { displayType: 'markdown', markdown: '# Dashboard Notes' },
},
],
});
expect(result.isError).toBeFalsy();
const output = JSON.parse(getFirstText(result));
expect(output.tiles).toHaveLength(5);
});
it('should create a dashboard with a raw SQL tile', async () => {
const connectionId = connection._id.toString();
const result = await callTool(client, 'hyperdx_save_dashboard', {
name: 'SQL Dashboard',
tiles: [
{
name: 'Raw SQL',
config: {
configType: 'sql',
displayType: 'table',
connectionId,
sqlTemplate: 'SELECT 1 AS value LIMIT 1',
},
},
],
});
expect(result.isError).toBeFalsy();
const output = JSON.parse(getFirstText(result));
expect(output.tiles).toHaveLength(1);
});
});
describe('hyperdx_delete_dashboard', () => {
it('should delete an existing dashboard', async () => {
const dashboard = await new Dashboard({
name: 'To Delete',
tiles: [],
team: team._id,
}).save();
const result = await callTool(client, 'hyperdx_delete_dashboard', {
id: dashboard._id.toString(),
});
expect(result.isError).toBeFalsy();
const output = JSON.parse(getFirstText(result));
expect(output.deleted).toBe(true);
expect(output.id).toBe(dashboard._id.toString());
// Verify deleted from database
const found = await Dashboard.findById(dashboard._id);
expect(found).toBeNull();
});
it('should return error for non-existent dashboard', async () => {
const result = await callTool(client, 'hyperdx_delete_dashboard', {
id: '000000000000000000000000',
});
expect(result.isError).toBe(true);
expect(getFirstText(result)).toContain('not found');
});
});
describe('hyperdx_query_tile', () => {
it('should return error for non-existent dashboard', async () => {
const result = await callTool(client, 'hyperdx_query_tile', {
dashboardId: '000000000000000000000000',
tileId: 'some-tile-id',
});
expect(result.isError).toBe(true);
expect(getFirstText(result)).toContain('not found');
});
it('should return error for non-existent tile', async () => {
const sourceId = traceSource._id.toString();
const createResult = await callTool(client, 'hyperdx_save_dashboard', {
name: 'Tile Query Test',
tiles: [
{
name: 'My Tile',
config: {
displayType: 'number',
sourceId,
select: [{ aggFn: 'count' }],
},
},
],
});
const dashboard = JSON.parse(getFirstText(createResult));
const result = await callTool(client, 'hyperdx_query_tile', {
dashboardId: dashboard.id,
tileId: 'non-existent-tile-id',
});
expect(result.isError).toBe(true);
expect(getFirstText(result)).toContain('Tile not found');
});
it('should return error for invalid time range', async () => {
const sourceId = traceSource._id.toString();
const createResult = await callTool(client, 'hyperdx_save_dashboard', {
name: 'Time Range Test',
tiles: [
{
name: 'Tile',
config: {
displayType: 'number',
sourceId,
select: [{ aggFn: 'count' }],
},
},
],
});
const dashboard = JSON.parse(getFirstText(createResult));
const result = await callTool(client, 'hyperdx_query_tile', {
dashboardId: dashboard.id,
tileId: dashboard.tiles[0].id,
startTime: 'not-a-date',
});
expect(result.isError).toBe(true);
expect(getFirstText(result)).toContain('Invalid');
});
it('should execute query for a valid tile', async () => {
const sourceId = traceSource._id.toString();
const createResult = await callTool(client, 'hyperdx_save_dashboard', {
name: 'Query Tile Test',
tiles: [
{
name: 'Count Tile',
config: {
displayType: 'number',
sourceId,
select: [{ aggFn: 'count' }],
},
},
],
});
const dashboard = JSON.parse(getFirstText(createResult));
const result = await callTool(client, 'hyperdx_query_tile', {
dashboardId: dashboard.id,
tileId: dashboard.tiles[0].id,
startTime: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
// Should succeed (may have empty results since no data inserted)
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
});
});

View file

@ -0,0 +1,53 @@
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js';
import {
type CallToolResult,
CallToolResultSchema,
} from '@modelcontextprotocol/sdk/types.js';
import { createServer } from '../mcpServer';
import { McpContext } from '../tools/types';
/**
* Connect an MCP server to an in-process Client via InMemoryTransport and
* return the client. This is the officially supported way to test MCP servers
* without accessing private SDK internals.
*/
/**
 * Connect an MCP server to an in-process Client via InMemoryTransport and
 * return the client. This is the officially supported way to test MCP servers
 * without accessing private SDK internals.
 */
export async function createTestClient(context: McpContext): Promise<Client> {
  const server = createServer(context);
  // Linked pair: whatever is written to one end is delivered to the other.
  const [clientEnd, serverEnd] = InMemoryTransport.createLinkedPair();
  await server.connect(serverEnd);
  const testClient = new Client({ name: 'test-client', version: '1.0.0' });
  await testClient.connect(clientEnd);
  return testClient;
}
/**
* Call a named MCP tool and return a properly-typed result.
*
* The SDK's `Client.callTool()` return type carries an index signature
* `[x: string]: unknown` that widens all property accesses to `unknown`.
* Re-parsing through `CallToolResultSchema` gives the concrete named type
* needed for clean test assertions.
*/
/**
 * Call a named MCP tool and return a properly-typed result.
 *
 * The SDK's `Client.callTool()` return type carries an index signature
 * `[x: string]: unknown` that widens all property accesses to `unknown`.
 * Re-parsing through `CallToolResultSchema` gives the concrete named type
 * needed for clean test assertions.
 */
export async function callTool(
  c: Client,
  name: string,
  args: Record<string, unknown> = {},
): Promise<CallToolResult> {
  const response = await c.callTool({ name, arguments: args });
  return CallToolResultSchema.parse(response);
}
/**
* Extract the text from the first content item of a tool result.
* Throws if the item is not a text block.
*/
/**
 * Extract the text from the first content item of a tool result.
 * Throws if the first item is missing or is not a text block.
 */
export function getFirstText(result: CallToolResult): string {
  const [first] = result.content;
  if (first?.type === 'text') {
    return first.text;
  }
  throw new Error(`Expected text content, got: ${JSON.stringify(first)}`);
}

View file

@ -0,0 +1,74 @@
import { parseTimeRange } from '../tools/query/helpers';
// parseTimeRange resolves optional ISO start/end strings into either a
// { startDate, endDate } pair or an { error } object (never throws).
describe('parseTimeRange', () => {
  it('should return default range (last 15 minutes) when no arguments provided', () => {
    // Bracket the call with Date.now() so endDate can be bounded without
    // mocking the clock.
    const before = Date.now();
    const result = parseTimeRange();
    const after = Date.now();
    expect(result).not.toHaveProperty('error');
    if ('error' in result) return; // narrows the union for TypeScript
    // endDate should be approximately now
    expect(result.endDate.getTime()).toBeGreaterThanOrEqual(before);
    expect(result.endDate.getTime()).toBeLessThanOrEqual(after);
    // startDate should be ~15 minutes before endDate
    const diffMs = result.endDate.getTime() - result.startDate.getTime();
    expect(diffMs).toBe(15 * 60 * 1000);
  });
  it('should use provided startTime and endTime', () => {
    const result = parseTimeRange(
      '2025-01-01T00:00:00Z',
      '2025-01-02T00:00:00Z',
    );
    expect(result).not.toHaveProperty('error');
    if ('error' in result) return;
    expect(result.startDate.toISOString()).toBe('2025-01-01T00:00:00.000Z');
    expect(result.endDate.toISOString()).toBe('2025-01-02T00:00:00.000Z');
  });
  it('should default startTime to 15 minutes before endTime', () => {
    // Only endTime is given; startTime is derived from it, not from "now".
    const result = parseTimeRange(undefined, '2025-06-15T10:00:00Z');
    expect(result).not.toHaveProperty('error');
    if ('error' in result) return;
    expect(result.endDate.toISOString()).toBe('2025-06-15T10:00:00.000Z');
    expect(result.startDate.toISOString()).toBe('2025-06-15T09:45:00.000Z');
  });
  it('should default endTime to now', () => {
    const before = Date.now();
    const result = parseTimeRange('2025-06-15T11:00:00Z');
    const after = Date.now();
    expect(result).not.toHaveProperty('error');
    if ('error' in result) return;
    expect(result.startDate.toISOString()).toBe('2025-06-15T11:00:00.000Z');
    // endDate falls back to the current time when omitted.
    expect(result.endDate.getTime()).toBeGreaterThanOrEqual(before);
    expect(result.endDate.getTime()).toBeLessThanOrEqual(after);
  });
  it('should return error for invalid startTime', () => {
    const result = parseTimeRange('not-a-date', '2025-01-01T00:00:00Z');
    expect(result).toHaveProperty('error');
    if (!('error' in result)) return;
    expect(result.error).toContain('Invalid');
  });
  it('should return error for invalid endTime', () => {
    const result = parseTimeRange('2025-01-01T00:00:00Z', 'garbage');
    expect(result).toHaveProperty('error');
    if (!('error' in result)) return;
    expect(result.error).toContain('Invalid');
  });
  it('should return error when both times are invalid', () => {
    const result = parseTimeRange('bad', 'also-bad');
    expect(result).toHaveProperty('error');
  });
});

View file

@ -0,0 +1,233 @@
import { SourceKind } from '@hyperdx/common-utils/dist/types';
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import * as config from '@/config';
import {
DEFAULT_DATABASE,
DEFAULT_TRACES_TABLE,
getLoggedInAgent,
getServer,
} from '@/fixtures';
import Connection from '@/models/connection';
import { Source } from '@/models/source';
import { McpContext } from '../tools/types';
import { callTool, createTestClient, getFirstText } from './mcpTestUtils';
describe('MCP Query Tool', () => {
const server = getServer();
let team: any;
let user: any;
let traceSource: any;
let connection: any;
let client: Client;
beforeAll(async () => {
await server.start();
});
beforeEach(async () => {
const result = await getLoggedInAgent(server);
team = result.team;
user = result.user;
connection = await Connection.create({
team: team._id,
name: 'Default',
host: config.CLICKHOUSE_HOST,
username: config.CLICKHOUSE_USER,
password: config.CLICKHOUSE_PASSWORD,
});
traceSource = await Source.create({
kind: SourceKind.Trace,
team: team._id,
from: {
databaseName: DEFAULT_DATABASE,
tableName: DEFAULT_TRACES_TABLE,
},
timestampValueExpression: 'Timestamp',
connection: connection._id,
name: 'Traces',
});
const context: McpContext = {
teamId: team._id.toString(),
userId: user._id.toString(),
};
client = await createTestClient(context);
});
afterEach(async () => {
await client.close();
await server.clearDBs();
});
afterAll(async () => {
await server.stop();
});
describe('builder queries', () => {
it('should execute a number query', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'number',
sourceId: traceSource._id.toString(),
select: [{ aggFn: 'count' }],
startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
const output = JSON.parse(getFirstText(result));
expect(output).toHaveProperty('result');
});
it('should execute a line chart query', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'line',
sourceId: traceSource._id.toString(),
select: [{ aggFn: 'count' }],
startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
it('should execute a table query', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'table',
sourceId: traceSource._id.toString(),
select: [{ aggFn: 'count' }],
groupBy: 'SpanName',
startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
it('should execute a pie query', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'pie',
sourceId: traceSource._id.toString(),
select: [{ aggFn: 'count' }],
groupBy: 'SpanName',
startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
it('should execute a stacked_bar query', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'stacked_bar',
sourceId: traceSource._id.toString(),
select: [{ aggFn: 'count' }],
startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
it('should use default time range when not provided', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'number',
sourceId: traceSource._id.toString(),
select: [{ aggFn: 'count' }],
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
it('should return result for query with no matching data', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'number',
sourceId: traceSource._id.toString(),
select: [{ aggFn: 'count', where: 'SpanName:z_impossible_value_xyz' }],
startTime: new Date(Date.now() - 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
});
describe('search queries', () => {
it('should execute a search query', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'search',
sourceId: traceSource._id.toString(),
where: '',
startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
it('should respect maxResults parameter', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'search',
sourceId: traceSource._id.toString(),
maxResults: 10,
startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
});
});
describe('SQL queries', () => {
it('should execute a raw SQL query', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'sql',
connectionId: connection._id.toString(),
sql: 'SELECT 1 AS value',
startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
it('should execute SQL with time macros', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'sql',
connectionId: connection._id.toString(),
sql: `SELECT count() AS cnt FROM ${DEFAULT_DATABASE}.${DEFAULT_TRACES_TABLE} WHERE $__timeFilter(Timestamp) LIMIT 10`,
startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
endTime: new Date().toISOString(),
});
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
});
});
describe('error handling', () => {
it('should return error for invalid time range', async () => {
const result = await callTool(client, 'hyperdx_query', {
displayType: 'number',
sourceId: traceSource._id.toString(),
select: [{ aggFn: 'count' }],
startTime: 'invalid-date',
});
expect(result.isError).toBe(true);
expect(getFirstText(result)).toContain('Invalid');
});
});
});

View file

@ -0,0 +1,160 @@
// Mock OpenTelemetry and all modules that transitively import it
// These must be declared before any imports
// Shared fake span; tests assert against these jest.fn() spies.
const mockSpan = {
  setAttribute: jest.fn(),
  setStatus: jest.fn(),
  recordException: jest.fn(),
  end: jest.fn(),
};
// Minimal tracer stub: startActiveSpan just invokes the callback with mockSpan,
// ignoring the span name.
const mockTracer = {
  startActiveSpan: (
    _name: string,
    fn: (span: typeof mockSpan) => Promise<unknown>,
  ) => fn(mockSpan),
};
jest.mock('@opentelemetry/api', () => ({
  __esModule: true,
  default: {
    trace: {
      getTracer: () => mockTracer,
    },
  },
  // Only the status codes referenced by the code under test are stubbed.
  SpanStatusCode: {
    OK: 1,
    ERROR: 2,
  },
}));
// Replace @/config with a minimal stub exposing only CODE_VERSION.
jest.mock('@/config', () => ({
  CODE_VERSION: 'test-version',
}));
// Replace the application logger with jest spies so nothing is written out.
jest.mock('@/utils/logger', () => ({
  __esModule: true,
  default: {
    info: jest.fn(),
    warn: jest.fn(),
    error: jest.fn(),
    debug: jest.fn(),
  },
}));
import { withToolTracing } from '../utils/tracing';
describe('withToolTracing', () => {
  const baseContext = { teamId: 'team-123', userId: 'user-456' };

  // Builds a fresh handler that resolves with a single text content item.
  const successHandler = (text: string) =>
    jest.fn().mockResolvedValue({ content: [{ type: 'text', text }] });

  beforeEach(() => {
    jest.clearAllMocks();
  });

  it('should call the handler and return its result', async () => {
    const toolHandler = successHandler('hello');
    const wrapped = withToolTracing('test_tool', baseContext, toolHandler);

    const result = await wrapped({ some: 'args' });

    expect(toolHandler).toHaveBeenCalledWith({ some: 'args' });
    expect(result).toEqual({
      content: [{ type: 'text', text: 'hello' }],
    });
  });

  it('should set span attributes for tool name, team, and user', async () => {
    const wrapped = withToolTracing(
      'my_tool',
      baseContext,
      successHandler('ok'),
    );

    await wrapped({});

    expect(mockSpan.setAttribute).toHaveBeenCalledWith(
      'mcp.tool.name',
      'my_tool',
    );
    expect(mockSpan.setAttribute).toHaveBeenCalledWith(
      'mcp.team.id',
      'team-123',
    );
    expect(mockSpan.setAttribute).toHaveBeenCalledWith(
      'mcp.user.id',
      'user-456',
    );
  });

  it('should not set user id attribute when userId is undefined', async () => {
    const teamOnlyContext = { teamId: 'team-123' };
    const wrapped = withToolTracing(
      'my_tool',
      teamOnlyContext,
      successHandler('ok'),
    );

    await wrapped({});

    expect(mockSpan.setAttribute).not.toHaveBeenCalledWith(
      'mcp.user.id',
      expect.anything(),
    );
  });

  it('should set OK status for successful results', async () => {
    const wrapped = withToolTracing(
      'my_tool',
      baseContext,
      successHandler('ok'),
    );

    await wrapped({});

    expect(mockSpan.setStatus).toHaveBeenCalledWith({ code: 1 }); // SpanStatusCode.OK
    expect(mockSpan.end).toHaveBeenCalled();
  });

  it('should set ERROR status for isError results', async () => {
    const failingHandler = jest.fn().mockResolvedValue({
      isError: true,
      content: [{ type: 'text', text: 'something went wrong' }],
    });
    const wrapped = withToolTracing('my_tool', baseContext, failingHandler);

    await wrapped({});

    expect(mockSpan.setStatus).toHaveBeenCalledWith({ code: 2 }); // SpanStatusCode.ERROR
    expect(mockSpan.setAttribute).toHaveBeenCalledWith('mcp.tool.error', true);
    expect(mockSpan.end).toHaveBeenCalled();
  });

  it('should set ERROR status and re-throw on handler exception', async () => {
    const failure = new Error('boom');
    const throwingHandler = jest.fn().mockRejectedValue(failure);
    const wrapped = withToolTracing('my_tool', baseContext, throwingHandler);

    await expect(wrapped({})).rejects.toThrow('boom');

    expect(mockSpan.setStatus).toHaveBeenCalledWith({
      code: 2,
      message: 'boom',
    });
    expect(mockSpan.recordException).toHaveBeenCalledWith(failure);
    expect(mockSpan.end).toHaveBeenCalled();
  });

  it('should record duration on the span', async () => {
    const wrapped = withToolTracing(
      'my_tool',
      baseContext,
      successHandler('ok'),
    );

    await wrapped({});

    expect(mockSpan.setAttribute).toHaveBeenCalledWith(
      'mcp.tool.duration_ms',
      expect.any(Number),
    );
  });
});

View file

@ -0,0 +1,58 @@
import { setTraceAttributes } from '@hyperdx/node-opentelemetry';
import { createMcpExpressApp } from '@modelcontextprotocol/sdk/server/express.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { validateUserAccessKey } from '../middleware/auth';
import logger from '../utils/logger';
import rateLimiter, { rateLimiterKeyGenerator } from '../utils/rateLimiter';
import { createServer } from './mcpServer';
import { McpContext } from './tools/types';
const app = createMcpExpressApp();

const mcpRateLimiter = rateLimiter({
  windowMs: 60 * 1000, // 1 minute
  max: 100,
  standardHeaders: true,
  legacyHeaders: false,
  keyGenerator: rateLimiterKeyGenerator,
});

/**
 * Stateless MCP endpoint: every request gets a fresh server + transport pair
 * that is torn down when the request finishes, succeed or fail.
 */
app.all('/', mcpRateLimiter, validateUserAccessKey, async (req, res) => {
  const teamId = req.user?.team;
  if (!teamId) {
    logger.warn('MCP request rejected: no teamId');
    res.sendStatus(403);
    return;
  }

  const userId = req.user?._id?.toString();
  const context: McpContext = {
    teamId: teamId.toString(),
    userId,
  };

  setTraceAttributes({
    'mcp.team.id': context.teamId,
    ...(userId && { 'mcp.user.id': userId }),
  });
  logger.info({ teamId: context.teamId, userId }, 'MCP request received');

  const server = createServer(context);
  // Allocated after the auth check so a rejected request never creates a
  // transport (previously it was created first and never closed on the 403
  // path).
  const transport = new StreamableHTTPServerTransport({
    sessionIdGenerator: undefined, // stateless
  });

  try {
    await server.connect(transport);
    await transport.handleRequest(req, res, req.body);
  } catch (err) {
    // Express 4 does not forward rejected async handlers to error middleware,
    // so without this catch a failure here would be an unhandled rejection
    // and the client request would hang.
    logger.error({ teamId: context.teamId, error: err }, 'MCP request failed');
    if (!res.headersSent) {
      res.sendStatus(500);
    }
  } finally {
    await server.close();
    await transport.close();
  }
});

export default app;

View file

@ -0,0 +1,21 @@
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { CODE_VERSION } from '@/config';
import dashboardPrompts from './prompts/dashboards/index';
import dashboardsTools from './tools/dashboards/index';
import queryTools from './tools/query/index';
import { McpContext } from './tools/types';
/**
 * Builds a per-request MCP server scoped to the given team/user context.
 *
 * Registers the dashboard tools, query tools, and dashboard prompts on a
 * fresh McpServer instance; callers own its lifecycle (connect/close).
 *
 * @param context - team (and optional user) the server acts on behalf of.
 * @returns the configured McpServer.
 */
export function createServer(context: McpContext) {
  const server = new McpServer({
    name: 'hyperdx',
    // Versioned with the app build; "-beta" marks the MCP surface as unstable.
    version: `${CODE_VERSION}-beta`,
  });
  dashboardsTools(server, context);
  queryTools(server, context);
  dashboardPrompts(server, context);
  return server;
}

View file

@ -0,0 +1,719 @@
// ─── Prompt content builders ──────────────────────────────────────────────────
// Each function returns a plain string that is injected as a prompt message.
/**
 * Builds the "create_dashboard" prompt text.
 *
 * @param sourceSummary - preformatted list of the team's sources/connections,
 *   injected verbatim into the prompt.
 * @param traceSourceId - source ID substituted into the example tiles below.
 * @param logSourceId - NOTE(review): currently unused in the prompt body
 *   (only the trace source appears in the example) — confirm whether
 *   log-based example tiles were intended here.
 * @param description - optional user goal; when present, a tailoring hint is
 *   prepended to the prompt.
 * @returns the full prompt string handed to the LLM.
 */
export function buildCreateDashboardPrompt(
  sourceSummary: string,
  traceSourceId: string,
  logSourceId: string,
  description?: string,
): string {
  const userContext = description
    ? `\nThe user wants to create a dashboard for: ${description}\nTailor the dashboard tiles to match this goal.\n`
    : '';
  // Everything below is a single template literal; its (flat) indentation is
  // part of the emitted prompt text and must not be reformatted.
  return `You are an expert at creating HyperDX observability dashboards.
${userContext}
${sourceSummary}
IMPORTANT: Call hyperdx_list_sources first to get the full column schema and attribute keys for each source. The source IDs above are correct, but you need the schema details to write accurate queries.
== WORKFLOW ==
1. Call hyperdx_list_sources get source IDs, column schemas, and attribute keys
2. Design tiles pick tile types that match the monitoring goal
3. Call hyperdx_save_dashboard create the dashboard with all tiles
4. Call hyperdx_query_tile on each tile validate queries return data
== TILE TYPE GUIDE ==
Use BUILDER tiles (with sourceId) for most cases:
line Time-series trends (error rate, request volume, latency over time)
stacked_bar Compare categories over time (requests by service, errors by status code)
number Single KPI metric (total requests, current error rate, p99 latency)
table Ranked lists (top endpoints by latency, error counts by service)
pie Proportional breakdowns (traffic share by service, errors by type)
search Browse raw log/event rows (error logs, recent traces)
markdown Dashboard notes, section headers, or documentation
Use RAW SQL tiles (with connectionId) only for advanced queries:
Requires configType: "sql" plus a displayType (line, stacked_bar, table, number, pie)
Use when you need JOINs, sub-queries, CTEs, or queries the builder cannot express
== COLUMN NAMING ==
- Top-level columns use PascalCase by default: Duration, StatusCode, SpanName, Body, SeverityText, ServiceName
NOTE: These are defaults for the standard HyperDX schema. Custom sources may use different names.
Always call hyperdx_list_sources to get the real column names and keyColumns for each source.
- Map-type columns use bracket syntax: SpanAttributes['http.method'], ResourceAttributes['service.name']
NEVER use dot notation for Map columns (SpanAttributes.http.method) always use brackets.
- JSON-type columns use dot notation: JsonColumn.key.subkey
Check the jsType returned by hyperdx_list_sources to determine whether a column is Map or JSON.
- Call hyperdx_list_sources to discover the exact column names, types, and attribute keys
== LAYOUT GRID ==
The dashboard grid is 24 columns wide. Tiles are positioned with (x, y, w, h):
- Number tiles: w=6, h=4 fit 4 across in a row
- Line/Bar charts: w=12, h=4 fit 2 side-by-side
- Tables: w=24, h=6 full width
- Search tiles: w=24, h=6 full width
- Markdown: w=24, h=2 full width section header
Recommended layout pattern (top to bottom):
Row 0: KPI number tiles across the top (y=0)
Row 1: Time-series charts (y=4)
Row 2: Tables or search tiles (y=8)
== FILTER SYNTAX (Lucene) ==
Simple match: level:error
AND: service.name:api AND http.status_code:>=500
OR: level:error OR level:fatal
Wildcards: service.name:front*
Negation: NOT level:debug
Exists: _exists_:http.route
Range: Duration:>1000000000
Phrase: Body:"connection refused"
Grouped: (level:error OR level:fatal) AND service.name:api
== COMPLETE EXAMPLE ==
Here is a full dashboard creation call with properly structured tiles:
hyperdx_save_dashboard({
name: "Service Overview",
tags: ["overview"],
tiles: [
{
name: "Total Requests",
x: 0, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "count" }]
}
},
{
name: "Error Count",
x: 6, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "count", where: "StatusCode:STATUS_CODE_ERROR" }]
}
},
{
name: "P95 Latency (ms)",
x: 12, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "quantile", valueExpression: "Duration", level: 0.95 }]
}
},
{
name: "Request Rate by Service",
x: 0, y: 4, w: 12, h: 4,
config: {
displayType: "line",
sourceId: "${traceSourceId}",
select: [{ aggFn: "count" }],
groupBy: "ResourceAttributes['service.name']"
}
},
{
name: "Error Rate Over Time",
x: 12, y: 4, w: 12, h: 4,
config: {
displayType: "line",
sourceId: "${traceSourceId}",
select: [
{ aggFn: "count", where: "StatusCode:STATUS_CODE_ERROR", alias: "Errors" },
{ aggFn: "count", alias: "Total" }
],
asRatio: true
}
},
{
name: "Top Endpoints by Request Count",
x: 0, y: 8, w: 24, h: 6,
config: {
displayType: "table",
sourceId: "${traceSourceId}",
groupBy: "SpanName",
select: [
{ aggFn: "count", alias: "Requests" },
{ aggFn: "avg", valueExpression: "Duration", alias: "Avg Duration" },
{ aggFn: "quantile", valueExpression: "Duration", level: 0.95, alias: "P95 Duration" }
]
}
}
]
})
== STATUS CODE & SEVERITY VALUES ==
IMPORTANT: The exact values for StatusCode and SeverityText vary by deployment.
Do NOT assume values like "STATUS_CODE_ERROR", "Ok", "error", or "fatal".
Always call hyperdx_list_sources first and inspect the keyValues / mapAttributeKeys
returned for each source to discover the real values used in your data.
== COMMON MISTAKES TO AVOID ==
- Using valueExpression with aggFn "count" count does not take a valueExpression
- Forgetting valueExpression for non-count aggFns avg, sum, min, max, quantile all require it
- Using dot notation for Map-type attributes always use SpanAttributes['key'] bracket syntax for Map columns
- Not calling hyperdx_list_sources first you need real source IDs, not placeholders
- Not validating with hyperdx_query_tile after saving tiles can silently fail
- Number and Pie tiles accept exactly 1 select item not multiple
- Missing level for quantile aggFn must specify 0.5, 0.9, 0.95, or 0.99
- Assuming StatusCode or SeverityText values always inspect the source first`;
}
/**
 * Builds the "dashboard_examples" prompt text.
 *
 * Holds five copy-paste-ready dashboard definitions keyed by pattern name
 * (service_overview, error_tracking, latency, log_analysis,
 * infrastructure_sql). When `pattern` matches a key (after lowercasing and
 * normalizing spaces/hyphens to underscores) only that example is returned;
 * an unknown pattern returns all examples with a notice; no pattern returns
 * all examples.
 *
 * @param traceSourceId - substituted into trace-based example tiles.
 * @param logSourceId - substituted into log-based example tiles.
 * @param connectionId - substituted into raw-SQL example tiles.
 * @param pattern - optional pattern filter (see above).
 * @returns the prompt string handed to the LLM.
 */
export function buildDashboardExamplesPrompt(
  traceSourceId: string,
  logSourceId: string,
  connectionId: string,
  pattern?: string,
): string {
  const examples: Record<string, string> = {};
  // NOTE: each example below is one template literal; its flat indentation is
  // part of the emitted prompt text and must not be reformatted.
  examples['service_overview'] = `
== SERVICE HEALTH OVERVIEW ==
A high-level view of service health with KPIs, trends, and endpoint details.
{
name: "Service Health Overview",
tags: ["overview", "service"],
tiles: [
{
name: "Total Requests",
x: 0, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "count" }]
}
},
{
name: "Error Count",
x: 6, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "count", where: "StatusCode:STATUS_CODE_ERROR" }]
}
},
{
name: "Avg Latency",
x: 12, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "avg", valueExpression: "Duration" }]
}
},
{
name: "P99 Latency",
x: 18, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "quantile", valueExpression: "Duration", level: 0.99 }]
}
},
{
name: "Request Volume Over Time",
x: 0, y: 4, w: 12, h: 4,
config: {
displayType: "line",
sourceId: "${traceSourceId}",
select: [{ aggFn: "count" }],
groupBy: "ResourceAttributes['service.name']"
}
},
{
name: "Error Rate Over Time",
x: 12, y: 4, w: 12, h: 4,
config: {
displayType: "line",
sourceId: "${traceSourceId}",
select: [
{ aggFn: "count", where: "StatusCode:STATUS_CODE_ERROR", alias: "Errors" },
{ aggFn: "count", alias: "Total" }
],
asRatio: true
}
},
{
name: "Top Endpoints",
x: 0, y: 8, w: 24, h: 6,
config: {
displayType: "table",
sourceId: "${traceSourceId}",
groupBy: "SpanName",
select: [
{ aggFn: "count", alias: "Requests" },
{ aggFn: "avg", valueExpression: "Duration", alias: "Avg Duration" },
{ aggFn: "quantile", valueExpression: "Duration", level: 0.95, alias: "P95" },
{ aggFn: "count", where: "StatusCode:STATUS_CODE_ERROR", alias: "Errors" }
]
}
}
]
}`;
  examples['error_tracking'] = `
== ERROR TRACKING ==
Focus on errors: volume, distribution, and raw error logs.
{
name: "Error Tracking",
tags: ["errors"],
tiles: [
{
name: "Total Errors",
x: 0, y: 0, w: 8, h: 4,
config: {
displayType: "number",
sourceId: "${logSourceId}",
select: [{ aggFn: "count", where: "SeverityText:error OR SeverityText:fatal" }]
}
},
{
name: "Errors Over Time by Service",
x: 0, y: 4, w: 12, h: 4,
config: {
displayType: "line",
sourceId: "${logSourceId}",
select: [{ aggFn: "count", where: "SeverityText:error OR SeverityText:fatal" }],
groupBy: "ResourceAttributes['service.name']"
}
},
{
name: "Error Breakdown by Service",
x: 12, y: 4, w: 12, h: 4,
config: {
displayType: "pie",
sourceId: "${logSourceId}",
select: [{ aggFn: "count", where: "SeverityText:error OR SeverityText:fatal" }],
groupBy: "ResourceAttributes['service.name']"
}
},
{
name: "Error Logs",
x: 0, y: 8, w: 24, h: 6,
config: {
displayType: "search",
sourceId: "${logSourceId}",
where: "SeverityText:error OR SeverityText:fatal"
}
}
]
}`;
  examples['latency'] = `
== LATENCY MONITORING ==
Track response times with percentile breakdowns and slow endpoint identification.
{
name: "Latency Monitoring",
tags: ["latency", "performance"],
tiles: [
{
name: "P50 Latency",
x: 0, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "quantile", valueExpression: "Duration", level: 0.5 }]
}
},
{
name: "P95 Latency",
x: 6, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "quantile", valueExpression: "Duration", level: 0.95 }]
}
},
{
name: "P99 Latency",
x: 12, y: 0, w: 6, h: 4,
config: {
displayType: "number",
sourceId: "${traceSourceId}",
select: [{ aggFn: "quantile", valueExpression: "Duration", level: 0.99 }]
}
},
{
name: "Latency Percentiles Over Time",
x: 0, y: 4, w: 24, h: 4,
config: {
displayType: "line",
sourceId: "${traceSourceId}",
select: [
{ aggFn: "quantile", valueExpression: "Duration", level: 0.5, alias: "P50" },
{ aggFn: "quantile", valueExpression: "Duration", level: 0.95, alias: "P95" },
{ aggFn: "quantile", valueExpression: "Duration", level: 0.99, alias: "P99" }
]
}
},
{
name: "Latency by Service",
x: 0, y: 8, w: 12, h: 4,
config: {
displayType: "stacked_bar",
sourceId: "${traceSourceId}",
select: [{ aggFn: "avg", valueExpression: "Duration" }],
groupBy: "ResourceAttributes['service.name']"
}
},
{
name: "Slowest Endpoints",
x: 12, y: 8, w: 12, h: 6,
config: {
displayType: "table",
sourceId: "${traceSourceId}",
groupBy: "SpanName",
select: [
{ aggFn: "quantile", valueExpression: "Duration", level: 0.95, alias: "P95 Duration" },
{ aggFn: "avg", valueExpression: "Duration", alias: "Avg Duration" },
{ aggFn: "count", alias: "Request Count" }
]
}
}
]
}`;
  examples['log_analysis'] = `
== LOG ANALYSIS ==
Analyze log volume, severity distribution, and browse log events.
{
name: "Log Analysis",
tags: ["logs"],
tiles: [
{
name: "Total Log Events",
x: 0, y: 0, w: 8, h: 4,
config: {
displayType: "number",
sourceId: "${logSourceId}",
select: [{ aggFn: "count" }]
}
},
{
name: "Log Volume by Severity",
x: 0, y: 4, w: 12, h: 4,
config: {
displayType: "stacked_bar",
sourceId: "${logSourceId}",
select: [{ aggFn: "count" }],
groupBy: "SeverityText"
}
},
{
name: "Severity Breakdown",
x: 12, y: 4, w: 12, h: 4,
config: {
displayType: "pie",
sourceId: "${logSourceId}",
select: [{ aggFn: "count" }],
groupBy: "SeverityText"
}
},
{
name: "Top Services by Log Volume",
x: 0, y: 8, w: 12, h: 6,
config: {
displayType: "table",
sourceId: "${logSourceId}",
groupBy: "ResourceAttributes['service.name']",
select: [
{ aggFn: "count", alias: "Log Count" },
{ aggFn: "count", where: "SeverityText:error OR SeverityText:fatal", alias: "Error Count" }
]
}
},
{
name: "Recent Logs",
x: 12, y: 8, w: 12, h: 6,
config: {
displayType: "search",
sourceId: "${logSourceId}"
}
}
]
}`;
  examples['infrastructure_sql'] = `
== INFRASTRUCTURE MONITORING (Raw SQL) ==
Advanced dashboard using raw SQL tiles for custom ClickHouse queries.
Use this pattern when you need JOINs, CTEs, or queries the builder cannot express.
{
name: "Infrastructure (SQL)",
tags: ["infrastructure", "sql"],
tiles: [
{
name: "Log Ingestion Rate Over Time",
x: 0, y: 0, w: 12, h: 4,
config: {
configType: "sql",
displayType: "line",
connectionId: "${connectionId}",
sqlTemplate: "SELECT $__timeInterval(Timestamp) AS ts, count() AS logs_per_interval FROM otel_logs WHERE $__timeFilter(Timestamp) GROUP BY ts ORDER BY ts"
}
},
{
name: "Top 20 Services by Span Count",
x: 12, y: 0, w: 12, h: 4,
config: {
configType: "sql",
displayType: "table",
connectionId: "${connectionId}",
sqlTemplate: "SELECT ServiceName, count() AS span_count, avg(Duration) AS avg_duration FROM otel_traces WHERE Timestamp >= fromUnixTimestamp64Milli({startDateMilliseconds:Int64}) AND Timestamp < fromUnixTimestamp64Milli({endDateMilliseconds:Int64}) GROUP BY ServiceName ORDER BY span_count DESC LIMIT 20"
}
},
{
name: "Error Rate by Service (SQL)",
x: 0, y: 4, w: 24, h: 4,
config: {
configType: "sql",
displayType: "line",
connectionId: "${connectionId}",
sqlTemplate: "SELECT $__timeInterval(Timestamp) AS ts, ServiceName, countIf(StatusCode = 'STATUS_CODE_ERROR') / count() AS error_rate FROM otel_traces WHERE $__timeFilter(Timestamp) GROUP BY ServiceName, ts ORDER BY ts"
}
}
]
}
SQL TEMPLATE REFERENCE:
Macros (expanded before execution):
$__timeFilter(col) col >= <start> AND col <= <end> (DateTime)
$__timeFilter_ms(col) same with DateTime64 millisecond precision
$__dateFilter(col) same with Date precision
$__timeInterval(col) time bucket: toStartOfInterval(toDateTime(col), INTERVAL ...)
$__timeInterval_ms(col) same with millisecond precision
$__fromTime / $__toTime start/end as DateTime values
$__fromTime_ms / $__toTime_ms start/end as DateTime64 values
$__interval_s raw interval in seconds
$__filters dashboard filter conditions (resolves to 1=1 when none)
Query parameters:
{startDateMilliseconds:Int64} start of date range in milliseconds
{endDateMilliseconds:Int64} end of date range in milliseconds
{intervalSeconds:Int64} time bucket size in seconds
{intervalMilliseconds:Int64} time bucket size in milliseconds
Available parameters by displayType:
line / stacked_bar startDate, endDate, interval (all available)
table / number / pie startDate, endDate only (no interval)`;
  if (pattern) {
    // Normalize "Service Overview" / "service-overview" to "service_overview".
    const key = pattern.toLowerCase().replace(/[\s-]+/g, '_');
    // entries().find() rather than direct indexing avoids matching inherited
    // Object.prototype keys for arbitrary user-supplied pattern strings.
    const matched = Object.entries(examples).find(([k]) => k === key);
    if (matched) {
      return `Dashboard example for pattern: ${pattern}\n\nReplace sourceId/connectionId values with real IDs from hyperdx_list_sources.\nNOTE: Column names below (Duration, StatusCode, SpanName, etc.) are defaults for the standard schema. Call hyperdx_list_sources to get the actual column names for your sources.\n${matched[1]}`;
    }
    // Unknown pattern: tell the caller, then fall back to showing everything.
    return (
      `No example found for pattern "${pattern}". Available patterns: ${Object.keys(examples).join(', ')}\n\n` +
      `Showing all examples below.\n\n` +
      Object.values(examples).join('\n')
    );
  }
  return (
    `Complete dashboard examples for common observability patterns.\n` +
    `Replace sourceId/connectionId values with real IDs from hyperdx_list_sources.\n` +
    `NOTE: Column names below (Duration, StatusCode, SpanName, etc.) are defaults for the standard schema. Call hyperdx_list_sources to get the actual column names for your sources.\n\n` +
    `Available patterns: ${Object.keys(examples).join(', ')}\n` +
    Object.values(examples).join('\n')
  );
}
/**
 * Builds the static "query_guide" prompt text: a reference for aggregation
 * functions, column naming, Lucene/SQL filter syntax, raw SQL template
 * macros, per-tile-type constraints, and common mistakes.
 *
 * Takes no arguments and always returns the same string; the body is one
 * template literal whose flat indentation is part of the emitted text.
 *
 * @returns the query-writing guide string handed to the LLM.
 */
export function buildQueryGuidePrompt(): string {
  return `Reference guide for writing queries with HyperDX MCP tools (hyperdx_query and hyperdx_save_dashboard).
== AGGREGATION FUNCTIONS (aggFn) ==
count Count matching rows. Does NOT take a valueExpression.
sum Sum of a numeric column. Requires valueExpression.
avg Average of a numeric column. Requires valueExpression.
min Minimum value. Requires valueExpression.
max Maximum value. Requires valueExpression.
count_distinct Count of unique values. Requires valueExpression.
quantile Percentile value. Requires valueExpression AND level (0.5, 0.9, 0.95, or 0.99).
last_value Most recent value of a column. Requires valueExpression.
none Pass a raw expression unchanged. Requires valueExpression.
Examples:
{ aggFn: "count" }
{ aggFn: "avg", valueExpression: "Duration" }
{ aggFn: "quantile", valueExpression: "Duration", level: 0.95 }
{ aggFn: "count_distinct", valueExpression: "ResourceAttributes['service.name']" }
{ aggFn: "sum", valueExpression: "Duration", where: "StatusCode:STATUS_CODE_ERROR" }
== COLUMN NAMING ==
Top-level columns (PascalCase defaults use directly in valueExpression and groupBy):
Duration, StatusCode, SpanName, ServiceName, Body, SeverityText,
Timestamp, TraceId, SpanId, SpanKind, ParentSpanId
NOTE: These are the defaults for the standard HyperDX schema. Custom sources may
use different column names. Always verify with hyperdx_list_sources, which returns
the real column names and keyColumns expressions for each source.
Map-type columns (bracket syntax access keys via ['key']):
SpanAttributes['http.method']
SpanAttributes['http.route']
SpanAttributes['http.status_code']
ResourceAttributes['service.name']
ResourceAttributes['deployment.environment']
IMPORTANT: Always use bracket syntax for Map-type columns. Never use dot notation for Maps.
Correct: SpanAttributes['http.method']
Incorrect: SpanAttributes.http.method
JSON-type columns (dot notation access nested keys via dot path):
JsonColumn.key.subkey
NOTE: Check the jsType field returned by hyperdx_list_sources to determine
whether a column is Map (use brackets) or JSON (use dots).
== LUCENE FILTER SYNTAX ==
Used in the "where" field of select items and search tiles.
Basic match: level:error
AND: service.name:api AND http.status_code:>=500
OR: level:error OR level:fatal
NOT: NOT level:debug
Wildcard: service.name:front*
Phrase: Body:"connection refused"
Exists: _exists_:http.route
Range (numeric): Duration:>1000000000
Range (inclusive): http.status_code:[400 TO 499]
Grouped: (level:error OR level:fatal) AND service.name:api
NOTE: In Lucene filters, use dot notation for attribute keys (service.name, http.method).
This is different from valueExpression/groupBy which requires bracket syntax (SpanAttributes['http.method']).
== SQL FILTER SYNTAX ==
Alternative to Lucene. Set whereLanguage: "sql" when using SQL syntax.
Basic: SeverityText = 'error'
AND/OR: ServiceName = 'api' AND StatusCode = 'STATUS_CODE_ERROR'
IN: ServiceName IN ('api', 'web', 'worker')
LIKE: Body LIKE '%timeout%'
Comparison: Duration > 1000000000
Map access: SpanAttributes['http.status_code'] = '500'
== RAW SQL TEMPLATES ==
For configType: "sql" tiles, write ClickHouse SQL with template macros:
MACROS (expanded before execution):
$__timeFilter(col) col >= <start> AND col <= <end>
$__timeFilter_ms(col) same with DateTime64 millisecond precision
$__dateFilter(col) same with Date precision
$__dateTimeFilter(d, t) filters on both Date and DateTime columns
$__timeInterval(col) time bucket expression for GROUP BY
$__timeInterval_ms(col) same with millisecond precision
$__fromTime / $__toTime start/end as DateTime values
$__fromTime_ms / $__toTime_ms start/end as DateTime64 values
$__interval_s raw interval in seconds (for arithmetic)
$__filters dashboard filter conditions (1=1 when none)
QUERY PARAMETERS (ClickHouse parameterized syntax):
{startDateMilliseconds:Int64}
{endDateMilliseconds:Int64}
{intervalSeconds:Int64}
{intervalMilliseconds:Int64}
TIME-SERIES EXAMPLE (line / stacked_bar):
SELECT
$__timeInterval(Timestamp) AS ts,
ServiceName,
count() AS requests
FROM otel_traces
WHERE $__timeFilter(Timestamp)
GROUP BY ServiceName, ts
ORDER BY ts
TABLE EXAMPLE:
SELECT
ServiceName,
count() AS request_count,
avg(Duration) AS avg_duration,
quantile(0.95)(Duration) AS p95_duration
FROM otel_traces
WHERE Timestamp >= fromUnixTimestamp64Milli({startDateMilliseconds:Int64})
AND Timestamp < fromUnixTimestamp64Milli({endDateMilliseconds:Int64})
GROUP BY ServiceName
ORDER BY request_count DESC
LIMIT 50
IMPORTANT: Always include a LIMIT clause in table/number/pie SQL queries.
== PER-TILE TYPE CONSTRAINTS ==
number Exactly 1 select item. No groupBy.
pie Exactly 1 select item. groupBy defines the slices.
line 1-20 select items. Optional groupBy splits into series.
stacked_bar 1-20 select items. Optional groupBy splits into stacks.
table 1-20 select items. Optional groupBy defines row groups.
search No select items (select is a column list string). where is the filter.
markdown No select items. Set markdown field with content.
== asRatio ==
Set asRatio: true on line/stacked_bar/table tiles with exactly 2 select items
to plot the first as a ratio of the second. Useful for error rates:
select: [
{ aggFn: "count", where: "StatusCode:STATUS_CODE_ERROR", alias: "Errors" },
{ aggFn: "count", alias: "Total" }
],
asRatio: true
== COMMON MISTAKES ==
1. Using valueExpression with aggFn "count"
Wrong: { aggFn: "count", valueExpression: "Duration" }
Correct: { aggFn: "count" }
2. Forgetting valueExpression for non-count aggFns
Wrong: { aggFn: "avg" }
Correct: { aggFn: "avg", valueExpression: "Duration" }
3. Using dot notation for Map-type attributes in valueExpression/groupBy
Wrong: groupBy: "SpanAttributes.http.method"
Correct: groupBy: "SpanAttributes['http.method']"
NOTE: JSON-type columns DO use dot notation. Check jsType from hyperdx_list_sources.
4. Multiple select items on number/pie tiles
Wrong: displayType: "number", select: [{ aggFn: "count" }, { aggFn: "avg", ... }]
Correct: displayType: "number", select: [{ aggFn: "count" }]
5. Missing level for quantile
Wrong: { aggFn: "quantile", valueExpression: "Duration" }
Correct: { aggFn: "quantile", valueExpression: "Duration", level: 0.95 }
6. Forgetting to validate tiles after saving
Always call hyperdx_query_tile after hyperdx_save_dashboard to verify each tile returns data.
7. Using sourceId with SQL tiles or connectionId with builder tiles
Builder tiles (line, table, etc.) use sourceId.
SQL tiles (configType: "sql") use connectionId.
8. Assuming StatusCode or SeverityText values
Values like STATUS_CODE_ERROR, Ok, error, fatal vary by deployment.
Always call hyperdx_list_sources and inspect real keyValues from the source
before writing filters that depend on these columns.`;
}

View file

@ -0,0 +1,48 @@
// ─── Source/connection summary helpers ───────────────────────────────────────
/**
 * Formats a human-readable summary of available sources and connections for
 * injection into prompt text. Returns a discovery hint when both lists are
 * empty.
 */
export function buildSourceSummary(
  sources: { _id: unknown; name: string; kind: string; connection: unknown }[],
  connections: { _id: unknown; name: string }[],
): string {
  if (sources.length === 0 && connections.length === 0) {
    return 'No sources or connections found. Call hyperdx_list_sources to discover available data.';
  }

  const sourceSection =
    sources.length > 0
      ? [
          'AVAILABLE SOURCES (use sourceId with builder tiles):',
          ...sources.map(
            s =>
              ` - "${s.name}" (${s.kind}) — sourceId: "${s._id}", connectionId: "${s.connection}"`,
          ),
        ]
      : [];

  const connectionSection =
    connections.length > 0
      ? [
          // Blank separator line between the two sections.
          '',
          'AVAILABLE CONNECTIONS (use connectionId with raw SQL tiles only):',
          ...connections.map(c => ` - "${c.name}" — connectionId: "${c._id}"`),
        ]
      : [];

  return [...sourceSection, ...connectionSection].join('\n');
}
/**
 * Picks a representative source ID: the first source of `preferredKind` when
 * one exists, otherwise the first source overall, otherwise the literal
 * placeholder '<SOURCE_ID>'.
 */
export function getFirstSourceId(
  sources: { _id: unknown; kind: string }[],
  preferredKind?: string,
): string {
  let chosen;
  if (preferredKind) {
    chosen = sources.find(s => s.kind === preferredKind);
  }
  if (chosen === undefined) {
    chosen = sources[0];
  }
  return chosen ? String(chosen._id) : '<SOURCE_ID>';
}
/**
 * Returns the ID of the first connection, or the literal placeholder
 * '<CONNECTION_ID>' when the list is empty.
 */
export function getFirstConnectionId(connections: { _id: unknown }[]): string {
  const [first] = connections;
  if (first === undefined) {
    return '<CONNECTION_ID>';
  }
  return String(first._id);
}

View file

@ -0,0 +1,195 @@
import { z } from 'zod';
import { getConnectionsByTeam } from '@/controllers/connection';
import { getSources } from '@/controllers/sources';
import logger from '@/utils/logger';
import type { PromptDefinition } from '../../tools/types';
import {
buildCreateDashboardPrompt,
buildDashboardExamplesPrompt,
buildQueryGuidePrompt,
} from './content';
import {
buildSourceSummary,
getFirstConnectionId,
getFirstSourceId,
} from './helpers';
const dashboardPrompts: PromptDefinition = (server, context) => {
const { teamId } = context;
// ── create_dashboard ──────────────────────────────────────────────────────
server.registerPrompt(
'create_dashboard',
{
title: 'Create a Dashboard',
description:
'Create a HyperDX dashboard with the MCP tools. ' +
'Follow the recommended workflow, pick tile types, write queries, ' +
'and validate results — using your real data sources.',
argsSchema: {
description: z
.string()
.optional()
.describe(
'What the dashboard should monitor (e.g. "API error rates and latency")',
),
},
},
async ({ description }) => {
let sourceSummary: string;
let traceSourceId: string;
let logSourceId: string;
try {
const [sources, connections] = await Promise.all([
getSources(teamId),
getConnectionsByTeam(teamId),
]);
sourceSummary = buildSourceSummary(
sources.map(s => ({
_id: s._id,
name: s.name,
kind: s.kind,
connection: s.connection,
})),
connections.map(c => ({ _id: c._id, name: c.name })),
);
traceSourceId = getFirstSourceId(
sources.map(s => ({ _id: s._id, kind: s.kind })),
'trace',
);
logSourceId = getFirstSourceId(
sources.map(s => ({ _id: s._id, kind: s.kind })),
'log',
);
} catch (e) {
logger.warn(
{ teamId, error: e },
'Failed to fetch sources for create_dashboard prompt',
);
sourceSummary =
'Could not fetch sources. Call hyperdx_list_sources to discover available data.';
traceSourceId = '<SOURCE_ID>';
logSourceId = '<SOURCE_ID>';
}
return {
messages: [
{
role: 'user' as const,
content: {
type: 'text' as const,
text: buildCreateDashboardPrompt(
sourceSummary,
traceSourceId,
logSourceId,
description,
),
},
},
],
};
},
);
// ── dashboard_examples ────────────────────────────────────────────────────
// Prompt returning copy-paste-ready dashboard examples, pre-filled with real
// source/connection IDs from the team when they can be fetched.
server.registerPrompt(
  'dashboard_examples',
  {
    title: 'Dashboard Examples',
    description:
      'Get copy-paste-ready dashboard examples for common observability patterns: ' +
      'service_overview, error_tracking, latency, log_analysis, infrastructure_sql.',
    argsSchema: {
      pattern: z
        .string()
        .optional()
        .describe(
          'Filter to a specific pattern: service_overview, error_tracking, latency, log_analysis, infrastructure_sql',
        ),
    },
  },
  async ({ pattern }) => {
    let traceSourceId: string;
    let logSourceId: string;
    let connectionId: string;
    try {
      const [sources, connections] = await Promise.all([
        getSources(teamId),
        getConnectionsByTeam(teamId),
      ]);
      traceSourceId = getFirstSourceId(
        sources.map(s => ({ _id: s._id, kind: s.kind })),
        'trace',
      );
      logSourceId = getFirstSourceId(
        sources.map(s => ({ _id: s._id, kind: s.kind })),
        'log',
      );
      connectionId = getFirstConnectionId(
        connections.map(c => ({ _id: c._id })),
      );
    } catch (e) {
      // Degrade gracefully with placeholder IDs if the lookups fail.
      logger.warn(
        { teamId, error: e },
        'Failed to fetch sources for dashboard_examples prompt',
      );
      traceSourceId = '<TRACE_SOURCE_ID>';
      logSourceId = '<LOG_SOURCE_ID>';
      connectionId = '<CONNECTION_ID>';
    }
    return {
      messages: [
        {
          role: 'user' as const,
          content: {
            type: 'text' as const,
            text: buildDashboardExamplesPrompt(
              traceSourceId,
              logSourceId,
              connectionId,
              pattern,
            ),
          },
        },
      ],
    };
  },
);
// ── query_guide ───────────────────────────────────────────────────────────
// Static prompt: takes no arguments and performs no I/O; just returns the
// query-syntax reference text.
server.registerPrompt(
  'query_guide',
  {
    title: 'Query Writing Guide',
    description:
      'Look up HyperDX query syntax: aggregation functions, ' +
      'Lucene/SQL filters, raw SQL macros, column naming, ' +
      'per-tile constraints, and common mistakes.',
  },
  async () => {
    return {
      messages: [
        {
          role: 'user' as const,
          content: {
            type: 'text' as const,
            text: buildQueryGuidePrompt(),
          },
        },
      ],
    };
  },
);
};
export default dashboardPrompts;

View file

@ -0,0 +1,63 @@
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import mongoose from 'mongoose';
import { z } from 'zod';
import { deleteDashboard } from '@/controllers/dashboard';
import Dashboard from '@/models/dashboard';
import { withToolTracing } from '../../utils/tracing';
import type { McpContext } from '../types';
export function registerDeleteDashboard(
server: McpServer,
context: McpContext,
): void {
const { teamId } = context;
server.registerTool(
'hyperdx_delete_dashboard',
{
title: 'Delete Dashboard',
description:
'Permanently delete a dashboard by ID. Also removes any alerts attached to its tiles. ' +
'Use hyperdx_get_dashboard (without an ID) to list available dashboard IDs.',
inputSchema: z.object({
id: z.string().describe('Dashboard ID to delete.'),
}),
},
withToolTracing(
'hyperdx_delete_dashboard',
context,
async ({ id: dashboardId }) => {
if (!mongoose.Types.ObjectId.isValid(dashboardId)) {
return {
isError: true,
content: [{ type: 'text' as const, text: 'Invalid dashboard ID' }],
};
}
const existing = await Dashboard.findOne({
_id: dashboardId,
team: teamId,
}).lean();
if (!existing) {
return {
isError: true,
content: [{ type: 'text' as const, text: 'Dashboard not found' }],
};
}
await deleteDashboard(dashboardId, new mongoose.Types.ObjectId(teamId));
return {
content: [
{
type: 'text' as const,
text: JSON.stringify({ deleted: true, id: dashboardId }, null, 2),
},
],
};
},
),
);
}

View file

@ -0,0 +1,87 @@
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import mongoose from 'mongoose';
import { z } from 'zod';
import * as config from '@/config';
import { getDashboards } from '@/controllers/dashboard';
import Dashboard from '@/models/dashboard';
import { convertToExternalDashboard } from '@/routers/external-api/v2/utils/dashboards';
import { withToolTracing } from '../../utils/tracing';
import type { McpContext } from '../types';
/**
 * Registers the `hyperdx_get_dashboard` MCP tool.
 * Without an ID it lists dashboard summaries; with an ID it returns the full
 * external representation of one dashboard.
 */
export function registerGetDashboard(
  server: McpServer,
  context: McpContext,
): void {
  const { teamId } = context;
  const frontendUrl = config.FRONTEND_URL;

  // Append a UI deep link only when a frontend URL is configured.
  const linkFor = (dashboardId: unknown) =>
    frontendUrl ? { url: `${frontendUrl}/dashboards/${dashboardId}` } : {};

  server.registerTool(
    'hyperdx_get_dashboard',
    {
      title: 'Get Dashboard(s)',
      description:
        'Without an ID: list all dashboards (returns IDs, names, tags). ' +
        'With an ID: get full dashboard detail including all tiles and configuration.',
      inputSchema: z.object({
        id: z
          .string()
          .optional()
          .describe(
            'Dashboard ID. Omit to list all dashboards, provide to get full detail.',
          ),
      }),
    },
    withToolTracing('hyperdx_get_dashboard', context, async ({ id }) => {
      // List mode: summarize every dashboard owned by the team.
      if (!id) {
        const dashboards = await getDashboards(
          new mongoose.Types.ObjectId(teamId),
        );
        const summaries = dashboards.map(d => ({
          id: d._id.toString(),
          name: d.name,
          tags: d.tags,
          ...linkFor(d._id),
        }));
        return {
          content: [
            { type: 'text' as const, text: JSON.stringify(summaries, null, 2) },
          ],
        };
      }

      // Detail mode: validate the ID, then fetch within the team scope.
      if (!mongoose.Types.ObjectId.isValid(id)) {
        return {
          isError: true,
          content: [{ type: 'text' as const, text: 'Invalid dashboard ID' }],
        };
      }
      const dashboard = await Dashboard.findOne({ _id: id, team: teamId });
      if (!dashboard) {
        return {
          isError: true,
          content: [{ type: 'text' as const, text: 'Dashboard not found' }],
        };
      }

      const detail = {
        ...convertToExternalDashboard(dashboard),
        ...linkFor(dashboard._id),
      };
      return {
        content: [
          { type: 'text' as const, text: JSON.stringify(detail, null, 2) },
        ],
      };
    }),
  );
}

View file

@ -0,0 +1,23 @@
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import type { McpContext, ToolDefinition } from '../types';
import { registerDeleteDashboard } from './deleteDashboard';
import { registerGetDashboard } from './getDashboard';
import { registerListSources } from './listSources';
import { registerQueryTile } from './queryTile';
import { registerSaveDashboard } from './saveDashboard';
export * from './schemas';
// Registers every dashboard-related MCP tool. Registration order is
// preserved so clients see a consistent tool listing.
const dashboardsTools: ToolDefinition = (
  server: McpServer,
  context: McpContext,
) => {
  const registrars = [
    registerListSources,
    registerGetDashboard,
    registerSaveDashboard,
    registerDeleteDashboard,
    registerQueryTile,
  ];
  for (const register of registrars) {
    register(server, context);
  }
};
export default dashboardsTools;

View file

@ -0,0 +1,183 @@
import {
convertCHDataTypeToJSType,
filterColumnMetaByType,
JSDataType,
} from '@hyperdx/common-utils/dist/clickhouse';
import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node';
import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata';
import { SourceKind } from '@hyperdx/common-utils/dist/types';
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
import {
getConnectionById,
getConnectionsByTeam,
} from '@/controllers/connection';
import { getSources } from '@/controllers/sources';
import logger from '@/utils/logger';
import { withToolTracing } from '../../utils/tracing';
import type { McpContext } from '../types';
/**
 * Registers the `hyperdx_list_sources` MCP tool: enumerates the team's data
 * sources and connections, enriching each table-backed source with its live
 * ClickHouse column schema and sampled map-attribute keys.
 */
export function registerListSources(
  server: McpServer,
  context: McpContext,
): void {
  const { teamId } = context;
  server.registerTool(
    'hyperdx_list_sources',
    {
      title: 'List Sources & Connections',
      description:
        'List all data sources (logs, metrics, traces) and database connections available to this team. ' +
        'Returns source IDs (use as sourceId in hyperdx_query and dashboard tiles) and ' +
        'connection IDs (use as connectionId for advanced raw SQL queries). ' +
        'Each source includes its full column schema and sampled attribute keys from map columns ' +
        '(e.g. SpanAttributes, ResourceAttributes). ' +
        'Column names are PascalCase (e.g. Duration, not duration). ' +
        "Map attributes must be accessed via bracket syntax: SpanAttributes['key'].\n\n" +
        'NOTE: For most queries, use source IDs with the builder display types. ' +
        'Connection IDs are only needed for advanced raw SQL queries (displayType "sql").',
      inputSchema: z.object({}),
    },
    withToolTracing('hyperdx_list_sources', context, async () => {
      // Fetch sources and connections in parallel.
      const [sources, connections] = await Promise.all([
        getSources(teamId.toString()),
        getConnectionsByTeam(teamId.toString()),
      ]);
      // Build per-source metadata: identifiers, kind-specific key columns,
      // and — when the source is table-backed — the live column schema.
      const sourcesWithSchema = await Promise.all(
        sources.map(async s => {
          const meta: Record<string, unknown> = {
            id: s._id.toString(),
            name: s.name,
            kind: s.kind,
            connectionId: s.connection.toString(),
            timestampColumn: s.timestampValueExpression,
          };
          // These expressions are only present on some source kinds, hence
          // the `in` guards.
          if ('eventAttributesExpression' in s && s.eventAttributesExpression) {
            meta.eventAttributesColumn = s.eventAttributesExpression;
          }
          if (
            'resourceAttributesExpression' in s &&
            s.resourceAttributesExpression
          ) {
            meta.resourceAttributesColumn = s.resourceAttributesExpression;
          }
          // Surface the kind-specific "well-known" column mappings.
          if (s.kind === SourceKind.Trace) {
            meta.keyColumns = {
              spanName: s.spanNameExpression,
              duration: s.durationExpression,
              durationPrecision: s.durationPrecision,
              statusCode: s.statusCodeExpression,
              serviceName: s.serviceNameExpression,
              traceId: s.traceIdExpression,
              spanId: s.spanIdExpression,
            };
          } else if (s.kind === SourceKind.Log) {
            meta.keyColumns = {
              body: s.bodyExpression,
              serviceName: s.serviceNameExpression,
              severityText: s.severityTextExpression,
              traceId: s.traceIdExpression,
            };
          } else if (s.kind === SourceKind.Metric) {
            meta.metricTables = s.metricTables;
          }
          // Skip column schema fetch for sources without a table (e.g. metrics
          // sources store their tables in metricTables, not from.tableName).
          if (s.from.tableName) {
            try {
              const connection = await getConnectionById(
                teamId.toString(),
                s.connection.toString(),
                true,
              );
              if (!connection) {
                throw new Error(`Connection not found for source ${s._id}`);
              }
              // NOTE(review): a new ClickHouse client is constructed per
              // source; consider caching per connection if source counts grow.
              const clickhouseClient = new ClickhouseClient({
                host: connection.host,
                username: connection.username,
                password: connection.password,
              });
              const metadata = getMetadata(clickhouseClient);
              const columns = await metadata.getColumns({
                databaseName: s.from.databaseName,
                tableName: s.from.tableName,
                connectionId: s.connection.toString(),
              });
              meta.columns = columns.map(c => ({
                name: c.name,
                type: c.type,
                jsType: convertCHDataTypeToJSType(c.type),
              }));
              // Sample up to 50 keys from each Map-typed column so clients
              // know which bracket-syntax attributes exist.
              const mapColumns = filterColumnMetaByType(columns, [
                JSDataType.Map,
              ]);
              const mapKeysResults: Record<string, string[]> = {};
              await Promise.all(
                (mapColumns ?? []).map(async col => {
                  try {
                    const keys = await metadata.getMapKeys({
                      databaseName: s.from.databaseName,
                      tableName: s.from.tableName,
                      column: col.name,
                      maxKeys: 50,
                      connectionId: s.connection.toString(),
                    });
                    mapKeysResults[col.name] = keys;
                  } catch {
                    // Skip columns where key sampling fails
                  }
                }),
              );
              if (Object.keys(mapKeysResults).length > 0) {
                meta.mapAttributeKeys = mapKeysResults;
              }
            } catch (e) {
              // Schema enrichment is best-effort: the source is still listed
              // even if ClickHouse is unreachable.
              logger.warn(
                { teamId, sourceId: s._id, error: e },
                'Failed to fetch schema for source',
              );
            }
          }
          return meta;
        }),
      );
      const output = {
        sources: sourcesWithSchema,
        connections: connections.map(c => ({
          id: c._id.toString(),
          name: c.name,
        })),
        // Inline usage notes so LLM clients do not misuse IDs or column names.
        usage: {
          topLevelColumns:
            'Use directly in valueExpression/groupBy with PascalCase: Duration, StatusCode, SpanName',
          mapAttributes:
            "Use bracket syntax: SpanAttributes['http.method'], ResourceAttributes['service.name']",
          sourceIds:
            'Use sourceId with builder display types (line, stacked_bar, table, number, pie, search) for standard queries',
          connectionIds:
            'ADVANCED: Use connectionId only with raw SQL queries (displayType "sql" or configType "sql"). ' +
            'Raw SQL is for advanced use cases like JOINs, sub-queries, or querying tables not registered as sources.',
        },
      };
      return {
        content: [
          { type: 'text' as const, text: JSON.stringify(output, null, 2) },
        ],
      };
    }),
  );
}

View file

@ -0,0 +1,97 @@
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import mongoose from 'mongoose';
import { z } from 'zod';
import Dashboard from '@/models/dashboard';
import { convertToExternalDashboard } from '@/routers/external-api/v2/utils/dashboards';
import { withToolTracing } from '../../utils/tracing';
import { parseTimeRange, runConfigTile } from '../query/helpers';
import type { McpContext } from '../types';
/**
 * Registers the `hyperdx_query_tile` MCP tool: executes the query behind one
 * tile of an existing dashboard over an optional time window.
 */
export function registerQueryTile(
  server: McpServer,
  context: McpContext,
): void {
  const { teamId } = context;

  // Uniform error payload shape returned by MCP tools.
  const toolError = (text: string) => ({
    isError: true,
    content: [{ type: 'text' as const, text }],
  });

  server.registerTool(
    'hyperdx_query_tile',
    {
      title: 'Query a Dashboard Tile',
      description:
        'Execute the query for a specific tile on an existing dashboard. ' +
        'Useful for validating that a tile returns data or for spot-checking results ' +
        'without rebuilding the query from scratch. ' +
        'Use hyperdx_get_dashboard with an ID to find tile IDs.',
      inputSchema: z.object({
        dashboardId: z.string().describe('Dashboard ID.'),
        tileId: z
          .string()
          .describe(
            'Tile ID within the dashboard. ' +
              'Obtain from hyperdx_get_dashboard.',
          ),
        startTime: z
          .string()
          .optional()
          .describe(
            'Start of the query window as ISO 8601. Default: 15 minutes ago. ' +
              'If results are empty, try a wider range (e.g. 24 hours).',
          ),
        endTime: z
          .string()
          .optional()
          .describe('End of the query window as ISO 8601. Default: now.'),
      }),
    },
    withToolTracing(
      'hyperdx_query_tile',
      context,
      async ({ dashboardId, tileId, startTime, endTime }) => {
        // Resolve the time window first so malformed timestamps fail fast.
        const timeRange = parseTimeRange(startTime, endTime);
        if ('error' in timeRange) {
          return toolError(timeRange.error);
        }
        const { startDate, endDate } = timeRange;

        if (!mongoose.Types.ObjectId.isValid(dashboardId)) {
          return toolError('Invalid dashboard ID');
        }

        const dashboard = await Dashboard.findOne({
          _id: dashboardId,
          team: teamId,
        });
        if (!dashboard) {
          return toolError('Dashboard not found');
        }

        // Convert to the external shape so tile IDs match what
        // hyperdx_get_dashboard reports.
        const externalDashboard = convertToExternalDashboard(dashboard);
        const tile = externalDashboard.tiles.find(t => t.id === tileId);
        if (!tile) {
          const available = externalDashboard.tiles
            .map(t => t.id)
            .join(', ');
          return toolError(
            `Tile not found: ${tileId}. Available tile IDs: ${available}`,
          );
        }

        return runConfigTile(teamId.toString(), tile, startDate, endDate);
      },
    ),
  );
}

View file

@ -0,0 +1,338 @@
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { uniq } from 'lodash';
import mongoose from 'mongoose';
import { z } from 'zod';
import * as config from '@/config';
import Dashboard from '@/models/dashboard';
import {
cleanupDashboardAlerts,
convertExternalFiltersToInternal,
convertExternalTilesToInternal,
convertToExternalDashboard,
createDashboardBodySchema,
getMissingConnections,
getMissingSources,
resolveSavedQueryLanguage,
updateDashboardBodySchema,
} from '@/routers/external-api/v2/utils/dashboards';
import type { ExternalDashboardTileWithId } from '@/utils/zod';
import { withToolTracing } from '../../utils/tracing';
import type { McpContext } from '../types';
import { mcpTilesParam } from './schemas';
/**
 * Registers the `hyperdx_save_dashboard` MCP tool: creates a dashboard when
 * no ID is supplied, otherwise updates the existing one.
 */
export function registerSaveDashboard(
  server: McpServer,
  context: McpContext,
): void {
  const { teamId } = context;
  const frontendUrl = config.FRONTEND_URL;

  server.registerTool(
    'hyperdx_save_dashboard',
    {
      title: 'Create or Update Dashboard',
      description:
        'Create a new dashboard (omit id) or update an existing one (provide id). ' +
        'Call hyperdx_list_sources first to obtain sourceId and connectionId values. ' +
        'IMPORTANT: After saving a dashboard, always run hyperdx_query_tile on each tile ' +
        'to confirm the queries work and return expected data. Tiles can silently fail ' +
        'due to incorrect filter syntax, missing attributes, or wrong column names.',
      inputSchema: z.object({
        id: z
          .string()
          .optional()
          .describe(
            'Dashboard ID. Omit to create a new dashboard, provide to update an existing one.',
          ),
        name: z.string().describe('Dashboard name'),
        tiles: mcpTilesParam,
        tags: z.array(z.string()).optional().describe('Dashboard tags'),
      }),
    },
    withToolTracing(
      'hyperdx_save_dashboard',
      context,
      async ({ id: dashboardId, name, tiles: inputTiles, tags }) => {
        // Route to the create or update path based on whether an ID was given.
        const shared = { teamId, frontendUrl, name, inputTiles, tags };
        if (dashboardId) {
          return updateDashboard({ ...shared, dashboardId });
        }
        return createDashboard(shared);
      },
    ),
  );
}
// ─── Create helper ────────────────────────────────────────────────────────────
/**
 * Validates input and creates a new dashboard for the team.
 *
 * @param teamId - Owning team's ID.
 * @param frontendUrl - Optional UI base URL; when set, a deep link is included.
 * @param name - Dashboard name.
 * @param inputTiles - Raw tile payloads, validated against the external schema.
 * @param tags - Optional tags (deduplicated before save).
 * @returns An MCP tool result: either an isError payload (validation failure,
 *   unknown source/connection IDs) or the created dashboard in external form.
 */
async function createDashboard({
  teamId,
  frontendUrl,
  name,
  inputTiles,
  tags,
}: {
  teamId: string;
  frontendUrl: string | undefined;
  name: string;
  inputTiles: unknown[];
  tags: string[] | undefined;
}) {
  const parsed = createDashboardBodySchema.safeParse({
    name,
    tiles: inputTiles,
    tags,
  });
  if (!parsed.success) {
    return {
      isError: true,
      content: [
        {
          type: 'text' as const,
          text: `Validation error: ${JSON.stringify(parsed.error.errors)}`,
        },
      ],
    };
  }
  const { tiles, filters } = parsed.data;
  const tilesWithId = tiles as ExternalDashboardTileWithId[];
  // Resolve referenced IDs up front so the caller gets a precise error
  // instead of a saved dashboard with dead tiles.
  const [missingSources, missingConnections] = await Promise.all([
    getMissingSources(teamId, tilesWithId, filters),
    getMissingConnections(teamId, tilesWithId),
  ]);
  if (missingSources.length > 0) {
    return {
      isError: true,
      content: [
        {
          type: 'text' as const,
          text: `Could not find source IDs: ${missingSources.join(', ')}`,
        },
      ],
    };
  }
  if (missingConnections.length > 0) {
    return {
      isError: true,
      content: [
        {
          type: 'text' as const,
          text: `Could not find connection IDs: ${missingConnections.join(', ')}`,
        },
      ],
    };
  }
  const internalTiles = convertExternalTilesToInternal(tilesWithId);
  const filtersWithIds = convertExternalFiltersToInternal(filters ?? []);
  const normalizedSavedQueryLanguage = resolveSavedQueryLanguage({
    savedQuery: undefined,
    savedQueryLanguage: undefined,
  });
  const newDashboard = await new Dashboard({
    name: parsed.data.name,
    tiles: internalTiles,
    tags: tags && uniq(tags), // dedupe; undefined stays undefined
    filters: filtersWithIds,
    savedQueryLanguage: normalizedSavedQueryLanguage,
    savedFilterValues: parsed.data.savedFilterValues,
    team: teamId,
  }).save();
  return {
    content: [
      {
        type: 'text' as const,
        text: JSON.stringify(
          {
            ...convertToExternalDashboard(newDashboard),
            ...(frontendUrl
              ? { url: `${frontendUrl}/dashboards/${newDashboard._id}` }
              : {}),
            // Tile validation is done with hyperdx_query_tile, matching the
            // save tool's own description (previous hint named hyperdx_query).
            hint: 'Use hyperdx_query_tile to test individual tile queries before viewing the dashboard.',
          },
          null,
          2,
        ),
      },
    ],
  };
}
// ─── Update helper ────────────────────────────────────────────────────────────
/**
 * Validates input and updates an existing dashboard in place.
 *
 * Preserves tile/filter IDs that already exist on the dashboard, cleans up
 * alerts attached to tiles that were removed, and returns the updated
 * dashboard in external form (with a UI link when configured).
 *
 * @param teamId - Owning team's ID; lookups are scoped to it.
 * @param frontendUrl - Optional UI base URL for the returned deep link.
 * @param dashboardId - ID of the dashboard to update.
 * @param name - New dashboard name.
 * @param inputTiles - Raw tile payloads, validated against the external schema.
 * @param tags - Optional tags (deduplicated before save).
 */
async function updateDashboard({
  teamId,
  frontendUrl,
  dashboardId,
  name,
  inputTiles,
  tags,
}: {
  teamId: string;
  frontendUrl: string | undefined;
  dashboardId: string;
  name: string;
  inputTiles: unknown[];
  tags: string[] | undefined;
}) {
  if (!mongoose.Types.ObjectId.isValid(dashboardId)) {
    return {
      isError: true,
      content: [{ type: 'text' as const, text: 'Invalid dashboard ID' }],
    };
  }
  const parsed = updateDashboardBodySchema.safeParse({
    name,
    tiles: inputTiles,
    tags,
  });
  if (!parsed.success) {
    return {
      isError: true,
      content: [
        {
          type: 'text' as const,
          text: `Validation error: ${JSON.stringify(parsed.error.errors)}`,
        },
      ],
    };
  }
  const { tiles, filters } = parsed.data;
  const tilesWithId = tiles as ExternalDashboardTileWithId[];
  // Resolve referenced IDs up front so the caller gets a precise error
  // instead of a saved dashboard with dead tiles.
  const [missingSources, missingConnections] = await Promise.all([
    getMissingSources(teamId, tilesWithId, filters),
    getMissingConnections(teamId, tilesWithId),
  ]);
  if (missingSources.length > 0) {
    return {
      isError: true,
      content: [
        {
          type: 'text' as const,
          text: `Could not find source IDs: ${missingSources.join(', ')}`,
        },
      ],
    };
  }
  if (missingConnections.length > 0) {
    return {
      isError: true,
      content: [
        {
          type: 'text' as const,
          text: `Could not find connection IDs: ${missingConnections.join(', ')}`,
        },
      ],
    };
  }
  // Snapshot existing tile/filter IDs so conversions can keep stable IDs for
  // entities that survive the update.
  const existingDashboard = await Dashboard.findOne(
    { _id: dashboardId, team: teamId },
    { tiles: 1, filters: 1 },
  ).lean();
  if (!existingDashboard) {
    return {
      isError: true,
      content: [{ type: 'text' as const, text: 'Dashboard not found' }],
    };
  }
  const existingTileIds = new Set(
    (existingDashboard.tiles ?? []).map((t: { id: string }) => t.id),
  );
  const existingFilterIds = new Set(
    (existingDashboard.filters ?? []).map((f: { id: string }) => f.id),
  );
  const internalTiles = convertExternalTilesToInternal(
    tilesWithId,
    existingTileIds,
  );
  const setPayload: Record<string, unknown> = {
    name,
    tiles: internalTiles,
    tags: tags && uniq(tags),
  };
  // Only touch filters when the caller supplied them.
  if (filters !== undefined) {
    setPayload.filters = convertExternalFiltersToInternal(
      filters,
      existingFilterIds,
    );
  }
  const normalizedSavedQueryLanguage = resolveSavedQueryLanguage({
    savedQuery: undefined,
    savedQueryLanguage: undefined,
  });
  if (normalizedSavedQueryLanguage !== undefined) {
    setPayload.savedQueryLanguage = normalizedSavedQueryLanguage;
  }
  if (parsed.data.savedFilterValues !== undefined) {
    setPayload.savedFilterValues = parsed.data.savedFilterValues;
  }
  const updatedDashboard = await Dashboard.findOneAndUpdate(
    { _id: dashboardId, team: teamId },
    { $set: setPayload },
    { new: true },
  );
  if (!updatedDashboard) {
    return {
      isError: true,
      content: [{ type: 'text' as const, text: 'Dashboard not found' }],
    };
  }
  // Drop alerts that pointed at tiles removed by this update.
  await cleanupDashboardAlerts({
    dashboardId,
    teamId,
    internalTiles,
    existingTileIds,
  });
  return {
    content: [
      {
        type: 'text' as const,
        text: JSON.stringify(
          {
            ...convertToExternalDashboard(updatedDashboard),
            ...(frontendUrl
              ? { url: `${frontendUrl}/dashboards/${updatedDashboard._id}` }
              : {}),
            // Tile validation is done with hyperdx_query_tile, matching the
            // save tool's own description (previous hint named hyperdx_query).
            hint: 'Use hyperdx_query_tile to test individual tile queries before viewing the dashboard.',
          },
          null,
          2,
        ),
      },
    ],
  };
}

View file

@ -0,0 +1,320 @@
import {
AggregateFunctionSchema,
SearchConditionLanguageSchema,
} from '@hyperdx/common-utils/dist/types';
import { z } from 'zod';
import { externalQuantileLevelSchema } from '@/utils/zod';
// ─── Shared tile schemas for MCP dashboard tools ─────────────────────────────
// One aggregated series inside a builder tile's `select` array.
const mcpTileSelectItemSchema = z
  .object({
    aggFn: AggregateFunctionSchema.describe(
      'Aggregation function. "count" requires no valueExpression; all others do.',
    ),
    valueExpression: z
      .string()
      .optional()
      .describe(
        'Column or expression to aggregate. Required for all aggFn except "count". ' +
          'Use PascalCase for top-level columns (e.g. "Duration", "StatusCode"). ' +
          "For span attributes use: SpanAttributes['key'] (e.g. SpanAttributes['http.method']). " +
          "For resource attributes use: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).",
      ),
    where: z
      .string()
      .optional()
      .default('')
      .describe('Filter in Lucene syntax. Example: "level:error"'),
    whereLanguage: SearchConditionLanguageSchema.optional().default('lucene'),
    alias: z.string().optional().describe('Display label for this series'),
    level: externalQuantileLevelSchema
      .optional()
      .describe('Percentile level for aggFn="quantile"'),
  })
  // Cross-field rules that a plain z.object cannot express.
  .superRefine((data, ctx) => {
    // `level` is only meaningful for quantile aggregations.
    if (data.level && data.aggFn !== 'quantile') {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: 'Level can only be used with quantile aggregation function',
      });
    }
    // count() takes no operand; every other aggFn requires one.
    if (data.valueExpression && data.aggFn === 'count') {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message:
          'Value expression cannot be used with count aggregation function',
      });
    } else if (!data.valueExpression && data.aggFn !== 'count') {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message:
          'Value expression is required for non-count aggregation functions',
      });
    }
  });
// Common layout/title fields shared by every tile type. The dashboard grid
// is 24 columns wide.
const mcpTileLayoutSchema = z.object({
  name: z.string().describe('Tile title shown on the dashboard'),
  x: z
    .number()
    .min(0)
    .max(23)
    .optional()
    .default(0)
    // Restored range text that was garbled to "(023)".
    .describe('Horizontal grid position (0-23). Default 0'),
  y: z
    .number()
    .min(0)
    .optional()
    .default(0)
    .describe('Vertical grid position. Default 0'),
  w: z
    .number()
    .min(1)
    .max(24)
    .optional()
    .default(12)
    // Restored range text that was garbled to "(124)".
    .describe('Width in grid columns (1-24). Default 12'),
  h: z
    .number()
    .min(1)
    .optional()
    .default(4)
    .describe('Height in grid rows. Default 4'),
  id: z
    .string()
    .max(36)
    .optional()
    .describe('Tile ID (auto-generated if omitted)'),
});

// Line chart tile: one or more aggregated series over time.
const mcpLineTileSchema = mcpTileLayoutSchema.extend({
  config: z.object({
    displayType: z.literal('line').describe('Line chart over time'),
    sourceId: z.string().describe('Source ID — call hyperdx_list_sources'),
    select: z
      .array(mcpTileSelectItemSchema)
      .min(1)
      .max(20)
      .describe('Metrics to plot (one series per item)'),
    groupBy: z
      .string()
      .optional()
      .describe(
        'Column to split/group by. ' +
          'Top-level columns use PascalCase (e.g. "SpanName", "StatusCode"). ' +
          "Span attributes: SpanAttributes['key'] (e.g. SpanAttributes['http.method']). " +
          "Resource attributes: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).",
      ),
    fillNulls: z.boolean().optional().default(true),
    alignDateRangeToGranularity: z.boolean().optional(),
    asRatio: z
      .boolean()
      .optional()
      .describe(
        'Plot as ratio of two metrics (requires exactly 2 select items)',
      ),
  }),
});

// Stacked bar tile: same query shape as line, rendered as stacked bars.
const mcpBarTileSchema = mcpTileLayoutSchema.extend({
  config: z.object({
    displayType: z
      .literal('stacked_bar')
      .describe('Stacked bar chart over time'),
    sourceId: z.string().describe('Source ID — call hyperdx_list_sources'),
    select: z.array(mcpTileSelectItemSchema).min(1).max(20),
    groupBy: z.string().optional(),
    fillNulls: z.boolean().optional().default(true),
    alignDateRangeToGranularity: z.boolean().optional(),
    asRatio: z.boolean().optional(),
  }),
});

// Table tile: aggregated rows, optionally grouped and sorted.
const mcpTableTileSchema = mcpTileLayoutSchema.extend({
  config: z.object({
    displayType: z.literal('table').describe('Tabular aggregated data'),
    sourceId: z.string().describe('Source ID — call hyperdx_list_sources'),
    select: z.array(mcpTileSelectItemSchema).min(1).max(20),
    groupBy: z
      .string()
      .optional()
      .describe(
        'Group rows by this column. Use PascalCase for top-level columns (e.g. "SpanName"). ' +
          "For attributes: SpanAttributes['key'] or ResourceAttributes['key'].",
      ),
    orderBy: z.string().optional().describe('Sort results by this column'),
    asRatio: z.boolean().optional(),
  }),
});
// Display formatting options for the single value of a number tile.
const mcpNumberFormatSchema = z
  .object({
    output: z
      .enum(['currency', 'percent', 'byte', 'time', 'number'])
      .describe(
        'Format category. "time" auto-formats durations (use factor for input unit). ' +
          '"byte" formats as KB/MB/GB. "currency" prepends a symbol. "percent" appends %.',
      ),
    mantissa: z
      .number()
      .int()
      .optional()
      // Restored range text that was garbled to "(010)".
      .describe('Decimal places (0-10). Not used for "time" output.'),
    thousandSeparated: z
      .boolean()
      .optional()
      .describe('Separate thousands (e.g. 1,234,567)'),
    average: z
      .boolean()
      .optional()
      .describe('Abbreviate large numbers (e.g. 1.2m)'),
    decimalBytes: z
      .boolean()
      .optional()
      .describe(
        'Use decimal base for bytes (1KB = 1000). Only for "byte" output.',
      ),
    factor: z
      .number()
      .optional()
      .describe(
        'Input unit factor for "time" output. ' +
          '1 = seconds, 0.001 = milliseconds, 0.000001 = microseconds, 0.000000001 = nanoseconds.',
      ),
    currencySymbol: z
      .string()
      .optional()
      .describe('Currency symbol (e.g. "$"). Only for "currency" output.'),
    unit: z
      .string()
      .optional()
      .describe('Suffix appended to the value (e.g. " req/s")'),
  })
  .describe(
    'Controls how the number value is formatted for display. ' +
      'Most useful: { output: "time", factor: 0.000000001 } to auto-format nanosecond durations, ' +
      'or { output: "number", mantissa: 2, thousandSeparated: true } for clean counts.',
  );

// Number tile: a single aggregated scalar with optional formatting.
const mcpNumberTileSchema = mcpTileLayoutSchema.extend({
  config: z.object({
    displayType: z.literal('number').describe('Single aggregate scalar value'),
    sourceId: z.string().describe('Source ID — call hyperdx_list_sources'),
    select: z
      .array(mcpTileSelectItemSchema)
      .length(1)
      .describe('Exactly one metric to display'),
    numberFormat: mcpNumberFormatSchema
      .optional()
      .describe(
        'Display formatting for the number value. Example: { output: "time", factor: 0.000000001 } ' +
          'to auto-format nanosecond durations as human-readable time.',
      ),
  }),
});

// Pie tile: one metric, sliced by groupBy.
const mcpPieTileSchema = mcpTileLayoutSchema.extend({
  config: z.object({
    displayType: z.literal('pie').describe('Pie chart'),
    sourceId: z.string().describe('Source ID — call hyperdx_list_sources'),
    select: z.array(mcpTileSelectItemSchema).length(1),
    groupBy: z
      .string()
      .optional()
      .describe(
        'Column that defines pie slices. Use PascalCase for top-level columns. ' +
          "For attributes: SpanAttributes['key'] or ResourceAttributes['key'].",
      ),
  }),
});

// Search tile: raw log/event rows matching a filter.
const mcpSearchTileSchema = mcpTileLayoutSchema.extend({
  config: z.object({
    displayType: z.literal('search').describe('Log/event search results list'),
    sourceId: z.string().describe('Source ID — call hyperdx_list_sources'),
    where: z
      .string()
      .optional()
      .default('')
      .describe('Filter in Lucene syntax. Example: "level:error"'),
    whereLanguage: SearchConditionLanguageSchema.optional().default('lucene'),
    select: z
      .string()
      .optional()
      .default('')
      .describe(
        'Columns to display (empty = defaults). Example: "body,service.name,duration"',
      ),
  }),
});

// Markdown tile: static text, no query behind it.
const mcpMarkdownTileSchema = mcpTileLayoutSchema.extend({
  config: z.object({
    displayType: z.literal('markdown').describe('Free-form Markdown text tile'),
    markdown: z.string().optional().default(''),
  }),
});
// Raw SQL tile (advanced): renders arbitrary ClickHouse SQL results using one
// of the standard display types.
const mcpSqlTileSchema = mcpTileLayoutSchema.extend({
  config: z.object({
    configType: z
      .literal('sql')
      .describe(
        'Must be "sql" for raw SQL tiles. ' +
          'ADVANCED: Only use raw SQL tiles when the builder tile types cannot express the query you need.',
      ),
    displayType: z
      .enum(['line', 'stacked_bar', 'table', 'number', 'pie'])
      .describe('How to render the SQL results'),
    connectionId: z
      .string()
      .describe(
        'Connection ID (not sourceId) — call hyperdx_list_sources to find available connections',
      ),
    sqlTemplate: z
      .string()
      .describe(
        'Raw ClickHouse SQL query. Always include a LIMIT clause to avoid excessive data.\n' +
          'Use query parameters: {startDateMilliseconds:Int64}, {endDateMilliseconds:Int64}, ' +
          '{intervalSeconds:Int64}, {intervalMilliseconds:Int64}.\n' +
          'Or use macros: $__timeFilter(col), $__timeFilter_ms(col), $__dateFilter(col), ' +
          '$__fromTime, $__toTime, $__fromTime_ms, $__toTime_ms, ' +
          '$__timeInterval(col), $__timeInterval_ms(col), $__interval_s, $__filters.\n' +
          'Example: "SELECT $__timeInterval(TimestampTime) AS ts, ServiceName, count() ' +
          'FROM otel_logs WHERE $__timeFilter(TimestampTime) AND $__filters ' +
          'GROUP BY ServiceName, ts ORDER BY ts"',
      ),
    fillNulls: z.boolean().optional(),
    alignDateRangeToGranularity: z.boolean().optional(),
  }),
});

// Every supported tile shape; zod picks the first member that parses.
const mcpTileSchema = z.union([
  mcpLineTileSchema,
  mcpBarTileSchema,
  mcpTableTileSchema,
  mcpNumberTileSchema,
  mcpPieTileSchema,
  mcpSearchTileSchema,
  mcpMarkdownTileSchema,
  mcpSqlTileSchema,
]);

// The `tiles` parameter accepted by hyperdx_save_dashboard.
export const mcpTilesParam = z
  .array(mcpTileSchema)
  .describe(
    'Array of dashboard tiles. Each tile needs a name, optional layout (x/y/w/h), and a config block. ' +
      'The config block varies by displayType — use hyperdx_list_sources for sourceId and connectionId values.\n\n' +
      'Example tiles:\n' +
      '1. Line chart: { "name": "Error Rate", "config": { "displayType": "line", "sourceId": "<from list_sources>", ' +
      '"groupBy": "ResourceAttributes[\'service.name\']", "select": [{ "aggFn": "count", "where": "StatusCode:STATUS_CODE_ERROR" }] } }\n' +
      '2. Table: { "name": "Top Endpoints", "config": { "displayType": "table", "sourceId": "<from list_sources>", ' +
      '"groupBy": "SpanAttributes[\'http.route\']", "select": [{ "aggFn": "count" }, { "aggFn": "avg", "valueExpression": "Duration" }] } }\n' +
      '3. Number: { "name": "Total Requests", "config": { "displayType": "number", "sourceId": "<from list_sources>", ' +
      '"select": [{ "aggFn": "count" }], "numberFormat": { "output": "number", "average": true } } }\n' +
      '4. Number (duration): { "name": "P95 Latency", "config": { "displayType": "number", "sourceId": "<from list_sources>", ' +
      '"select": [{ "aggFn": "quantile", "level": 0.95, "valueExpression": "Duration" }], ' +
      '"numberFormat": { "output": "time", "factor": 0.000000001 } } }',
  );

View file

@ -0,0 +1,264 @@
import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node';
import { getMetadata } from '@hyperdx/common-utils/dist/core/metadata';
import { getFirstTimestampValueExpression } from '@hyperdx/common-utils/dist/core/utils';
import { isRawSqlSavedChartConfig } from '@hyperdx/common-utils/dist/guards';
import type {
ChartConfigWithDateRange,
MetricTable,
} from '@hyperdx/common-utils/dist/types';
import { DisplayType, SourceKind } from '@hyperdx/common-utils/dist/types';
import ms from 'ms';
import { getConnectionById } from '@/controllers/connection';
import { getSource } from '@/controllers/sources';
import {
convertToInternalTileConfig,
isConfigTile,
} from '@/routers/external-api/v2/utils/dashboards';
import { trimToolResponse } from '@/utils/trimToolResponse';
import type { ExternalDashboardTileWithId } from '@/utils/zod';
// ─── Time range ──────────────────────────────────────────────────────────────
/**
 * Resolves an optional ISO 8601 start/end pair into concrete Dates.
 *
 * Defaults: endDate = now, startDate = 15 minutes before endDate.
 * Returns `{ error }` when either supplied timestamp fails to parse.
 * (Uses a local constant instead of parsing the fixed '15m' literal through
 * the third-party `ms` helper on every call.)
 */
export function parseTimeRange(
  startTime?: string,
  endTime?: string,
): { error: string } | { startDate: Date; endDate: Date } {
  const FIFTEEN_MINUTES_MS = 15 * 60 * 1000;
  // Empty strings fall back to the defaults, matching prior behavior.
  const endDate = endTime ? new Date(endTime) : new Date();
  const startDate = startTime
    ? new Date(startTime)
    : new Date(endDate.getTime() - FIFTEEN_MINUTES_MS);
  // An unparseable input yields an Invalid Date, whose getTime() is NaN.
  if (
    Number.isNaN(endDate.getTime()) ||
    Number.isNaN(startDate.getTime())
  ) {
    return {
      error: 'Invalid startTime or endTime: must be valid ISO 8601 strings',
    };
  }
  return { startDate, endDate };
}
// ─── Result helpers ──────────────────────────────────────────────────────────
// Heuristically decide whether a query result contains no rows. Treats
// null/undefined, empty arrays, objects with an empty `data` array, and
// objects reporting `rows` of 0 as empty.
function isEmptyResult(result: unknown): boolean {
  if (result == null) {
    return true;
  }
  if (Array.isArray(result)) {
    return result.length === 0;
  }
  if (typeof result !== 'object') {
    return false;
  }
  const record = result as Record<string, unknown>;
  const dataIsEmpty = Array.isArray(record.data) && record.data.length === 0;
  const rowsAreZero = record.rows != null && Number(record.rows) === 0;
  return dataIsEmpty || rowsAreZero;
}
/**
 * Wrap a query result in the MCP text-content envelope.
 *
 * The result is passed through trimToolResponse; when trimming actually
 * shrank the serialized payload a `note` is attached, and when the raw
 * result is empty a `hint` suggesting a wider time range is attached.
 */
function formatQueryResult(result: unknown) {
  const trimmed = trimToolResponse(result);
  const wasTrimmed =
    JSON.stringify(trimmed).length < JSON.stringify(result).length;
  const payload: Record<string, unknown> = { result: trimmed };
  if (wasTrimmed) {
    payload.note =
      'Result was trimmed for context size. Narrow the time range or add filters to reduce data.';
  }
  if (isEmptyResult(result)) {
    payload.hint =
      'No data found in the queried time range. Try setting startTime to a wider window (e.g. 24 hours ago) or check that filters match existing data.';
  }
  return {
    content: [
      {
        type: 'text' as const,
        text: JSON.stringify(payload, null, 2),
      },
    ],
  };
}
// ─── Tile execution ──────────────────────────────────────────────────────────
/**
 * Execute a dashboard tile's chart configuration and return the result in
 * MCP content shape.
 *
 * Two tile flavors are handled:
 *  - Builder tiles: resolved via the tile's source and executed through
 *    queryChartConfig, with special handling for Markdown (no query) and
 *    Search (newest-first, row-limited) display types.
 *  - Raw SQL tiles: executed against the tile's connection; source fields
 *    are hydrated when available so SQL macros can resolve.
 *
 * @param teamId    Team scope for source/connection lookups.
 * @param tile      External-API tile definition (must contain a config).
 * @param startDate Start of the query window (applied as the chart dateRange).
 * @param endDate   End of the query window.
 * @param options   maxResults caps rows for Search tiles (default 50).
 * @returns MCP tool result content; isError is set on lookup/validation failures.
 */
export async function runConfigTile(
  teamId: string,
  tile: ExternalDashboardTileWithId,
  startDate: Date,
  endDate: Date,
  options?: { maxResults?: number },
) {
  if (!isConfigTile(tile)) {
    return {
      isError: true as const,
      content: [
        { type: 'text' as const, text: 'Invalid tile: config field missing' },
      ],
    };
  }
  const internalTile = convertToInternalTileConfig(tile);
  const savedConfig = internalTile.config;
  // Builder tile: resolve source + connection, then run via the chart builder.
  if (!isRawSqlSavedChartConfig(savedConfig)) {
    const builderConfig = savedConfig;
    // Markdown tiles (or tiles without a source) carry no query to run.
    if (
      !builderConfig.source ||
      builderConfig.displayType === DisplayType.Markdown
    ) {
      return {
        content: [
          {
            type: 'text' as const,
            text: 'Markdown tile: no query to execute.',
          },
        ],
      };
    }
    const source = await getSource(teamId, builderConfig.source);
    if (!source) {
      return {
        isError: true as const,
        content: [
          {
            type: 'text' as const,
            text: `Source not found: ${builderConfig.source}`,
          },
        ],
      };
    }
    const connection = await getConnectionById(
      teamId,
      source.connection.toString(),
      true,
    );
    if (!connection) {
      return {
        isError: true as const,
        content: [
          {
            type: 'text' as const,
            text: `Connection not found for source: ${builderConfig.source}`,
          },
        ],
      };
    }
    const clickhouseClient = new ClickhouseClient({
      host: connection.host,
      username: connection.username,
      password: connection.password,
    });
    const isSearch = builderConfig.displayType === DisplayType.Search;
    // These fields only exist on some source kinds; probe with `in` checks.
    const defaultTableSelect =
      'defaultTableSelectExpression' in source
        ? source.defaultTableSelectExpression
        : undefined;
    const implicitColumn =
      'implicitColumnExpression' in source
        ? source.implicitColumnExpression
        : undefined;
    // Search tiles override aggregation fields: plain row select, newest-first
    // ordering on the source timestamp, and a row cap (default 50).
    const searchOverrides = isSearch
      ? {
          select: builderConfig.select || defaultTableSelect || '*',
          groupBy: undefined,
          granularity: undefined,
          orderBy: [
            {
              ordering: 'DESC' as const,
              valueExpression: getFirstTimestampValueExpression(
                source.timestampValueExpression,
              ),
            },
          ],
          limit: { limit: options?.maxResults ?? 50, offset: 0 },
        }
      : {};
    const chartConfig = {
      ...builderConfig,
      ...searchOverrides,
      from: {
        databaseName: source.from.databaseName,
        tableName: source.from.tableName,
      },
      connection: source.connection.toString(),
      timestampValueExpression: source.timestampValueExpression,
      implicitColumnExpression: implicitColumn,
      dateRange: [startDate, endDate] as [Date, Date],
    } satisfies ChartConfigWithDateRange;
    const metadata = getMetadata(clickhouseClient);
    const result = await clickhouseClient.queryChartConfig({
      config: chartConfig,
      metadata,
      querySettings: source.querySettings,
    });
    return formatQueryResult(result);
  }
  // Raw SQL tile — hydrate source fields for macro support ($__sourceTable, $__filters)
  let sourceFields: {
    from?: { databaseName: string; tableName: string };
    implicitColumnExpression?: string;
    metricTables?: MetricTable;
  } = {};
  if (savedConfig.source) {
    const source = await getSource(teamId, savedConfig.source);
    // A missing source is tolerated here: the SQL can still run without macros.
    if (source) {
      sourceFields = {
        from: source.from,
        implicitColumnExpression:
          'implicitColumnExpression' in source
            ? source.implicitColumnExpression
            : undefined,
        metricTables:
          source.kind === SourceKind.Metric ? source.metricTables : undefined,
      };
    }
  }
  const connection = await getConnectionById(
    teamId,
    savedConfig.connection,
    true,
  );
  if (!connection) {
    return {
      isError: true as const,
      content: [
        {
          type: 'text' as const,
          text: `Connection not found: ${savedConfig.connection}`,
        },
      ],
    };
  }
  const clickhouseClient = new ClickhouseClient({
    host: connection.host,
    username: connection.username,
    password: connection.password,
  });
  const chartConfig = {
    ...savedConfig,
    ...sourceFields,
    dateRange: [startDate, endDate] as [Date, Date],
  } satisfies ChartConfigWithDateRange;
  const metadata = getMetadata(clickhouseClient);
  const result = await clickhouseClient.queryChartConfig({
    config: chartConfig,
    metadata,
    // NOTE(review): raw SQL tiles run without source querySettings — confirm intentional.
    querySettings: undefined,
  });
  return formatQueryResult(result);
}

View file

@ -0,0 +1,117 @@
import { ObjectId } from 'mongodb';
import type { ExternalDashboardTileWithId } from '@/utils/zod';
import { externalDashboardTileSchemaWithId } from '@/utils/zod';
import { withToolTracing } from '../../utils/tracing';
import type { ToolDefinition } from '../types';
import { parseTimeRange, runConfigTile } from './helpers';
import { hyperdxQuerySchema } from './schemas';
// ─── Tool definition ─────────────────────────────────────────────────────────
/**
 * Registers the `hyperdx_query` MCP tool.
 *
 * The tool accepts a discriminated union of query shapes (builder charts,
 * row search, or raw SQL), converts the input into a synthetic dashboard
 * tile, and executes it via runConfigTile for the resolved time range.
 */
const queryTools: ToolDefinition = (server, context) => {
  const { teamId } = context;
  server.registerTool(
    'hyperdx_query',
    {
      title: 'Query Data',
      description:
        'Query observability data (logs, metrics, traces) from HyperDX. ' +
        'Use hyperdx_list_sources first to find sourceId/connectionId values. ' +
        'Set displayType to control the query shape.\n\n' +
        'PREFERRED: Use the builder display types (line, stacked_bar, table, number, pie) ' +
        'for aggregated metrics, or "search" for browsing individual log/event rows. ' +
        'These are safer, easier to construct, and cover most use cases.\n\n' +
        'ADVANCED: Use displayType "sql" only when you need capabilities the builder cannot express, ' +
        'such as JOINs, sub-queries, CTEs, or querying tables not registered as sources. ' +
        'Raw SQL requires a connectionId (not sourceId) and a hand-written ClickHouse SQL query.\n\n' +
        'Column naming: Top-level columns are PascalCase (Duration, StatusCode, SpanName). ' +
        "Map attributes use bracket syntax: SpanAttributes['http.method'], ResourceAttributes['service.name']. " +
        'Call hyperdx_list_sources to discover available columns and attribute keys for each source.',
      inputSchema: hyperdxQuerySchema,
    },
    withToolTracing('hyperdx_query', context, async input => {
      // Invalid/unparseable time ranges are surfaced as tool errors, not throws.
      const timeRange = parseTimeRange(input.startTime, input.endTime);
      if ('error' in timeRange) {
        return {
          isError: true,
          content: [{ type: 'text' as const, text: timeRange.error }],
        };
      }
      const { startDate, endDate } = timeRange;
      // Build a synthetic tile matching the requested query shape; the
      // x/y/w/h geometry is required by the tile schema but irrelevant here.
      let tile: ExternalDashboardTileWithId;
      if (input.displayType === 'sql') {
        // Raw SQL executes as a table-display SQL tile against the connection.
        tile = externalDashboardTileSchemaWithId.parse({
          id: new ObjectId().toString(),
          name: 'MCP SQL',
          x: 0,
          y: 0,
          w: 24,
          h: 6,
          config: {
            configType: 'sql' as const,
            displayType: 'table' as const,
            connectionId: input.connectionId,
            sqlTemplate: input.sql,
          },
        });
      } else if (input.displayType === 'search') {
        // Search browses individual rows; `columns` maps onto the select list.
        tile = externalDashboardTileSchemaWithId.parse({
          id: new ObjectId().toString(),
          name: 'MCP Search',
          x: 0,
          y: 0,
          w: 24,
          h: 6,
          config: {
            displayType: 'search' as const,
            sourceId: input.sourceId,
            select: input.columns ?? '',
            where: input.where ?? '',
            whereLanguage: input.whereLanguage ?? 'lucene',
          },
        });
      } else {
        // Builder chart: one select item per requested aggregation.
        tile = externalDashboardTileSchemaWithId.parse({
          id: new ObjectId().toString(),
          name: 'MCP Query',
          x: 0,
          y: 0,
          w: 12,
          h: 4,
          config: {
            displayType: input.displayType,
            sourceId: input.sourceId,
            select: input.select.map(s => ({
              aggFn: s.aggFn,
              where: s.where ?? '',
              whereLanguage: s.whereLanguage ?? 'lucene',
              valueExpression: s.valueExpression,
              alias: s.alias,
              level: s.level,
            })),
            groupBy: input.groupBy ?? undefined,
            orderBy: input.orderBy ?? undefined,
            ...(input.granularity ? { granularity: input.granularity } : {}),
          },
        });
      }
      // maxResults only applies to search queries (row cap).
      return runConfigTile(
        teamId.toString(),
        tile,
        startDate,
        endDate,
        input.displayType === 'search'
          ? { maxResults: input.maxResults }
          : undefined,
      );
    }),
  );
};
export default queryTools;

View file

@ -0,0 +1,220 @@
import { z } from 'zod';
// ─── Shared schemas ──────────────────────────────────────────────────────────
// Aggregation functions accepted by builder queries.
// NOTE(review): the separators between each term and its description in the
// describe() text appear to have been lost (likely tabs) — confirm against
// the original source before relying on the rendered wording.
const mcpAggFnSchema = z
  .enum([
    'avg',
    'count',
    'count_distinct',
    'last_value',
    'max',
    'min',
    'quantile',
    'sum',
    'none',
  ])
  .describe(
    'Aggregation function:\n' +
      ' count count matching rows (no valueExpression needed)\n' +
      ' sum / avg / min / max aggregate a numeric column (valueExpression required)\n' +
      ' count_distinct unique value count (valueExpression required)\n' +
      ' quantile percentile; also set level (valueExpression required)\n' +
      ' last_value most recent value of a column\n' +
      ' none pass a raw expression through unchanged',
  );
// One aggregation series in a builder query: the function, the column or
// expression it operates on, an optional per-series row filter, a display
// alias, and (for quantile) the percentile level.
const mcpSelectItemSchema = z.object({
  aggFn: mcpAggFnSchema,
  valueExpression: z
    .string()
    .optional()
    .describe(
      'Column or expression to aggregate. Required for every aggFn except "count". ' +
      'Use PascalCase for top-level columns (e.g. "Duration", "StatusCode"). ' +
      "For span attributes use: SpanAttributes['key'] (e.g. SpanAttributes['http.method']). " +
      "For resource attributes use: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).",
    ),
  where: z
    .string()
    .optional()
    .default('')
    .describe(
      'Row filter in Lucene syntax. ' +
      'Examples: "level:error", "service.name:api AND http.status_code:>=500"',
    ),
  whereLanguage: z
    .enum(['lucene', 'sql'])
    .optional()
    .default('lucene')
    .describe('Query language for the where filter. Default: lucene'),
  alias: z
    .string()
    .optional()
    .describe('Display label for this series. Example: "Error rate"'),
  level: z
    .union([z.literal(0.5), z.literal(0.9), z.literal(0.95), z.literal(0.99)])
    .optional()
    .describe(
      'Percentile level. Only applicable when aggFn is "quantile". ' +
      'Allowed values: 0.5, 0.9, 0.95, 0.99',
    ),
});
// Optional ISO 8601 query window shared by all query shapes; defaults are
// applied downstream by parseTimeRange (end: now, start: 15 minutes earlier).
const mcpTimeRangeSchema = z.object({
  startTime: z
    .string()
    .optional()
    .describe(
      'Start of the query window as ISO 8601. Default: 15 minutes ago. ' +
      'If results are empty, try a wider range (e.g. 24 hours).',
    ),
  endTime: z
    .string()
    .optional()
    .describe('End of the query window as ISO 8601. Default: now.'),
});
// ─── Discriminated union schema for hyperdx_query ───────────────────────────
// Builder queries: aggregated charts (line, stacked_bar, table, number, pie)
// built from a source plus one or more select items.
const builderQuerySchema = mcpTimeRangeSchema.extend({
  displayType: z
    .enum(['line', 'stacked_bar', 'table', 'number', 'pie'])
    .describe(
      'How to visualize the query results:\n' +
        ' line time-series line chart\n' +
        ' stacked_bar time-series stacked bar chart\n' +
        ' table grouped aggregation as rows\n' +
        ' number single aggregate scalar\n' +
        ' pie pie chart (one metric, grouped)',
    ),
  sourceId: z
    .string()
    .describe(
      'Source ID. Call hyperdx_list_sources to find available sources.',
    ),
  select: z
    .array(mcpSelectItemSchema)
    .min(1)
    .max(10)
    .describe(
      'Metrics to compute. Each item defines an aggregation. ' +
      'For "number" display, provide exactly 1 item. ' +
      'Example: [{ aggFn: "count" }, { aggFn: "avg", valueExpression: "Duration" }]',
    ),
  groupBy: z
    .string()
    .optional()
    .describe(
      'Column to group/split by. ' +
      'Top-level columns use PascalCase (e.g. "SpanName", "StatusCode"). ' +
      "Span attributes: SpanAttributes['key'] (e.g. SpanAttributes['http.method']). " +
      "Resource attributes: ResourceAttributes['key'] (e.g. ResourceAttributes['service.name']).",
    ),
  orderBy: z
    .string()
    .optional()
    .describe('Column to sort results by (table display only).'),
  granularity: z
    .string()
    .optional()
    .describe(
      'Time bucket size for time-series charts (line, stacked_bar). ' +
      'Format: "<number> <unit>" where unit is second, minute, hour, or day. ' +
      'Examples: "1 minute", "5 minute", "1 hour", "1 day". ' +
      'Omit to let HyperDX pick automatically based on the time range.',
    ),
});
// Search queries: browse individual log/event rows from a source (no
// aggregation); rows are returned newest-first and capped by maxResults.
const searchQuerySchema = mcpTimeRangeSchema.extend({
  displayType: z
    .literal('search')
    .describe('Search and filter individual log/event rows'),
  sourceId: z
    .string()
    .describe(
      'Source ID. Call hyperdx_list_sources to find available sources.',
    ),
  where: z
    .string()
    .optional()
    .default('')
    .describe(
      'Row filter. Examples: "level:error", "service.name:api AND duration:>500"',
    ),
  whereLanguage: z
    .enum(['lucene', 'sql'])
    .optional()
    .default('lucene')
    .describe('Query language for the where filter. Default: lucene'),
  columns: z
    .string()
    .optional()
    .default('')
    .describe(
      'Comma-separated columns to include. Leave empty for defaults. ' +
      'Example: "body,service.name,duration"',
    ),
  maxResults: z
    .number()
    .min(1)
    .max(200)
    .optional()
    .default(50)
    .describe(
      // Description previously read "(1200)", contradicting min(1)/max(200).
      'Maximum number of rows to return (1-200). Default: 50. ' +
      'Use smaller values to reduce response size.',
    ),
});
// Raw SQL queries: hand-written ClickHouse SQL against a connection, with
// support for ClickHouse query parameters and dashboard-style macros.
const sqlQuerySchema = mcpTimeRangeSchema.extend({
  displayType: z
    .literal('sql')
    .describe(
      'ADVANCED: Execute raw SQL directly against ClickHouse. ' +
      'Only use this when the builder query types (line, stacked_bar, table, number, pie, search) ' +
      'cannot express the query you need — e.g. complex JOINs, sub-queries, CTEs, or ' +
      'querying tables not registered as sources. ' +
      'Prefer the builder display types for standard queries as they are safer and easier to use.',
    ),
  connectionId: z
    .string()
    .describe(
      'Connection ID (not sourceId). Call hyperdx_list_sources to find available connections.',
    ),
  sql: z
    .string()
    .describe(
      'Raw ClickHouse SQL query to execute. ' +
      'Always include a LIMIT clause to avoid returning excessive data.\n\n' +
      'QUERY PARAMETERS (ClickHouse native parameterized syntax):\n' +
      ' {startDateMilliseconds:Int64} — start of date range in ms since epoch\n' +
      ' {endDateMilliseconds:Int64} — end of date range in ms since epoch\n' +
      ' {intervalSeconds:Int64} — time bucket size in seconds (time-series only)\n' +
      ' {intervalMilliseconds:Int64} — time bucket size in milliseconds (time-series only)\n\n' +
      'MACROS (expanded before execution):\n' +
      ' $__timeFilter(column) — expands to: column >= <start> AND column <= <end> (DateTime precision)\n' +
      ' $__timeFilter_ms(column) — same but with DateTime64 millisecond precision\n' +
      ' $__dateFilter(column) — same but with Date precision\n' +
      ' $__dateTimeFilter(dateCol, timeCol) — filters on both a Date and DateTime column\n' +
      ' $__dt(dateCol, timeCol) — alias for $__dateTimeFilter\n' +
      ' $__fromTime / $__toTime — start/end as DateTime values\n' +
      ' $__fromTime_ms / $__toTime_ms — start/end as DateTime64 values\n' +
      ' $__timeInterval(column) — time bucket expression: toStartOfInterval(toDateTime(column), INTERVAL ...)\n' +
      ' $__timeInterval_ms(column) — same with millisecond precision\n' +
      ' $__interval_s — raw interval in seconds\n' +
      ' $__filters — placeholder for dashboard filter conditions (resolves to 1=1 when no filters)\n\n' +
      'Example (time-series): "SELECT $__timeInterval(TimestampTime) AS ts, ServiceName, count() ' +
      'FROM otel_logs WHERE $__timeFilter(TimestampTime) GROUP BY ServiceName, ts ORDER BY ts"\n\n' +
      'Example (table): "SELECT ServiceName, count() AS n FROM otel_logs ' +
      'WHERE TimestampTime >= fromUnixTimestamp64Milli({startDateMilliseconds:Int64}) ' +
      'AND TimestampTime < fromUnixTimestamp64Milli({endDateMilliseconds:Int64}) ' +
      'GROUP BY ServiceName ORDER BY n DESC LIMIT 20"',
    ),
});
// Input schema for the hyperdx_query tool; discriminated on displayType so
// each shape (builder / search / sql) validates its own required fields.
export const hyperdxQuerySchema = z.discriminatedUnion('displayType', [
  builderQuerySchema,
  searchQuerySchema,
  sqlQuerySchema,
]);

View file

@ -0,0 +1,10 @@
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
// Per-session context threaded into every MCP tool/prompt registration.
export type McpContext = {
  // Team the MCP session is scoped to; all lookups are team-scoped.
  teamId: string;
  // Acting user, when known (used for span/log attribution in tracing).
  userId?: string;
};
// A tool module: registers one or more tools on the given MCP server.
export type ToolDefinition = (server: McpServer, context: McpContext) => void;
// A prompt module: registers one or more prompts on the given MCP server.
export type PromptDefinition = (server: McpServer, context: McpContext) => void;

View file

@ -0,0 +1,83 @@
import opentelemetry, { SpanStatusCode } from '@opentelemetry/api';
import { CODE_VERSION } from '@/config';
import logger from '@/utils/logger';
import type { McpContext } from '../tools/types';
// Tracer for MCP tool spans, versioned with the application build.
const mcpTracer = opentelemetry.trace.getTracer('hyperdx-mcp', CODE_VERSION);
// Result shape returned by MCP tool handlers: text content blocks plus an
// optional error flag.
type ToolResult = {
  content: { type: 'text'; text: string }[];
  isError?: boolean;
};
/**
 * Wraps an MCP tool handler with tracing and structured logging.
 *
 * Every invocation runs inside its own span (`mcp.tool.<name>`) annotated
 * with the tool name, team, optional user, duration, and error state.
 * Invocation, completion, tool-level errors, and thrown failures are all
 * logged with elapsed time; thrown errors are recorded on the span and
 * re-thrown to the caller.
 */
export function withToolTracing<TArgs>(
  toolName: string,
  context: McpContext,
  handler: (args: TArgs) => Promise<ToolResult>,
): (args: TArgs) => Promise<ToolResult> {
  return async (args: TArgs) =>
    mcpTracer.startActiveSpan(`mcp.tool.${toolName}`, async span => {
      const startedAt = Date.now();
      const baseLog = {
        tool: toolName,
        teamId: context.teamId,
        userId: context.userId,
      };
      span.setAttribute('mcp.tool.name', toolName);
      span.setAttribute('mcp.team.id', context.teamId);
      if (context.userId) {
        span.setAttribute('mcp.user.id', context.userId);
      }
      logger.info(baseLog, `MCP tool invoked: ${toolName}`);
      try {
        const result = await handler(args);
        const durationMs = Date.now() - startedAt;
        if (result.isError) {
          // Tool-level error: the handler returned isError rather than throwing.
          span.setStatus({ code: SpanStatusCode.ERROR });
          span.setAttribute('mcp.tool.error', true);
          logger.warn(
            { ...baseLog, durationMs },
            `MCP tool error: ${toolName}`,
          );
        } else {
          span.setStatus({ code: SpanStatusCode.OK });
          logger.info(
            { ...baseLog, durationMs },
            `MCP tool completed: ${toolName}`,
          );
        }
        span.setAttribute('mcp.tool.duration_ms', durationMs);
        span.end();
        return result;
      } catch (err) {
        // Thrown failure: record on the span, log, and propagate to the caller.
        const durationMs = Date.now() - startedAt;
        const message = err instanceof Error ? err.message : String(err);
        span.setStatus({ code: SpanStatusCode.ERROR, message });
        span.recordException(
          err instanceof Error ? err : new Error(message),
        );
        span.setAttribute('mcp.tool.duration_ms', durationMs);
        span.end();
        logger.error(
          { ...baseLog, durationMs, error: err },
          `MCP tool failed: ${toolName}`,
        );
        throw err;
      }
    });
}

View file

@ -3,6 +3,7 @@ import type { NextFunction, Request, Response } from 'express';
import { IS_PROD } from '@/config';
import { BaseError, isOperationalError, StatusCode } from '@/utils/errors';
import logger from '@/utils/logger';
// WARNING: need to keep the 4th arg for express to identify it as an error-handling middleware function
export const appErrorHandler = (
@ -11,7 +12,11 @@ export const appErrorHandler = (
res: Response,
next: NextFunction,
) => {
console.error(err);
if (isOperationalError(err)) {
logger.warn({ err }, err.message);
} else {
logger.error({ err }, err.message);
}
const userFacingErrorMessage = isOperationalError(err)
? err.name || err.message

View file

@ -1,14 +1,14 @@
import { ALERT_INTERVAL_TO_MINUTES } from '@hyperdx/common-utils/dist/types';
import {
ALERT_INTERVAL_TO_MINUTES,
AlertErrorType,
AlertThresholdType,
} from '@hyperdx/common-utils/dist/types';
export { AlertThresholdType } from '@hyperdx/common-utils/dist/types';
import mongoose, { Schema } from 'mongoose';
import type { ObjectId } from '.';
import Team from './team';
export enum AlertThresholdType {
ABOVE = 'above',
BELOW = 'below',
}
export enum AlertState {
ALERT = 'ALERT',
DISABLED = 'DISABLED',
@ -16,6 +16,12 @@ export enum AlertState {
OK = 'OK',
}
export interface IAlertError {
timestamp: Date;
type: AlertErrorType;
message: string;
}
// follow 'ms' pkg formats
export type AlertInterval =
| '1m'
@ -51,6 +57,8 @@ export interface IAlert {
state: AlertState;
team: ObjectId;
threshold: number;
/** The upper bound for BETWEEN and NOT BETWEEN threshold types */
thresholdMax?: number;
thresholdType: AlertThresholdType;
createdBy?: ObjectId;
@ -72,6 +80,9 @@ export interface IAlert {
at: Date;
until: Date;
};
// Errors recorded during the most recent execution
executionErrors?: IAlertError[];
createdAt: Date;
updatedAt: Date;
}
@ -84,6 +95,10 @@ const AlertSchema = new Schema<IAlert>(
type: Number,
required: true,
},
thresholdMax: {
type: Number,
required: false,
},
thresholdType: {
type: String,
enum: AlertThresholdType,
@ -187,6 +202,22 @@ const AlertSchema = new Schema<IAlert>(
required: false,
},
},
executionErrors: {
type: [
{
_id: false,
timestamp: { type: Date, required: true },
type: {
type: String,
enum: AlertErrorType,
required: true,
},
message: { type: String, required: true },
},
],
required: false,
default: undefined,
},
},
{
timestamps: true,

View file

@ -7,6 +7,8 @@ import type { ObjectId } from '.';
export interface IDashboard extends z.infer<typeof DashboardSchema> {
_id: ObjectId;
team: ObjectId;
createdBy?: ObjectId;
updatedBy?: ObjectId;
createdAt: Date;
updatedAt: Date;
}
@ -32,6 +34,16 @@ export default mongoose.model<IDashboard>(
savedQueryLanguage: { type: String, required: false },
savedFilterValues: { type: mongoose.Schema.Types.Array, required: false },
containers: { type: mongoose.Schema.Types.Array, required: false },
createdBy: {
type: mongoose.Schema.Types.ObjectId,
ref: 'User',
required: false,
},
updatedBy: {
type: mongoose.Schema.Types.ObjectId,
ref: 'User',
required: false,
},
},
{
timestamps: true,

View file

@ -0,0 +1,49 @@
import type { PinnedFiltersValue } from '@hyperdx/common-utils/dist/types';
import mongoose, { Schema } from 'mongoose';
import type { ObjectId } from '.';
// Pinned-filter state for a team+source pair: which field names are pinned
// and their current filter values (one document per combination — enforced
// by the unique index declared below).
interface IPinnedFilter {
  _id: ObjectId;
  team: ObjectId;
  source: ObjectId;
  // Field names the team has pinned for this source.
  fields: string[];
  // Current filter values, per the PinnedFiltersValue contract.
  filters: PinnedFiltersValue;
  createdAt: Date;
  updatedAt: Date;
}
const PinnedFilterSchema = new Schema<IPinnedFilter>(
  {
    team: {
      type: mongoose.Schema.Types.ObjectId,
      required: true,
      ref: 'Team',
    },
    source: {
      type: mongoose.Schema.Types.ObjectId,
      required: true,
      ref: 'Source',
    },
    fields: {
      type: [String],
      default: [],
    },
    // Mixed type: the filter-value shape is validated at the API layer, not here.
    filters: {
      type: Schema.Types.Mixed,
      default: {},
    },
  },
  {
    // timestamps adds createdAt/updatedAt automatically.
    timestamps: true,
    toJSON: { getters: true },
  },
);
// One document per team+source combination
PinnedFilterSchema.index({ team: 1, source: 1 }, { unique: true });
export default mongoose.model<IPinnedFilter>(
  'PinnedFilter',
  PinnedFilterSchema,
);

View file

@ -9,6 +9,8 @@ export interface ISavedSearch
_id: ObjectId;
team: ObjectId;
source: ObjectId;
createdBy?: ObjectId;
updatedBy?: ObjectId;
createdAt: Date;
updatedAt: Date;
}
@ -35,6 +37,16 @@ export const SavedSearch = mongoose.model<ISavedSearch>(
},
tags: [String],
filters: [{ type: mongoose.Schema.Types.Mixed }],
createdBy: {
type: mongoose.Schema.Types.ObjectId,
ref: 'User',
required: false,
},
updatedBy: {
type: mongoose.Schema.Types.ObjectId,
ref: 'User',
required: false,
},
},
{
toJSON: { virtuals: true },

View file

@ -82,6 +82,7 @@ type CollectorConfig = {
logs_table_name: string;
timeout: string;
create_schema: string;
json: string;
retry_on_failure: {
enabled: boolean;
initial_interval: string;
@ -97,6 +98,7 @@ type CollectorConfig = {
ttl: string;
timeout: string;
create_schema: string;
json: string;
retry_on_failure: {
enabled: boolean;
initial_interval: string;
@ -205,6 +207,7 @@ export const buildOtelCollectorConfig = (
timeout: '5s',
create_schema:
'${env:HYPERDX_OTEL_EXPORTER_CREATE_LEGACY_SCHEMA:-false}',
json: '${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE:-false}',
retry_on_failure: {
enabled: true,
initial_interval: '5s',
@ -221,6 +224,7 @@ export const buildOtelCollectorConfig = (
timeout: '5s',
create_schema:
'${env:HYPERDX_OTEL_EXPORTER_CREATE_LEGACY_SCHEMA:-false}',
json: '${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_JSON_ENABLE:-false}',
retry_on_failure: {
enabled: true,
initial_interval: '5s',

View file

@ -1,12 +1,22 @@
import {
AlertErrorType,
AlertThresholdType,
DisplayType,
} from '@hyperdx/common-utils/dist/types';
import {
getLoggedInAgent,
getServer,
makeAlertInput,
makeRawSqlAlertTile,
makeRawSqlNumberAlertTile,
makeRawSqlTile,
makeTile,
randomMongoId,
RAW_SQL_ALERT_TEMPLATE,
} from '@/fixtures';
import Alert, { AlertSource, AlertThresholdType } from '@/models/alert';
import Alert, { AlertSource, AlertState } from '@/models/alert';
import AlertHistory from '@/models/alertHistory';
import Webhook, { WebhookDocument, WebhookService } from '@/models/webhook';
const MOCK_TILES = [makeTile(), makeTile(), makeTile(), makeTile(), makeTile()];
@ -550,8 +560,61 @@ describe('alerts router', () => {
await agent.delete(`/alerts/${fakeId}/silenced`).expect(404); // Should fail
});
it('rejects creating an alert on a raw SQL tile', async () => {
const rawSqlTile = makeRawSqlTile();
it('allows creating an alert on a raw SQL line tile', async () => {
const rawSqlTile = makeRawSqlAlertTile();
const dashboard = await agent
.post('/dashboards')
.send({
name: 'Test Dashboard',
tiles: [rawSqlTile],
tags: [],
})
.expect(200);
const alert = await agent
.post('/alerts')
.send(
makeAlertInput({
dashboardId: dashboard.body.id,
tileId: rawSqlTile.id,
webhookId: webhook._id.toString(),
}),
)
.expect(200);
expect(alert.body.data.dashboard).toBe(dashboard.body.id);
expect(alert.body.data.tileId).toBe(rawSqlTile.id);
});
it('allows creating an alert on a raw SQL number tile', async () => {
const rawSqlTile = makeRawSqlNumberAlertTile();
const dashboard = await agent
.post('/dashboards')
.send({
name: 'Test Dashboard',
tiles: [rawSqlTile],
tags: [],
})
.expect(200);
const alert = await agent
.post('/alerts')
.send(
makeAlertInput({
dashboardId: dashboard.body.id,
tileId: rawSqlTile.id,
webhookId: webhook._id.toString(),
}),
)
.expect(200);
expect(alert.body.data.dashboard).toBe(dashboard.body.id);
expect(alert.body.data.tileId).toBe(rawSqlTile.id);
});
it('rejects creating an alert on a raw SQL table tile', async () => {
const rawSqlTile = makeRawSqlTile({
displayType: DisplayType.Table,
sqlTemplate: RAW_SQL_ALERT_TEMPLATE,
});
const dashboard = await agent
.post('/dashboards')
.send({
@ -573,9 +636,72 @@ describe('alerts router', () => {
.expect(400);
});
it('rejects updating an alert to reference a raw SQL tile', async () => {
it('rejects creating an alert on a raw SQL tile without interval params', async () => {
const rawSqlTile = makeRawSqlTile({
sqlTemplate: 'SELECT count() FROM otel_logs',
});
const dashboard = await agent
.post('/dashboards')
.send({
name: 'Test Dashboard',
tiles: [rawSqlTile],
tags: [],
})
.expect(200);
await agent
.post('/alerts')
.send(
makeAlertInput({
dashboardId: dashboard.body.id,
tileId: rawSqlTile.id,
webhookId: webhook._id.toString(),
}),
)
.expect(400);
});
it('allows updating an alert to reference a raw SQL number tile', async () => {
const regularTile = makeTile();
const rawSqlTile = makeRawSqlTile();
const rawSqlTile = makeRawSqlNumberAlertTile();
const dashboard = await agent
.post('/dashboards')
.send({
name: 'Test Dashboard',
tiles: [regularTile, rawSqlTile],
tags: [],
})
.expect(200);
const alert = await agent
.post('/alerts')
.send(
makeAlertInput({
dashboardId: dashboard.body.id,
tileId: regularTile.id,
webhookId: webhook._id.toString(),
}),
)
.expect(200);
await agent
.put(`/alerts/${alert.body.data._id}`)
.send({
...makeAlertInput({
dashboardId: dashboard.body.id,
tileId: rawSqlTile.id,
webhookId: webhook._id.toString(),
}),
})
.expect(200);
});
it('rejects updating an alert to reference a raw SQL table tile', async () => {
const regularTile = makeTile();
const rawSqlTile = makeRawSqlTile({
displayType: DisplayType.Table,
sqlTemplate: RAW_SQL_ALERT_TEMPLATE,
});
const dashboard = await agent
.post('/dashboards')
.send({
@ -607,4 +733,175 @@ describe('alerts router', () => {
})
.expect(400);
});
describe('GET /alerts/:id', () => {
it('returns 404 for non-existent alert', async () => {
const fakeId = randomMongoId();
await agent.get(`/alerts/${fakeId}`).expect(404);
});
it('returns alert with empty history when no history exists', async () => {
const dashboard = await agent
.post('/dashboards')
.send(MOCK_DASHBOARD)
.expect(200);
const alert = await agent
.post('/alerts')
.send(
makeAlertInput({
dashboardId: dashboard.body.id,
tileId: dashboard.body.tiles[0].id,
webhookId: webhook._id.toString(),
}),
)
.expect(200);
const res = await agent.get(`/alerts/${alert.body.data._id}`).expect(200);
expect(res.body.data._id).toBe(alert.body.data._id);
expect(res.body.data.history).toEqual([]);
expect(res.body.data.threshold).toBe(alert.body.data.threshold);
expect(res.body.data.interval).toBe(alert.body.data.interval);
expect(res.body.data.dashboard).toBeDefined();
expect(res.body.data.tileId).toBe(dashboard.body.tiles[0].id);
});
it('returns alert with history entries', async () => {
const dashboard = await agent
.post('/dashboards')
.send(MOCK_DASHBOARD)
.expect(200);
const alert = await agent
.post('/alerts')
.send(
makeAlertInput({
dashboardId: dashboard.body.id,
tileId: dashboard.body.tiles[0].id,
webhookId: webhook._id.toString(),
}),
)
.expect(200);
const now = new Date(Date.now() - 60000);
const earlier = new Date(Date.now() - 120000);
await AlertHistory.create({
alert: alert.body.data._id,
createdAt: now,
state: AlertState.ALERT,
counts: 5,
lastValues: [{ startTime: now, count: 5 }],
});
await AlertHistory.create({
alert: alert.body.data._id,
createdAt: earlier,
state: AlertState.OK,
counts: 0,
lastValues: [{ startTime: earlier, count: 0 }],
});
const res = await agent.get(`/alerts/${alert.body.data._id}`).expect(200);
expect(res.body.data._id).toBe(alert.body.data._id);
expect(res.body.data.history).toHaveLength(2);
expect(res.body.data.history[0].state).toBe('ALERT');
expect(res.body.data.history[0].counts).toBe(5);
expect(res.body.data.history[1].state).toBe('OK');
expect(res.body.data.history[1].counts).toBe(0);
});
});
describe('errors propagation', () => {
it('returns the errors field on a single alert response', async () => {
const dashboard = await agent
.post('/dashboards')
.send(MOCK_DASHBOARD)
.expect(200);
const alert = await agent
.post('/alerts')
.send(
makeAlertInput({
dashboardId: dashboard.body.id,
tileId: dashboard.body.tiles[0].id,
webhookId: webhook._id.toString(),
}),
)
.expect(200);
const errorTimestamp = new Date('2026-04-17T12:00:00.000Z');
await Alert.updateOne(
{ _id: alert.body.data._id },
{
$set: {
executionErrors: [
{
timestamp: errorTimestamp,
type: AlertErrorType.QUERY_ERROR,
message: 'ClickHouse returned 500',
},
],
},
},
);
const res = await agent.get(`/alerts/${alert.body.data._id}`).expect(200);
expect(res.body.data.executionErrors).toHaveLength(1);
expect(res.body.data.executionErrors[0].type).toBe(
AlertErrorType.QUERY_ERROR,
);
expect(res.body.data.executionErrors[0].message).toBe(
'ClickHouse returned 500',
);
expect(
new Date(res.body.data.executionErrors[0].timestamp).toISOString(),
).toBe(errorTimestamp.toISOString());
});
it('returns the errors field on the alerts list response', async () => {
const dashboard = await agent
.post('/dashboards')
.send(MOCK_DASHBOARD)
.expect(200);
const alert = await agent
.post('/alerts')
.send(
makeAlertInput({
dashboardId: dashboard.body.id,
tileId: dashboard.body.tiles[0].id,
webhookId: webhook._id.toString(),
}),
)
.expect(200);
await Alert.updateOne(
{ _id: alert.body.data._id },
{
$set: {
executionErrors: [
{
timestamp: new Date('2026-04-17T12:00:00.000Z'),
type: AlertErrorType.WEBHOOK_ERROR,
message: 'webhook delivery failed',
},
],
},
},
);
const list = await agent.get('/alerts').expect(200);
expect(list.body.data).toHaveLength(1);
expect(list.body.data[0].executionErrors).toHaveLength(1);
expect(list.body.data[0].executionErrors[0].type).toBe(
AlertErrorType.WEBHOOK_ERROR,
);
expect(list.body.data[0].executionErrors[0].message).toBe(
'webhook delivery failed',
);
});
});
});

View file

@ -20,6 +20,8 @@ import {
makeTile,
} from '../../../fixtures';
import Alert from '../../../models/alert';
import Dashboard from '../../../models/dashboard';
import User from '../../../models/user';
const MOCK_DASHBOARD = {
name: 'Test Dashboard',
@ -78,6 +80,45 @@ describe('dashboard router', () => {
);
});
it('sets createdBy and updatedBy on create and populates them in GET', async () => {
const created = await agent
.post('/dashboards')
.send(MOCK_DASHBOARD)
.expect(200);
// GET all dashboards
const allDashboards = await agent.get('/dashboards').expect(200);
const dashboard = allDashboards.body.find(d => d._id === created.body.id);
expect(dashboard.createdBy).toMatchObject({ email: user.email });
expect(dashboard.updatedBy).toMatchObject({ email: user.email });
});
it('populates updatedBy with a different user after DB update', async () => {
const created = await agent
.post('/dashboards')
.send(MOCK_DASHBOARD)
.expect(200);
// Create a second user on the same team
const secondUser = await User.create({
email: 'second@test.com',
name: 'Second User',
team: team._id,
});
// Simulate a different user updating the dashboard
await Dashboard.findByIdAndUpdate(created.body.id, {
updatedBy: secondUser._id,
});
const allDashboards = await agent.get('/dashboards').expect(200);
const dashboard = allDashboards.body.find(d => d._id === created.body.id);
expect(dashboard.createdBy).toMatchObject({ email: user.email });
expect(dashboard.updatedBy).toMatchObject({
email: 'second@test.com',
});
});
it('can update a dashboard', async () => {
const dashboard = await agent
.post('/dashboards')

View file

@ -0,0 +1,214 @@
import { SourceKind, TSource } from '@hyperdx/common-utils/dist/types';
import { Types } from 'mongoose';
import { getLoggedInAgent, getServer } from '@/fixtures';
import { Source } from '@/models/source';
const MOCK_SOURCE: Omit<Extract<TSource, { kind: 'log' }>, 'id'> = {
kind: SourceKind.Log,
name: 'Test Source',
connection: new Types.ObjectId().toString(),
from: { databaseName: 'test_db', tableName: 'test_table' },
timestampValueExpression: 'timestamp',
defaultTableSelectExpression: 'body',
};
describe('pinnedFilters router', () => {
const server = getServer();
let agent: Awaited<ReturnType<typeof getLoggedInAgent>>['agent'];
let team: Awaited<ReturnType<typeof getLoggedInAgent>>['team'];
let sourceId: string;
beforeAll(async () => {
await server.start();
});
beforeEach(async () => {
const result = await getLoggedInAgent(server);
agent = result.agent;
team = result.team;
// Create a real source owned by this team
const source = await Source.create({ ...MOCK_SOURCE, team: team._id });
sourceId = source._id.toString();
});
afterEach(async () => {
await server.clearDBs();
});
afterAll(async () => {
await server.stop();
});
describe('GET /pinned-filters', () => {
it('returns null when no pinned filters exist', async () => {
const res = await agent
.get(`/pinned-filters?source=${sourceId}`)
.expect(200);
expect(res.body.team).toBeNull();
});
it('rejects invalid source id', async () => {
await agent.get('/pinned-filters?source=not-an-objectid').expect(400);
});
it('rejects missing source param', async () => {
await agent.get('/pinned-filters').expect(400);
});
it('returns 404 for a source not owned by the team', async () => {
const foreignSourceId = new Types.ObjectId().toString();
await agent.get(`/pinned-filters?source=${foreignSourceId}`).expect(404);
});
});
describe('PUT /pinned-filters', () => {
it('can create pinned filters', async () => {
const res = await agent
.put('/pinned-filters')
.send({
source: sourceId,
fields: ['ServiceName', 'SeverityText'],
filters: { ServiceName: ['web', 'api'] },
})
.expect(200);
expect(res.body.fields).toEqual(['ServiceName', 'SeverityText']);
expect(res.body.filters).toEqual({ ServiceName: ['web', 'api'] });
expect(res.body.id).toBeDefined();
});
it('upserts on repeated PUT', async () => {
await agent
.put('/pinned-filters')
.send({
source: sourceId,
fields: ['ServiceName'],
filters: { ServiceName: ['web'] },
})
.expect(200);
const res = await agent
.put('/pinned-filters')
.send({
source: sourceId,
fields: ['ServiceName', 'SeverityText'],
filters: { ServiceName: ['web', 'api'], SeverityText: ['error'] },
})
.expect(200);
expect(res.body.fields).toEqual(['ServiceName', 'SeverityText']);
expect(res.body.filters).toEqual({
ServiceName: ['web', 'api'],
SeverityText: ['error'],
});
});
it('rejects invalid source id', async () => {
await agent
.put('/pinned-filters')
.send({ source: 'not-valid', fields: [], filters: {} })
.expect(400);
});
it('returns 404 for a source not owned by the team', async () => {
const foreignSourceId = new Types.ObjectId().toString();
await agent
.put('/pinned-filters')
.send({ source: foreignSourceId, fields: [], filters: {} })
.expect(404);
});
});
describe('GET + PUT round-trip', () => {
it('returns data after PUT', async () => {
await agent
.put('/pinned-filters')
.send({
source: sourceId,
fields: ['ServiceName'],
filters: { ServiceName: ['web'] },
})
.expect(200);
const res = await agent
.get(`/pinned-filters?source=${sourceId}`)
.expect(200);
expect(res.body.team).not.toBeNull();
expect(res.body.team.fields).toEqual(['ServiceName']);
expect(res.body.team.filters).toEqual({ ServiceName: ['web'] });
});
it('can reset by sending empty fields and filters', async () => {
await agent
.put('/pinned-filters')
.send({
source: sourceId,
fields: ['ServiceName'],
filters: { ServiceName: ['web'] },
})
.expect(200);
await agent
.put('/pinned-filters')
.send({ source: sourceId, fields: [], filters: {} })
.expect(200);
const res = await agent
.get(`/pinned-filters?source=${sourceId}`)
.expect(200);
expect(res.body.team).not.toBeNull();
expect(res.body.team.fields).toEqual([]);
expect(res.body.team.filters).toEqual({});
});
});
describe('source scoping', () => {
it('pins are scoped to their source', async () => {
const source2 = await Source.create({ ...MOCK_SOURCE, team: team._id });
await agent
.put('/pinned-filters')
.send({
source: sourceId,
fields: ['ServiceName'],
filters: { ServiceName: ['web'] },
})
.expect(200);
const res = await agent
.get(`/pinned-filters?source=${source2._id}`)
.expect(200);
expect(res.body.team).toBeNull();
});
});
// Note: cross-team isolation (Team B cannot read Team A's pins) is enforced
// by the MongoDB query filtering on teamId AND the source ownership check
// (getSource validates source.team === teamId). Multi-team integration tests
// are not possible in this single-team environment (register returns 409).
describe('filter values with booleans', () => {
it('supports boolean values in filters', async () => {
await agent
.put('/pinned-filters')
.send({
source: sourceId,
fields: ['isRootSpan'],
filters: { isRootSpan: [true, false] },
})
.expect(200);
const res = await agent
.get(`/pinned-filters?source=${sourceId}`)
.expect(200);
expect(res.body.team.filters).toEqual({ isRootSpan: [true, false] });
});
});
});

View file

@ -4,6 +4,8 @@ import {
makeSavedSearchAlertInput,
} from '@/fixtures';
import Alert from '@/models/alert';
import { SavedSearch } from '@/models/savedSearch';
import User from '@/models/user';
import Webhook, { WebhookDocument, WebhookService } from '@/models/webhook';
const MOCK_SAVED_SEARCH = {
@ -127,6 +129,66 @@ describe('savedSearch router', () => {
expect(await Alert.findById(alert.body.data._id)).toBeNull();
});
it('sets createdBy and updatedBy on create and populates them in GET', async () => {
const created = await agent
.post('/saved-search')
.send(MOCK_SAVED_SEARCH)
.expect(200);
// GET all saved searches
const savedSearches = await agent.get('/saved-search').expect(200);
const savedSearch = savedSearches.body.find(
s => s._id === created.body._id,
);
expect(savedSearch.createdBy).toMatchObject({ email: user.email });
expect(savedSearch.updatedBy).toMatchObject({ email: user.email });
});
it('populates updatedBy with a different user after DB update', async () => {
const created = await agent
.post('/saved-search')
.send(MOCK_SAVED_SEARCH)
.expect(200);
// Create a second user on the same team
const secondUser = await User.create({
email: 'second@test.com',
name: 'Second User',
team: team._id,
});
// Simulate a different user updating the saved search
await SavedSearch.findByIdAndUpdate(created.body._id, {
updatedBy: secondUser._id,
});
const savedSearches = await agent.get('/saved-search').expect(200);
const savedSearch = savedSearches.body.find(
s => s._id === created.body._id,
);
expect(savedSearch.createdBy).toMatchObject({ email: user.email });
expect(savedSearch.updatedBy).toMatchObject({
email: 'second@test.com',
});
});
it('updates updatedBy when updating a saved search via API', async () => {
const created = await agent
.post('/saved-search')
.send(MOCK_SAVED_SEARCH)
.expect(200);
await agent
.patch(`/saved-search/${created.body._id}`)
.send({ name: 'updated name' })
.expect(200);
// Verify updatedBy is still set in the DB
const dbRecord = await SavedSearch.findById(created.body._id);
expect(dbRecord?.updatedBy?.toString()).toBe(user._id.toString());
expect(dbRecord?.createdBy?.toString()).toBe(user._id.toString());
});
it('sets createdBy on alerts created from a saved search and populates it in list', async () => {
// Create a saved search
const savedSearch = await agent

View file

@ -31,11 +31,11 @@ describe('team router', () => {
expect(_.omit(resp.body, ['_id', 'id', 'apiKey', 'createdAt']))
.toMatchInlineSnapshot(`
Object {
"allowedAuthMethods": Array [],
"name": "fake@deploysentinel.com's Team",
}
`);
{
"allowedAuthMethods": [],
"name": "fake@deploysentinel.com's Team",
}
`);
});
it('GET /team/tags - no tags', async () => {
@ -43,7 +43,7 @@ Object {
const resp = await agent.get('/team/tags').expect(200);
expect(resp.body.data).toMatchInlineSnapshot(`Array []`);
expect(resp.body.data).toMatchInlineSnapshot(`[]`);
});
it('GET /team/tags', async () => {
@ -98,29 +98,27 @@ Object {
});
const resp = await agent.get('/team/members').expect(200);
expect(resp.body.data).toMatchInlineSnapshot(`
Array [
Object {
"_id": "${resp.body.data[0]._id}",
"email": "fake@deploysentinel.com",
"hasPasswordAuth": true,
"isCurrentUser": true,
"name": "fake@deploysentinel.com",
},
Object {
"_id": "${user1._id}",
"email": "user1@example.com",
"hasPasswordAuth": true,
"isCurrentUser": false,
},
Object {
"_id": "${user2._id}",
"email": "user2@example.com",
"hasPasswordAuth": true,
"isCurrentUser": false,
},
]
`);
expect(resp.body.data.map(({ _id, ...rest }: any) => rest))
.toMatchInlineSnapshot(`
[
{
"email": "fake@deploysentinel.com",
"hasPasswordAuth": true,
"isCurrentUser": true,
"name": "fake@deploysentinel.com",
},
{
"email": "user1@example.com",
"hasPasswordAuth": true,
"isCurrentUser": false,
},
{
"email": "user2@example.com",
"hasPasswordAuth": true,
"isCurrentUser": false,
},
]
`);
});
it('POST /team/invitation', async () => {
@ -236,17 +234,17 @@ Array [
name: i.name,
})),
).toMatchInlineSnapshot(`
Array [
Object {
"email": "user1@example.com",
"name": "User 1",
},
Object {
"email": "user2@example.com",
"name": "User 2",
},
]
`);
[
{
"email": "user1@example.com",
"name": "User 1",
},
{
"email": "user2@example.com",
"name": "User 2",
},
]
`);
});
it('DELETE /team/member/:userId removes a user', async () => {

View file

@ -1,24 +1,92 @@
import type { AlertsApiResponse } from '@hyperdx/common-utils/dist/types';
import type {
AlertApiResponse,
AlertsApiResponse,
AlertsPageItem,
} from '@hyperdx/common-utils/dist/types';
import express from 'express';
import { pick } from 'lodash';
import { ObjectId } from 'mongodb';
import { z } from 'zod';
import { processRequest, validateRequest } from 'zod-express-middleware';
import { getRecentAlertHistoriesBatch } from '@/controllers/alertHistory';
import {
getRecentAlertHistories,
getRecentAlertHistoriesBatch,
} from '@/controllers/alertHistory';
import {
createAlert,
deleteAlert,
getAlertById,
getAlertEnhanced,
getAlertsEnhanced,
updateAlert,
validateAlertInput,
} from '@/controllers/alerts';
import { sendJson } from '@/utils/serialization';
import { IAlertHistory } from '@/models/alertHistory';
import { PreSerialized, sendJson } from '@/utils/serialization';
import { alertSchema, objectIdSchema } from '@/utils/zod';
const router = express.Router();
type EnhancedAlert = NonNullable<Awaited<ReturnType<typeof getAlertEnhanced>>>;
const formatAlertResponse = (
alert: EnhancedAlert,
history: Omit<IAlertHistory, 'alert'>[],
): PreSerialized<AlertsPageItem> => {
return {
history,
silenced: alert.silenced
? {
by: alert.silenced.by?.email,
at: alert.silenced.at,
until: alert.silenced.until,
}
: undefined,
createdBy: alert.createdBy
? pick(alert.createdBy, ['email', 'name'])
: undefined,
channel: pick(alert.channel, ['type']),
...(alert.dashboard && {
dashboardId: alert.dashboard._id,
dashboard: {
tiles: alert.dashboard.tiles
.filter(tile => tile.id === alert.tileId)
.map(tile => ({
id: tile.id,
config: { name: tile.config.name },
})),
...pick(alert.dashboard, ['_id', 'updatedAt', 'name', 'tags']),
},
}),
...(alert.savedSearch && {
savedSearchId: alert.savedSearch._id,
savedSearch: pick(alert.savedSearch, [
'_id',
'createdAt',
'name',
'updatedAt',
'tags',
]),
}),
...pick(alert, [
'_id',
'interval',
'scheduleOffsetMinutes',
'scheduleStartAt',
'threshold',
'thresholdMax',
'thresholdType',
'state',
'source',
'tileId',
'createdAt',
'updatedAt',
'executionErrors',
]),
};
};
type AlertsExpRes = express.Response<AlertsApiResponse>;
router.get('/', async (req, res: AlertsExpRes, next) => {
try {
@ -39,63 +107,50 @@ router.get('/', async (req, res: AlertsExpRes, next) => {
const data = alerts.map(alert => {
const history = historyMap.get(alert._id.toString()) ?? [];
return {
history,
silenced: alert.silenced
? {
by: alert.silenced.by?.email,
at: alert.silenced.at,
until: alert.silenced.until,
}
: undefined,
createdBy: alert.createdBy
? pick(alert.createdBy, ['email', 'name'])
: undefined,
channel: pick(alert.channel, ['type']),
...(alert.dashboard && {
dashboardId: alert.dashboard._id,
dashboard: {
tiles: alert.dashboard.tiles
.filter(tile => tile.id === alert.tileId)
.map(tile => ({
id: tile.id,
config: { name: tile.config.name },
})),
...pick(alert.dashboard, ['_id', 'updatedAt', 'name', 'tags']),
},
}),
...(alert.savedSearch && {
savedSearchId: alert.savedSearch._id,
savedSearch: pick(alert.savedSearch, [
'_id',
'createdAt',
'name',
'updatedAt',
'tags',
]),
}),
...pick(alert, [
'_id',
'interval',
'scheduleOffsetMinutes',
'scheduleStartAt',
'threshold',
'thresholdType',
'state',
'source',
'tileId',
'createdAt',
'updatedAt',
]),
};
return formatAlertResponse(alert, history);
});
sendJson(res, { data });
} catch (e) {
next(e);
}
});
type AlertExpRes = express.Response<AlertApiResponse>;
router.get(
'/:id',
validateRequest({
params: z.object({
id: objectIdSchema,
}),
}),
async (req, res: AlertExpRes, next) => {
try {
const teamId = req.user?.team;
if (teamId == null) {
return res.sendStatus(403);
}
const alert = await getAlertEnhanced(req.params.id, teamId);
if (!alert) {
return res.sendStatus(404);
}
const history = await getRecentAlertHistories({
alertId: new ObjectId(alert._id),
interval: alert.interval,
limit: 20,
});
const data = formatAlertResponse(alert, history);
sendJson(res, { data });
} catch (e) {
next(e);
}
},
);
router.post(
'/',
processRequest({ body: alertSchema }),

View file

@ -0,0 +1,95 @@
import { PinnedFiltersValueSchema } from '@hyperdx/common-utils/dist/types';
import express from 'express';
import { z } from 'zod';
import { validateRequest } from 'zod-express-middleware';
import {
getPinnedFilters,
updatePinnedFilters,
} from '@/controllers/pinnedFilter';
import { getSource } from '@/controllers/sources';
import { getNonNullUserWithTeam } from '@/middleware/auth';
import { objectIdSchema } from '@/utils/zod';
const router = express.Router();
/**
* GET /pinned-filters?source=<sourceId>
* Returns the team-level pinned filters for the source.
*/
router.get(
'/',
validateRequest({
query: z.object({
source: objectIdSchema,
}),
}),
async (req, res, next) => {
try {
const { teamId } = getNonNullUserWithTeam(req);
const { source } = req.query;
// Verify the source belongs to this team
const sourceDoc = await getSource(teamId.toString(), source);
if (!sourceDoc) {
return res.status(404).json({ error: 'Source not found' });
}
const doc = await getPinnedFilters(teamId.toString(), source);
return res.json({
team: doc
? {
id: doc._id.toString(),
fields: doc.fields,
filters: doc.filters,
}
: null,
});
} catch (e) {
next(e);
}
},
);
const updateBodySchema = z.object({
source: objectIdSchema,
fields: z.array(z.string().max(1024)).max(100),
filters: PinnedFiltersValueSchema,
});
/**
* PUT /pinned-filters
* Upserts team-level pinned filters for the given source.
*/
router.put(
'/',
validateRequest({ body: updateBodySchema }),
async (req, res, next) => {
try {
const { teamId } = getNonNullUserWithTeam(req);
const { source, fields, filters } = req.body;
// Verify the source belongs to this team
const sourceDoc = await getSource(teamId.toString(), source);
if (!sourceDoc) {
return res.status(404).json({ error: 'Source not found' });
}
const doc = await updatePinnedFilters(teamId.toString(), source, {
fields,
filters,
});
return res.json({
id: doc._id.toString(),
fields: doc.fields,
filters: doc.filters,
});
} catch (e) {
next(e);
}
},
);
export default router;

View file

@ -1,4 +1,7 @@
import { SavedSearchSchema } from '@hyperdx/common-utils/dist/types';
import {
SavedSearchListApiResponse,
SavedSearchSchema,
} from '@hyperdx/common-utils/dist/types';
import express from 'express';
import _ from 'lodash';
import { z } from 'zod';
@ -16,7 +19,9 @@ import { objectIdSchema } from '@/utils/zod';
const router = express.Router();
router.get('/', async (req, res, next) => {
type SavedSearchListExpRes = express.Response<SavedSearchListApiResponse[]>;
router.get('/', async (req, res: SavedSearchListExpRes, next) => {
try {
const { teamId } = getNonNullUserWithTeam(req);
@ -37,9 +42,13 @@ router.post(
}),
async (req, res, next) => {
try {
const { teamId } = getNonNullUserWithTeam(req);
const { teamId, userId } = getNonNullUserWithTeam(req);
const savedSearch = await createSavedSearch(teamId.toString(), req.body);
const savedSearch = await createSavedSearch(
teamId.toString(),
req.body,
userId?.toString(),
);
return res.json(savedSearch);
} catch (e) {
@ -60,7 +69,7 @@ router.patch(
}),
async (req, res, next) => {
try {
const { teamId } = getNonNullUserWithTeam(req);
const { teamId, userId } = getNonNullUserWithTeam(req);
const savedSearch = await getSavedSearch(
teamId.toString(),
@ -82,6 +91,7 @@ router.patch(
source: savedSearch.source.toString(),
...updates,
},
userId?.toString(),
);
if (!updatedSavedSearch) {

View file

@ -1,8 +1,14 @@
import { AlertErrorType } from '@hyperdx/common-utils/dist/types';
import _ from 'lodash';
import { ObjectId } from 'mongodb';
import request from 'supertest';
import { getLoggedInAgent, getServer } from '../../../fixtures';
import {
getLoggedInAgent,
getServer,
RAW_SQL_ALERT_TEMPLATE,
RAW_SQL_NUMBER_ALERT_TEMPLATE,
} from '../../../fixtures';
import { AlertSource, AlertThresholdType } from '../../../models/alert';
import Alert from '../../../models/alert';
import Dashboard from '../../../models/dashboard';
@ -83,8 +89,13 @@ describe('External API Alerts', () => {
};
// Helper to create a dashboard with a raw SQL tile for testing
// Uses Number display type by default (not alertable) for rejection tests
const createTestDashboardWithRawSqlTile = async (
options: { teamId?: any } = {},
options: {
teamId?: any;
displayType?: string;
sqlTemplate?: string;
} = {},
) => {
const tileId = new ObjectId().toString();
const tiles = [
@ -97,8 +108,8 @@ describe('External API Alerts', () => {
h: 3,
config: {
configType: 'sql',
displayType: 'line',
sqlTemplate: 'SELECT 1',
displayType: options.displayType ?? 'number',
sqlTemplate: options.sqlTemplate ?? 'SELECT 1',
connection: 'test-connection',
},
},
@ -716,9 +727,66 @@ describe('External API Alerts', () => {
.expect(400);
});
it('should reject creating an alert on a raw SQL tile', async () => {
it('should allow creating an alert on a raw SQL line tile', async () => {
const webhook = await createTestWebhook();
const { dashboard, tileId } = await createTestDashboardWithRawSqlTile();
const { dashboard, tileId } = await createTestDashboardWithRawSqlTile({
displayType: 'line',
sqlTemplate: RAW_SQL_ALERT_TEMPLATE,
});
const alertInput = {
dashboardId: dashboard._id.toString(),
tileId,
threshold: 100,
interval: '1h',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.ABOVE,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
};
const res = await authRequest('post', ALERTS_BASE_URL)
.send(alertInput)
.expect(200);
expect(res.body.data.dashboardId).toBe(dashboard._id.toString());
expect(res.body.data.tileId).toBe(tileId);
});
it('should allow creating an alert on a raw SQL number tile', async () => {
const webhook = await createTestWebhook();
const { dashboard, tileId } = await createTestDashboardWithRawSqlTile({
displayType: 'number',
sqlTemplate: RAW_SQL_NUMBER_ALERT_TEMPLATE,
});
const alertInput = {
dashboardId: dashboard._id.toString(),
tileId,
threshold: 100,
interval: '1h',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.ABOVE,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
};
const res = await authRequest('post', ALERTS_BASE_URL)
.send(alertInput)
.expect(200);
expect(res.body.data.dashboardId).toBe(dashboard._id.toString());
expect(res.body.data.tileId).toBe(tileId);
});
it('should reject creating an alert on a raw SQL table tile', async () => {
const webhook = await createTestWebhook();
const { dashboard, tileId } = await createTestDashboardWithRawSqlTile({
displayType: 'table',
sqlTemplate: RAW_SQL_ALERT_TEMPLATE,
});
const alertInput = {
dashboardId: dashboard._id.toString(),
@ -736,10 +804,36 @@ describe('External API Alerts', () => {
await authRequest('post', ALERTS_BASE_URL).send(alertInput).expect(400);
});
it('should reject updating an alert to reference a raw SQL tile', async () => {
it('should reject creating an alert on a raw SQL tile without interval params', async () => {
const webhook = await createTestWebhook();
const { dashboard, tileId } = await createTestDashboardWithRawSqlTile({
displayType: 'line',
sqlTemplate: 'SELECT count() FROM otel_logs',
});
const alertInput = {
dashboardId: dashboard._id.toString(),
tileId,
threshold: 100,
interval: '1h',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.ABOVE,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
};
await authRequest('post', ALERTS_BASE_URL).send(alertInput).expect(400);
});
it('should reject updating an alert to reference a raw SQL table tile', async () => {
const { alert, webhook } = await createTestAlert();
const { dashboard: rawSqlDashboard, tileId: rawSqlTileId } =
await createTestDashboardWithRawSqlTile();
await createTestDashboardWithRawSqlTile({
displayType: 'table',
sqlTemplate: RAW_SQL_ALERT_TEMPLATE,
});
const updatePayload = {
threshold: 200,
@ -872,6 +966,197 @@ describe('External API Alerts', () => {
});
});
describe('BETWEEN and NOT_BETWEEN threshold types', () => {
it('should create an alert with BETWEEN threshold type', async () => {
const dashboard = await createTestDashboard();
const webhook = await createTestWebhook();
const response = await authRequest('post', ALERTS_BASE_URL)
.send({
dashboardId: dashboard._id.toString(),
tileId: dashboard.tiles[0].id,
threshold: 50,
thresholdMax: 200,
interval: '1h',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.BETWEEN,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
})
.expect(200);
const alert = response.body.data;
expect(alert.threshold).toBe(50);
expect(alert.thresholdMax).toBe(200);
expect(alert.thresholdType).toBe(AlertThresholdType.BETWEEN);
});
it('should create an alert with NOT_BETWEEN threshold type', async () => {
const dashboard = await createTestDashboard();
const webhook = await createTestWebhook();
const response = await authRequest('post', ALERTS_BASE_URL)
.send({
dashboardId: dashboard._id.toString(),
tileId: dashboard.tiles[0].id,
threshold: 10,
thresholdMax: 90,
interval: '5m',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.NOT_BETWEEN,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
})
.expect(200);
const alert = response.body.data;
expect(alert.threshold).toBe(10);
expect(alert.thresholdMax).toBe(90);
expect(alert.thresholdType).toBe(AlertThresholdType.NOT_BETWEEN);
});
it('should reject BETWEEN without thresholdMax', async () => {
const consoleErrorSpy = jest
.spyOn(console, 'error')
.mockImplementation(() => {});
const dashboard = await createTestDashboard();
const webhook = await createTestWebhook();
await authRequest('post', ALERTS_BASE_URL)
.send({
dashboardId: dashboard._id.toString(),
tileId: dashboard.tiles[0].id,
threshold: 50,
interval: '1h',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.BETWEEN,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
})
.expect(400);
consoleErrorSpy.mockRestore();
});
it('should reject BETWEEN when thresholdMax < threshold', async () => {
const consoleErrorSpy = jest
.spyOn(console, 'error')
.mockImplementation(() => {});
const dashboard = await createTestDashboard();
const webhook = await createTestWebhook();
await authRequest('post', ALERTS_BASE_URL)
.send({
dashboardId: dashboard._id.toString(),
tileId: dashboard.tiles[0].id,
threshold: 100,
thresholdMax: 50,
interval: '1h',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.BETWEEN,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
})
.expect(400);
consoleErrorSpy.mockRestore();
});
it('should allow thresholdMax equal to threshold for BETWEEN', async () => {
const dashboard = await createTestDashboard();
const webhook = await createTestWebhook();
const response = await authRequest('post', ALERTS_BASE_URL)
.send({
dashboardId: dashboard._id.toString(),
tileId: dashboard.tiles[0].id,
threshold: 100,
thresholdMax: 100,
interval: '1h',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.BETWEEN,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
})
.expect(200);
expect(response.body.data.threshold).toBe(100);
expect(response.body.data.thresholdMax).toBe(100);
});
it('should update an alert to use BETWEEN threshold type', async () => {
const { alert, dashboard, webhook } = await createTestAlert();
const updateResponse = await authRequest(
'put',
`${ALERTS_BASE_URL}/${alert.id}`,
)
.send({
dashboardId: dashboard._id.toString(),
tileId: dashboard.tiles[0].id,
threshold: 20,
thresholdMax: 80,
interval: '1h',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.BETWEEN,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
})
.expect(200);
const updatedAlert = updateResponse.body.data;
expect(updatedAlert.threshold).toBe(20);
expect(updatedAlert.thresholdMax).toBe(80);
expect(updatedAlert.thresholdType).toBe(AlertThresholdType.BETWEEN);
});
it('should retrieve a BETWEEN alert with thresholdMax', async () => {
const dashboard = await createTestDashboard();
const webhook = await createTestWebhook();
const createResponse = await authRequest('post', ALERTS_BASE_URL)
.send({
dashboardId: dashboard._id.toString(),
tileId: dashboard.tiles[0].id,
threshold: 10,
thresholdMax: 50,
interval: '1h',
source: AlertSource.TILE,
thresholdType: AlertThresholdType.BETWEEN,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
})
.expect(200);
const getResponse = await authRequest(
'get',
`${ALERTS_BASE_URL}/${createResponse.body.data.id}`,
).expect(200);
expect(getResponse.body.data.threshold).toBe(10);
expect(getResponse.body.data.thresholdMax).toBe(50);
expect(getResponse.body.data.thresholdType).toBe(
AlertThresholdType.BETWEEN,
);
});
});
describe('Authentication', () => {
it('should require authentication', async () => {
// Create an unauthenticated agent
@ -883,4 +1168,67 @@ describe('External API Alerts', () => {
.expect(401);
});
});
describe('Errors field', () => {
it('returns recorded execution errors on GET by id', async () => {
const { alert } = await createTestAlert();
const errorTimestamp = new Date('2026-04-17T12:00:00.000Z');
await Alert.updateOne(
{ _id: alert.id },
{
$set: {
executionErrors: [
{
timestamp: errorTimestamp,
type: AlertErrorType.QUERY_ERROR,
message: 'ClickHouse returned 500',
},
],
},
},
);
const res = await authRequest(
'get',
`${ALERTS_BASE_URL}/${alert.id}`,
).expect(200);
expect(res.body.data.executionErrors).toHaveLength(1);
expect(res.body.data.executionErrors[0].type).toBe(
AlertErrorType.QUERY_ERROR,
);
expect(res.body.data.executionErrors[0].message).toBe(
'ClickHouse returned 500',
);
expect(res.body.data.executionErrors[0].timestamp).toBe(
errorTimestamp.toISOString(),
);
});
it('returns recorded execution errors on the list endpoint', async () => {
const { alert } = await createTestAlert();
await Alert.updateOne(
{ _id: alert.id },
{
$set: {
executionErrors: [
{
timestamp: new Date('2026-04-17T12:00:00.000Z'),
type: AlertErrorType.WEBHOOK_ERROR,
message: 'webhook delivery failed',
},
],
},
},
);
const res = await authRequest('get', ALERTS_BASE_URL).expect(200);
const match = res.body.data.find((a: any) => a.id === alert.id);
expect(match).toBeDefined();
expect(match.executionErrors).toHaveLength(1);
expect(match.executionErrors[0].type).toBe(AlertErrorType.WEBHOOK_ERROR);
expect(match.executionErrors[0].message).toBe('webhook delivery failed');
});
});
});

View file

@ -3387,7 +3387,7 @@ describe('External API v2 Dashboards - new format', () => {
});
});
it('should delete alert when tile is updated from builder to raw SQL config', async () => {
it('should delete alert when tile is updated from builder to raw SQL config and the display type does not support alerts', async () => {
const tileId = new ObjectId().toString();
const dashboard = await createTestDashboard({
tiles: [
@ -3399,7 +3399,7 @@ describe('External API v2 Dashboards - new format', () => {
w: 6,
h: 3,
config: {
displayType: 'line',
displayType: 'number',
source: traceSource._id.toString(),
select: [
{
@ -3455,7 +3455,7 @@ describe('External API v2 Dashboards - new format', () => {
h: 3,
config: {
configType: 'sql',
displayType: 'line',
displayType: 'table',
connectionId: connection._id.toString(),
sqlTemplate: 'SELECT count() FROM otel_logs WHERE {timeFilter}',
},

View file

@ -34,7 +34,7 @@ import { alertSchema, objectIdSchema } from '@/utils/zod';
* description: Evaluation interval.
* AlertThresholdType:
* type: string
* enum: [above, below]
* enum: [above, below, above_exclusive, below_or_equal, equal, not_equal, between, not_between]
* description: Threshold comparison direction.
* AlertSource:
* type: string
@ -48,6 +48,31 @@ import { alertSchema, objectIdSchema } from '@/utils/zod';
* type: string
* enum: [webhook]
* description: Channel type.
* AlertErrorType:
* type: string
* enum: [QUERY_ERROR, WEBHOOK_ERROR, INVALID_ALERT, UNKNOWN]
* description: Category of error recorded during alert execution.
* AlertExecutionError:
* type: object
* description: An error recorded during a recent alert execution.
* required:
* - timestamp
* - type
* - message
* properties:
* timestamp:
* type: string
* format: date-time
* description: When the error occurred.
* example: "2026-04-17T12:00:00.000Z"
* type:
* $ref: '#/components/schemas/AlertErrorType'
* description: Category of the error.
* example: "QUERY_ERROR"
* message:
* type: string
* description: Human-readable error message.
* example: "Query timed out after 30s"
* AlertSilenced:
* type: object
* description: Silencing metadata.
@ -95,7 +120,7 @@ import { alertSchema, objectIdSchema } from '@/utils/zod';
* example: "65f5e4a3b9e77c001a567890"
* tileId:
* type: string
* description: Tile ID for tile-based alerts. May not be a Raw-SQL-based tile.
* description: Tile ID for tile-based alerts. Must be a line, stacked bar, or number type tile.
* nullable: true
* example: "65f5e4a3b9e77c001a901234"
* savedSearchId:
@ -110,8 +135,13 @@ import { alertSchema, objectIdSchema } from '@/utils/zod';
* example: "ServiceName"
* threshold:
* type: number
* description: Threshold value for triggering the alert.
* description: Threshold value for triggering the alert. For between and not_between threshold types, this is the lower bound.
* example: 100
* thresholdMax:
* type: number
* nullable: true
* description: Upper bound for between and not_between threshold types. Required when thresholdType is between or not_between, must be >= threshold.
* example: 500
* interval:
* $ref: '#/components/schemas/AlertInterval'
* description: Evaluation interval for the alert.
@ -171,6 +201,12 @@ import { alertSchema, objectIdSchema } from '@/utils/zod';
* $ref: '#/components/schemas/AlertSilenced'
* description: Silencing metadata.
* nullable: true
* executionErrors:
* type: array
* nullable: true
* description: Errors recorded during the most recent alert execution, if any.
* items:
* $ref: '#/components/schemas/AlertExecutionError'
* createdAt:
* type: string
* nullable: true

View file

@ -1,104 +1,30 @@
import { isRawSqlSavedChartConfig } from '@hyperdx/common-utils/dist/guards';
import { SearchConditionLanguageSchema as whereLanguageSchema } from '@hyperdx/common-utils/dist/types';
import express from 'express';
import { uniq } from 'lodash';
import { ObjectId } from 'mongodb';
import mongoose from 'mongoose';
import { z } from 'zod';
import { deleteDashboardAlerts } from '@/controllers/alerts';
import { getConnectionsByTeam } from '@/controllers/connection';
import { deleteDashboard } from '@/controllers/dashboard';
import { getSources } from '@/controllers/sources';
import Dashboard from '@/models/dashboard';
import { validateRequestWithEnhancedErrors as validateRequest } from '@/utils/enhancedErrors';
import {
translateExternalChartToTileConfig,
translateExternalFilterToFilter,
} from '@/utils/externalApi';
import logger from '@/utils/logger';
import {
ExternalDashboardFilter,
externalDashboardFilterSchema,
externalDashboardFilterSchemaWithId,
ExternalDashboardFilterWithId,
externalDashboardSavedFilterValueSchema,
externalDashboardTileListSchema,
ExternalDashboardTileWithId,
objectIdSchema,
tagsSchema,
} from '@/utils/zod';
import { ExternalDashboardTileWithId, objectIdSchema } from '@/utils/zod';
import {
cleanupDashboardAlerts,
convertExternalFiltersToInternal,
convertExternalTilesToInternal,
convertToExternalDashboard,
convertToInternalTileConfig,
createDashboardBodySchema,
getMissingConnections,
getMissingSources,
isConfigTile,
isRawSqlExternalTileConfig,
isSeriesTile,
resolveSavedQueryLanguage,
updateDashboardBodySchema,
} from './utils/dashboards';
/** Returns an array of source IDs that are referenced in the tiles/filters but do not exist in the team's sources */
async function getMissingSources(
  team: string | mongoose.Types.ObjectId,
  tiles: ExternalDashboardTileWithId[],
  filters?: (ExternalDashboardFilter | ExternalDashboardFilterWithId)[],
): Promise<string[]> {
  // Collect every source ID referenced anywhere in the dashboard body.
  const sourceIds = new Set<string>();
  for (const tile of tiles) {
    if (isSeriesTile(tile)) {
      // Series tiles may reference one source per series entry.
      for (const series of tile.series) {
        if ('sourceId' in series) {
          sourceIds.add(series.sourceId);
        }
      }
    } else if (isConfigTile(tile)) {
      // Config tiles carry at most one optional sourceId on the config itself.
      if ('sourceId' in tile.config && tile.config.sourceId) {
        sourceIds.add(tile.config.sourceId);
      }
    }
  }
  if (filters?.length) {
    // Filters can reference a source too (e.g. for value lookups).
    for (const filter of filters) {
      if ('sourceId' in filter) {
        sourceIds.add(filter.sourceId);
      }
    }
  }
  // Any referenced ID that is not among the team's saved sources is "missing".
  const existingSources = await getSources(team.toString());
  const existingSourceIds = new Set(
    existingSources.map(source => source._id.toString()),
  );
  return [...sourceIds].filter(sourceId => !existingSourceIds.has(sourceId));
}
/** Returns an array of connection IDs that are referenced in the tiles but do not belong to the team */
async function getMissingConnections(
  team: string | mongoose.Types.ObjectId,
  tiles: ExternalDashboardTileWithId[],
): Promise<string[]> {
  // Only raw-SQL config tiles reference a connection directly.
  const connectionIds = new Set<string>();
  for (const tile of tiles) {
    if (isConfigTile(tile) && isRawSqlExternalTileConfig(tile.config)) {
      connectionIds.add(tile.config.connectionId);
    }
  }
  // Short-circuit to avoid a DB round-trip when nothing references a connection.
  if (connectionIds.size === 0) return [];
  const existingConnections = await getConnectionsByTeam(team.toString());
  const existingConnectionIds = new Set(
    existingConnections.map(connection => connection._id.toString()),
  );
  return [...connectionIds].filter(
    connectionId => !existingConnectionIds.has(connectionId),
  );
}
async function getSourceConnectionMismatches(
team: string | mongoose.Types.ObjectId,
tiles: ExternalDashboardTileWithId[],
@ -123,62 +49,6 @@ async function getSourceConnectionMismatches(
return sourcesWithInvalidConnections;
}
type SavedQueryLanguage = z.infer<typeof whereLanguageSchema>;

/**
 * Normalize the (savedQuery, savedQueryLanguage) pair from a request body.
 * - An explicitly provided savedQueryLanguage (including null) is returned as-is.
 * - A null savedQuery yields null (clears the language).
 * - A non-empty savedQuery with no language defaults to 'lucene'.
 * - Otherwise returns undefined.
 */
function resolveSavedQueryLanguage(params: {
  savedQuery: string | null | undefined;
  savedQueryLanguage: SavedQueryLanguage | null | undefined;
}): SavedQueryLanguage | null | undefined {
  const { savedQuery, savedQueryLanguage } = params;
  if (savedQueryLanguage !== undefined) return savedQueryLanguage;
  if (savedQuery === null) return null;
  if (savedQuery) return 'lucene';
  return undefined;
}
// Fields shared by the create and update dashboard request-body schemas.
const dashboardBodyBaseShape = {
  name: z.string().max(1024),
  tiles: externalDashboardTileListSchema,
  tags: tagsSchema,
  // Nullable + optional so callers can explicitly clear these fields.
  savedQuery: z.string().nullable().optional(),
  savedQueryLanguage: whereLanguageSchema.nullable().optional(),
  savedFilterValues: z
    .array(externalDashboardSavedFilterValueSchema)
    .optional(),
};
/**
 * Build a dashboard request-body schema around the given filter schema
 * (create uses externalDashboardFilterSchema; update uses the WithId variant).
 * Adds a cross-field rule: savedQueryLanguage cannot be null while a
 * savedQuery is present.
 */
function buildDashboardBodySchema(filterSchema: z.ZodTypeAny): z.ZodEffects<
  z.ZodObject<
    typeof dashboardBodyBaseShape & {
      filters: z.ZodOptional<z.ZodArray<z.ZodTypeAny>>;
    }
  >
> {
  return z
    .object({
      ...dashboardBodyBaseShape,
      filters: z.array(filterSchema).optional(),
    })
    .superRefine((data, ctx) => {
      // A non-null savedQuery must keep a usable language.
      if (data.savedQuery != null && data.savedQueryLanguage === null) {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          message:
            'savedQueryLanguage cannot be null when savedQuery is provided',
          path: ['savedQueryLanguage'],
        });
      }
    });
}

// Create accepts filters without IDs; update expects filters carrying IDs.
const createDashboardBodySchema = buildDashboardBodySchema(
  externalDashboardFilterSchema,
);
const updateDashboardBodySchema = buildDashboardBodySchema(
  externalDashboardFilterSchemaWithId,
);
/**
* @openapi
* components:
@ -1748,27 +1618,8 @@ router.post(
});
}
const internalTiles = tiles.map(tile => {
const tileId = new ObjectId().toString();
if (isConfigTile(tile)) {
return convertToInternalTileConfig({
...tile,
id: tileId,
});
}
return translateExternalChartToTileConfig({
...tile,
id: tileId,
});
});
const filtersWithIds = (filters || []).map(filter =>
translateExternalFilterToFilter({
...filter,
id: new ObjectId().toString(),
}),
);
const internalTiles = convertExternalTilesToInternal(tiles);
const filtersWithIds = convertExternalFiltersToInternal(filters || []);
const normalizedSavedQueryLanguage = resolveSavedQueryLanguage({
savedQuery,
@ -2001,18 +1852,10 @@ router.put(
(existingDashboard?.filters ?? []).map((f: { id: string }) => f.id),
);
// Convert external tiles to internal charts format.
// Generate a new id for any tile whose id doesn't match an existing tile.
const internalTiles = tiles.map(tile => {
const tileId = existingTileIds.has(tile.id)
? tile.id
: new ObjectId().toString();
if (isConfigTile(tile)) {
return convertToInternalTileConfig({ ...tile, id: tileId });
}
return translateExternalChartToTileConfig({ ...tile, id: tileId });
});
const internalTiles = convertExternalTilesToInternal(
tiles,
existingTileIds,
);
const setPayload: Record<string, unknown> = {
name,
@ -2020,13 +1863,9 @@ router.put(
tags: tags && uniq(tags),
};
if (filters !== undefined) {
setPayload.filters = filters.map(
(filter: ExternalDashboardFilterWithId) => {
const filterId = existingFilterIds.has(filter.id)
? filter.id
: new ObjectId().toString();
return translateExternalFilterToFilter({ ...filter, id: filterId });
},
setPayload.filters = convertExternalFiltersToInternal(
filters,
existingFilterIds,
);
}
if (savedQuery !== undefined) {
@ -2053,21 +1892,12 @@ router.put(
return res.sendStatus(404);
}
// Delete alerts for tiles that are now raw SQL (unsupported) or were removed
const newTileIdSet = new Set(internalTiles.map(t => t.id));
const tileIdsToDeleteAlerts = [
...internalTiles
.filter(tile => isRawSqlSavedChartConfig(tile.config))
.map(tile => tile.id),
...[...existingTileIds].filter(id => !newTileIdSet.has(id)),
];
if (tileIdsToDeleteAlerts.length > 0) {
logger.info(
{ dashboardId, teamId, tileIds: tileIdsToDeleteAlerts },
`Deleting alerts for tiles with unsupported config or removed tiles`,
);
await deleteDashboardAlerts(dashboardId, teamId, tileIdsToDeleteAlerts);
}
await cleanupDashboardAlerts({
dashboardId,
teamId,
internalTiles,
existingTileIds,
});
res.json({
data: convertToExternalDashboard(updatedDashboard),

View file

@ -6,17 +6,13 @@ import chartsRouter from '@/routers/external-api/v2/charts';
import dashboardRouter from '@/routers/external-api/v2/dashboards';
import sourcesRouter from '@/routers/external-api/v2/sources';
import webhooksRouter from '@/routers/external-api/v2/webhooks';
import rateLimiter from '@/utils/rateLimiter';
import rateLimiter, { rateLimiterKeyGenerator } from '@/utils/rateLimiter';
const router = express.Router();
const rateLimiterKeyGenerator = (req: express.Request): string => {
return req.headers.authorization ?? req.ip ?? 'unknown';
};
const defaultRateLimiter = rateLimiter({
windowMs: 60 * 1000, // 1 minute
max: 100, // Limit each IP to 100 requests per `window`
max: 100, // Limit each API key to 100 requests per `window`
standardHeaders: true, // Return rate limit info in the `RateLimit-*` headers
legacyHeaders: false, // Disable the `X-RateLimit-*` headers
keyGenerator: rateLimiterKeyGenerator,

View file

@ -1,3 +1,4 @@
import { displayTypeSupportsRawSqlAlerts } from '@hyperdx/common-utils/dist/core/utils';
import { isRawSqlSavedChartConfig } from '@hyperdx/common-utils/dist/guards';
import {
AggregateFunctionSchema,
@ -6,19 +7,35 @@ import {
RawSqlSavedChartConfig,
SavedChartConfig,
} from '@hyperdx/common-utils/dist/types';
import { SearchConditionLanguageSchema as whereLanguageSchema } from '@hyperdx/common-utils/dist/types';
import { pick } from 'lodash';
import _ from 'lodash';
import mongoose from 'mongoose';
import { z } from 'zod';
import { deleteDashboardAlerts } from '@/controllers/alerts';
import { getConnectionsByTeam } from '@/controllers/connection';
import { getSources } from '@/controllers/sources';
import { DashboardDocument } from '@/models/dashboard';
import { translateFilterToExternalFilter } from '@/utils/externalApi';
import {
translateExternalChartToTileConfig,
translateExternalFilterToFilter,
translateFilterToExternalFilter,
} from '@/utils/externalApi';
import logger from '@/utils/logger';
import {
ExternalDashboardFilter,
externalDashboardFilterSchema,
externalDashboardFilterSchemaWithId,
ExternalDashboardFilterWithId,
ExternalDashboardRawSqlTileConfig,
externalDashboardSavedFilterValueSchema,
ExternalDashboardSelectItem,
ExternalDashboardTileConfig,
externalDashboardTileListSchema,
ExternalDashboardTileWithId,
externalQuantileLevelSchema,
tagsSchema,
} from '@/utils/zod';
// --------------------------------------------------------------------------------
@ -475,3 +492,220 @@ export function convertToInternalTileConfig(
config: strippedConfig,
};
}
// --------------------------------------------------------------------------------
// Shared dashboard validation helpers (used by both the REST router and MCP tools)
// --------------------------------------------------------------------------------
/** Returns source IDs referenced in tiles/filters that do not exist for the team */
export async function getMissingSources(
  team: string | mongoose.Types.ObjectId,
  tiles: ExternalDashboardTileWithId[],
  filters?: (ExternalDashboardFilter | ExternalDashboardFilterWithId)[],
): Promise<string[]> {
  // Gather every source ID mentioned by tile series, tile configs, or filters.
  const referencedIds = new Set<string>();
  for (const tile of tiles) {
    if (isSeriesTile(tile)) {
      for (const series of tile.series) {
        if ('sourceId' in series) {
          referencedIds.add(series.sourceId);
        }
      }
    } else if (isConfigTile(tile)) {
      if ('sourceId' in tile.config && tile.config.sourceId) {
        referencedIds.add(tile.config.sourceId);
      }
    }
  }
  for (const filter of filters ?? []) {
    if ('sourceId' in filter) {
      referencedIds.add(filter.sourceId);
    }
  }
  // Compare against the sources that actually exist for this team.
  const teamSources = await getSources(team.toString());
  const knownIds = new Set(teamSources.map(s => s._id.toString()));
  return [...referencedIds].filter(id => !knownIds.has(id));
}
/** Returns connection IDs referenced in tiles that do not belong to the team */
export async function getMissingConnections(
  team: string | mongoose.Types.ObjectId,
  tiles: ExternalDashboardTileWithId[],
): Promise<string[]> {
  // Only raw-SQL config tiles reference a connection directly.
  const referenced = new Set<string>();
  for (const tile of tiles) {
    if (isConfigTile(tile) && isRawSqlExternalTileConfig(tile.config)) {
      referenced.add(tile.config.connectionId);
    }
  }
  // Nothing to validate — skip the DB lookup entirely.
  if (referenced.size === 0) return [];
  const teamConnections = await getConnectionsByTeam(team.toString());
  const known = new Set(teamConnections.map(c => c._id.toString()));
  return [...referenced].filter(id => !known.has(id));
}
type SavedQueryLanguage = z.infer<typeof whereLanguageSchema>;
/**
 * Normalize the (savedQuery, savedQueryLanguage) pair from a request body.
 * An explicitly provided language (including null) always wins; a null query
 * clears the language; a non-empty query defaults to 'lucene'; otherwise
 * undefined is returned.
 */
export function resolveSavedQueryLanguage(params: {
  savedQuery: string | null | undefined;
  savedQueryLanguage: SavedQueryLanguage | null | undefined;
}): SavedQueryLanguage | null | undefined {
  if (params.savedQueryLanguage !== undefined) {
    return params.savedQueryLanguage;
  }
  if (params.savedQuery === null) {
    return null;
  }
  return params.savedQuery ? 'lucene' : undefined;
}
// Fields shared by the create and update dashboard request-body schemas.
const dashboardBodyBaseShape = {
  name: z.string().max(1024),
  tiles: externalDashboardTileListSchema,
  tags: tagsSchema,
  // Nullable + optional so callers can explicitly clear these fields.
  savedQuery: z.string().nullable().optional(),
  savedQueryLanguage: whereLanguageSchema.nullable().optional(),
  savedFilterValues: z
    .array(externalDashboardSavedFilterValueSchema)
    .optional(),
};
// --------------------------------------------------------------------------------
// Shared tile/filter conversion helpers (used by both external API and MCP)
// --------------------------------------------------------------------------------
/**
 * Convert external tile definitions to internal Mongoose-compatible format.
 * Generates new ObjectIds for tiles that don't already have a matching ID in
 * `existingTileIds` (update path) or for all tiles (create path).
 */
export function convertExternalTilesToInternal(
  tiles: ExternalDashboardTileWithId[],
  existingTileIds?: Set<string>,
): DashboardDocument['tiles'] {
  return tiles.map(tile => {
    // Reuse the incoming ID only when it matches a known existing tile.
    const keepId =
      existingTileIds !== undefined && tile.id && existingTileIds.has(tile.id);
    const withId = {
      ...tile,
      id: keepId ? tile.id : new mongoose.Types.ObjectId().toString(),
    };
    if (isConfigTile(withId)) {
      return convertToInternalTileConfig(withId);
    }
    if (isSeriesTile(withId)) {
      return translateExternalChartToTileConfig(withId);
    }
    // Fallback for tiles with neither config nor series — treat as empty series tile.
    // This shouldn't happen with valid input, but matches the previous behavior.
    return translateExternalChartToTileConfig(withId as SeriesTile);
  });
}
/**
 * Convert external filter definitions to internal format, preserving IDs that
 * match `existingFilterIds` (update path) or generating new ones (create path).
 */
export function convertExternalFiltersToInternal(
  filters: (ExternalDashboardFilter | ExternalDashboardFilterWithId)[],
  existingFilterIds?: Set<string>,
) {
  return filters.map(filter =>
    translateExternalFilterToFilter({
      ...filter,
      // Keep a known ID on the update path; mint a fresh one otherwise.
      id:
        existingFilterIds && 'id' in filter && existingFilterIds.has(filter.id)
          ? filter.id
          : new mongoose.Types.ObjectId().toString(),
    }),
  );
}
/**
 * Delete alerts for tiles that were removed or converted to raw SQL
 * (which doesn't support alerts).
 */
export async function cleanupDashboardAlerts({
  dashboardId,
  teamId,
  internalTiles,
  existingTileIds,
}: {
  dashboardId: string;
  teamId: string | mongoose.Types.ObjectId;
  internalTiles: DashboardDocument['tiles'];
  existingTileIds: Set<string>;
}) {
  const keptIds = new Set(internalTiles.map(tile => tile.id));
  // Raw-SQL tiles whose display type cannot carry an alert.
  const unsupportedIds = internalTiles
    .filter(
      tile =>
        isRawSqlSavedChartConfig(tile.config) &&
        !displayTypeSupportsRawSqlAlerts(tile.config.displayType),
    )
    .map(tile => tile.id);
  // Tiles that existed before but are absent from the new tile set.
  const removedIds = [...existingTileIds].filter(id => !keptIds.has(id));
  const tileIdsToDeleteAlerts = [...unsupportedIds, ...removedIds];
  if (tileIdsToDeleteAlerts.length === 0) {
    return;
  }
  logger.info(
    { dashboardId, teamId, tileIds: tileIdsToDeleteAlerts },
    'Deleting alerts for tiles with unsupported config or removed tiles',
  );
  // deleteDashboardAlerts expects an ObjectId team reference.
  const teamObjectId =
    teamId instanceof mongoose.Types.ObjectId
      ? teamId
      : new mongoose.Types.ObjectId(teamId);
  await deleteDashboardAlerts(dashboardId, teamObjectId, tileIdsToDeleteAlerts);
}
// --------------------------------------------------------------------------------
// Body validation schemas
// --------------------------------------------------------------------------------

/**
 * Build a dashboard request-body schema around the given filter schema
 * (create uses filters without IDs; update requires filter IDs). Enforces
 * that savedQueryLanguage is not null while a savedQuery is present.
 */
function buildDashboardBodySchema(filterSchema: z.ZodTypeAny): z.ZodEffects<
  z.ZodObject<
    typeof dashboardBodyBaseShape & {
      filters: z.ZodOptional<z.ZodArray<z.ZodTypeAny>>;
    }
  >
> {
  const bodySchema = z.object({
    ...dashboardBodyBaseShape,
    filters: z.array(filterSchema).optional(),
  });
  return bodySchema.superRefine((body, ctx) => {
    const hasQuery = body.savedQuery !== null && body.savedQuery !== undefined;
    if (hasQuery && body.savedQueryLanguage === null) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message:
          'savedQueryLanguage cannot be null when savedQuery is provided',
        path: ['savedQueryLanguage'],
      });
    }
  });
}

export const createDashboardBodySchema = buildDashboardBodySchema(
  externalDashboardFilterSchema,
);
export const updateDashboardBodySchema = buildDashboardBodySchema(
  externalDashboardFilterSchemaWithId,
);

View file

@ -109,15 +109,15 @@ describe('util', () => {
});
it('should handle keys with empty segments', () => {
expect(() => unflattenObject({ 'foo..bar': 'baz' })).toThrowError();
expect(() => unflattenObject({ 'foo..bar': 'baz' })).toThrow();
});
it('should handle keys starting with separator', () => {
expect(() => unflattenObject({ '.foo.bar': 'baz' })).toThrowError();
expect(() => unflattenObject({ '.foo.bar': 'baz' })).toThrow();
});
it('should handle keys ending with separator', () => {
expect(() => unflattenObject({ 'foo.bar.': 'baz' })).toThrowError();
expect(() => unflattenObject({ 'foo.bar.': 'baz' })).toThrow();
});
it('should handle complex custom separator', () => {

View file

@ -0,0 +1,362 @@
// Jest Snapshot v1, https://jestjs.io/docs/snapshot-testing
exports[`buildAlertMessageTemplateTitle saved search alerts ALERT state above threshold=5 alertValue=10 1`] = `"🚨 Alert for "My Search" - 10 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts ALERT state above_exclusive threshold=5 alertValue=10 1`] = `"🚨 Alert for "My Search" - 10 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts ALERT state below threshold=5 alertValue=2 1`] = `"🚨 Alert for "My Search" - 2 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts ALERT state below_or_equal threshold=5 alertValue=3 1`] = `"🚨 Alert for "My Search" - 3 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts ALERT state between threshold=5 alertValue=6 1`] = `"🚨 Alert for "My Search" - 6 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts ALERT state equal threshold=5 alertValue=5 1`] = `"🚨 Alert for "My Search" - 5 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts ALERT state not_between threshold=5 alertValue=12 1`] = `"🚨 Alert for "My Search" - 12 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts ALERT state not_equal threshold=5 alertValue=10 1`] = `"🚨 Alert for "My Search" - 10 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts OK state (resolved) above threshold=5 okValue=3 1`] = `"✅ Alert for "My Search" - 3 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts OK state (resolved) above_exclusive threshold=5 okValue=3 1`] = `"✅ Alert for "My Search" - 3 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts OK state (resolved) below threshold=5 okValue=10 1`] = `"✅ Alert for "My Search" - 10 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts OK state (resolved) below_or_equal threshold=5 okValue=10 1`] = `"✅ Alert for "My Search" - 10 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts OK state (resolved) between threshold=5 okValue=10 1`] = `"✅ Alert for "My Search" - 10 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts OK state (resolved) equal threshold=5 okValue=10 1`] = `"✅ Alert for "My Search" - 10 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts OK state (resolved) not_between threshold=5 okValue=6 1`] = `"✅ Alert for "My Search" - 6 lines found"`;
exports[`buildAlertMessageTemplateTitle saved search alerts OK state (resolved) not_equal threshold=5 okValue=5 1`] = `"✅ Alert for "My Search" - 5 lines found"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state above threshold=5 alertValue=10 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 10 meets or exceeds 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state above_exclusive threshold=5 alertValue=10 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 10 exceeds 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state below threshold=5 alertValue=2 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 2 falls below 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state below_or_equal threshold=5 alertValue=3 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 3 falls to or below 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state between threshold=5 alertValue=6 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 6 falls between 5 and 7"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state decimal threshold 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 10.1 meets or exceeds 1.5"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state equal threshold=5 alertValue=5 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 5 equals 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state integer threshold rounds value 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 11 meets or exceeds 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state not_between threshold=5 alertValue=12 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 12 falls outside 5 and 7"`;
exports[`buildAlertMessageTemplateTitle tile alerts ALERT state not_equal threshold=5 alertValue=10 1`] = `"🚨 Alert for "Test Chart" in "My Dashboard" - 10 does not equal 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts OK state (resolved) above threshold=5 okValue=3 1`] = `"✅ Alert for "Test Chart" in "My Dashboard" - 3 falls below 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts OK state (resolved) above_exclusive threshold=5 okValue=3 1`] = `"✅ Alert for "Test Chart" in "My Dashboard" - 3 falls to or below 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts OK state (resolved) below threshold=5 okValue=10 1`] = `"✅ Alert for "Test Chart" in "My Dashboard" - 10 meets or exceeds 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts OK state (resolved) below_or_equal threshold=5 okValue=10 1`] = `"✅ Alert for "Test Chart" in "My Dashboard" - 10 exceeds 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts OK state (resolved) between threshold=5 okValue=10 1`] = `"✅ Alert for "Test Chart" in "My Dashboard" - 10 falls outside 5 and 7"`;
exports[`buildAlertMessageTemplateTitle tile alerts OK state (resolved) equal threshold=5 okValue=10 1`] = `"✅ Alert for "Test Chart" in "My Dashboard" - 10 does not equal 5"`;
exports[`buildAlertMessageTemplateTitle tile alerts OK state (resolved) not_between threshold=5 okValue=6 1`] = `"✅ Alert for "Test Chart" in "My Dashboard" - 6 falls between 5 and 7"`;
exports[`buildAlertMessageTemplateTitle tile alerts OK state (resolved) not_equal threshold=5 okValue=5 1`] = `"✅ Alert for "Test Chart" in "My Dashboard" - 5 equals 5"`;
exports[`renderAlertTemplate saved search alerts ALERT state above threshold=5 alertValue=10 1`] = `
"
10 lines found, which meets or exceeds the threshold of 5 lines
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
\`\`\`
"2023-03-17 22:14:01","error","Failed to connect to database"
"2023-03-17 22:13:45","error","Connection timeout after 30s"
"2023-03-17 22:12:30","error","Retry limit exceeded"
\`\`\`"
`;
exports[`renderAlertTemplate saved search alerts ALERT state above_exclusive threshold=5 alertValue=10 1`] = `
"
10 lines found, which exceeds the threshold of 5 lines
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
\`\`\`
"2023-03-17 22:14:01","error","Failed to connect to database"
"2023-03-17 22:13:45","error","Connection timeout after 30s"
"2023-03-17 22:12:30","error","Retry limit exceeded"
\`\`\`"
`;
exports[`renderAlertTemplate saved search alerts ALERT state below threshold=5 alertValue=2 1`] = `
"
2 lines found, which falls below the threshold of 5 lines
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
\`\`\`
"2023-03-17 22:14:01","error","Failed to connect to database"
"2023-03-17 22:13:45","error","Connection timeout after 30s"
"2023-03-17 22:12:30","error","Retry limit exceeded"
\`\`\`"
`;
exports[`renderAlertTemplate saved search alerts ALERT state below_or_equal threshold=5 alertValue=3 1`] = `
"
3 lines found, which falls to or below the threshold of 5 lines
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
\`\`\`
"2023-03-17 22:14:01","error","Failed to connect to database"
"2023-03-17 22:13:45","error","Connection timeout after 30s"
"2023-03-17 22:12:30","error","Retry limit exceeded"
\`\`\`"
`;
exports[`renderAlertTemplate saved search alerts ALERT state between threshold=5 alertValue=6 1`] = `
"
6 lines found, which falls between the threshold of 5 and 7 lines
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
\`\`\`
"2023-03-17 22:14:01","error","Failed to connect to database"
"2023-03-17 22:13:45","error","Connection timeout after 30s"
"2023-03-17 22:12:30","error","Retry limit exceeded"
\`\`\`"
`;
exports[`renderAlertTemplate saved search alerts ALERT state equal threshold=5 alertValue=5 1`] = `
"
5 lines found, which equals the threshold of 5 lines
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
\`\`\`
"2023-03-17 22:14:01","error","Failed to connect to database"
"2023-03-17 22:13:45","error","Connection timeout after 30s"
"2023-03-17 22:12:30","error","Retry limit exceeded"
\`\`\`"
`;
exports[`renderAlertTemplate saved search alerts ALERT state not_between threshold=5 alertValue=12 1`] = `
"
12 lines found, which falls outside the threshold of 5 and 7 lines
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
\`\`\`
"2023-03-17 22:14:01","error","Failed to connect to database"
"2023-03-17 22:13:45","error","Connection timeout after 30s"
"2023-03-17 22:12:30","error","Retry limit exceeded"
\`\`\`"
`;
exports[`renderAlertTemplate saved search alerts ALERT state not_equal threshold=5 alertValue=10 1`] = `
"
10 lines found, which does not equal the threshold of 5 lines
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
\`\`\`
"2023-03-17 22:14:01","error","Failed to connect to database"
"2023-03-17 22:13:45","error","Connection timeout after 30s"
"2023-03-17 22:12:30","error","Retry limit exceeded"
\`\`\`"
`;
exports[`renderAlertTemplate saved search alerts ALERT state with group 1`] = `
"Group: "http"
10 lines found, which meets or exceeds the threshold of 5 lines
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
\`\`\`
"2023-03-17 22:14:01","error","Failed to connect to database"
"2023-03-17 22:13:45","error","Connection timeout after 30s"
"2023-03-17 22:12:30","error","Retry limit exceeded"
\`\`\`"
`;
exports[`renderAlertTemplate saved search alerts OK state (resolved) above threshold=5 okValue=3 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate saved search alerts OK state (resolved) above_exclusive threshold=5 okValue=3 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate saved search alerts OK state (resolved) below threshold=5 okValue=10 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate saved search alerts OK state (resolved) below_or_equal threshold=5 okValue=10 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate saved search alerts OK state (resolved) between threshold=5 okValue=10 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate saved search alerts OK state (resolved) equal threshold=5 okValue=10 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate saved search alerts OK state (resolved) not_between threshold=5 okValue=6 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate saved search alerts OK state (resolved) not_equal threshold=5 okValue=5 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate saved search alerts OK state (resolved) with group 1`] = `
"Group: "http" - The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state above threshold=5 alertValue=10 1`] = `
"
10 meets or exceeds 5
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state above_exclusive threshold=5 alertValue=10 1`] = `
"
10 exceeds 5
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state below threshold=5 alertValue=2 1`] = `
"
2 falls below 5
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state below_or_equal threshold=5 alertValue=3 1`] = `
"
3 falls to or below 5
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state between threshold=5 alertValue=6 1`] = `
"
6 falls between 5 and 7
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state decimal threshold 1`] = `
"
10.1 meets or exceeds 1.5
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state equal threshold=5 alertValue=5 1`] = `
"
5 equals 5
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state integer threshold rounds value 1`] = `
"
11 meets or exceeds 5
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state not_between threshold=5 alertValue=12 1`] = `
"
12 falls outside 5 and 7
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state not_equal threshold=5 alertValue=10 1`] = `
"
10 does not equal 5
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts ALERT state with group 1`] = `
"Group: "us-east-1"
10 meets or exceeds 5
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts OK state (resolved) above threshold=5 okValue=3 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts OK state (resolved) above_exclusive threshold=5 okValue=3 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts OK state (resolved) below threshold=5 okValue=10 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts OK state (resolved) below_or_equal threshold=5 okValue=10 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts OK state (resolved) between threshold=5 okValue=10 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts OK state (resolved) equal threshold=5 okValue=10 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts OK state (resolved) not_between threshold=5 okValue=6 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts OK state (resolved) not_equal threshold=5 okValue=5 1`] = `
"The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;
exports[`renderAlertTemplate tile alerts OK state (resolved) with group 1`] = `
"Group: "us-east-1" - The alert has been resolved.
Time Range (UTC): [Mar 17 10:10:00 PM - Mar 17 10:15:00 PM)
"
`;

View file

@ -36,6 +36,7 @@ describe('CheckAlertTask', () => {
getAlertTasks: jest.fn(),
getWebhooks: jest.fn(),
updateAlertState: jest.fn(),
recordAlertErrors: jest.fn(),
asyncDispose: jest.fn(),
buildChartLink: jest.fn(),
buildLogSearchLink: jest.fn(),

View file

@ -0,0 +1,451 @@
import {
AlertState,
AlertThresholdType,
SourceKind,
} from '@hyperdx/common-utils/dist/types';
import mongoose from 'mongoose';
import { makeTile } from '@/fixtures';
import { AlertSource } from '@/models/alert';
import { loadProvider } from '@/tasks/checkAlerts/providers';
import {
AlertMessageTemplateDefaultView,
buildAlertMessageTemplateTitle,
renderAlertTemplate,
} from '@/tasks/checkAlerts/template';
let alertProvider: any;
beforeAll(async () => {
alertProvider = await loadProvider();
});
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const mockMetadata = {
getColumn: jest.fn().mockImplementation(({ column }) => {
const columnMap = {
Timestamp: { name: 'Timestamp', type: 'DateTime' },
Body: { name: 'Body', type: 'String' },
SeverityText: { name: 'SeverityText', type: 'String' },
ServiceName: { name: 'ServiceName', type: 'String' },
};
return Promise.resolve(columnMap[column]);
}),
getColumns: jest.fn().mockResolvedValue([]),
getMapKeys: jest.fn().mockResolvedValue([]),
getMapValues: jest.fn().mockResolvedValue([]),
getAllFields: jest.fn().mockResolvedValue([]),
getTableMetadata: jest.fn().mockResolvedValue({}),
getClickHouseSettings: jest.fn().mockReturnValue({}),
setClickHouseSettings: jest.fn(),
getSkipIndices: jest.fn().mockResolvedValue([]),
getSetting: jest.fn().mockResolvedValue(undefined),
} as any;
const sampleLogsCsv = [
'"2023-03-17 22:14:01","error","Failed to connect to database"',
'"2023-03-17 22:13:45","error","Connection timeout after 30s"',
'"2023-03-17 22:12:30","error","Retry limit exceeded"',
].join('\n');
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const mockClickhouseClient = {
query: jest.fn().mockResolvedValue({
json: jest.fn().mockResolvedValue({ data: [] }),
text: jest.fn().mockResolvedValue(sampleLogsCsv),
}),
} as any;
const startTime = new Date('2023-03-17T22:10:00.000Z');
const endTime = new Date('2023-03-17T22:15:00.000Z');
const makeSearchView = (
overrides: Partial<AlertMessageTemplateDefaultView> & {
thresholdType?: AlertThresholdType;
threshold?: number;
thresholdMax?: number;
value?: number;
group?: string;
} = {},
): AlertMessageTemplateDefaultView => ({
alert: {
thresholdType: overrides.thresholdType ?? AlertThresholdType.ABOVE,
threshold: overrides.threshold ?? 5,
thresholdMax: overrides.thresholdMax,
source: AlertSource.SAVED_SEARCH,
channel: { type: null },
interval: '1m',
},
source: {
id: 'fake-source-id',
kind: SourceKind.Log,
team: 'team-123',
from: { databaseName: 'default', tableName: 'otel_logs' },
timestampValueExpression: 'Timestamp',
connection: 'connection-123',
name: 'Logs',
defaultTableSelectExpression: 'Timestamp, Body',
},
savedSearch: {
_id: 'fake-saved-search-id' as any,
team: 'team-123' as any,
id: 'fake-saved-search-id',
name: 'My Search',
select: 'Body',
where: 'Body: "error"',
whereLanguage: 'lucene',
orderBy: 'timestamp',
source: 'fake-source-id' as any,
tags: ['test'],
createdAt: new Date(),
updatedAt: new Date(),
},
attributes: {},
granularity: '1m',
group: overrides.group,
isGroupedAlert: false,
startTime,
endTime,
value: overrides.value ?? 10,
});
const testTile = makeTile({ id: 'test-tile-id' });
const makeTileView = (
overrides: Partial<AlertMessageTemplateDefaultView> & {
thresholdType?: AlertThresholdType;
threshold?: number;
thresholdMax?: number;
value?: number;
group?: string;
} = {},
): AlertMessageTemplateDefaultView => ({
alert: {
thresholdType: overrides.thresholdType ?? AlertThresholdType.ABOVE,
threshold: overrides.threshold ?? 5,
thresholdMax: overrides.thresholdMax,
source: AlertSource.TILE,
channel: { type: null },
interval: '1m',
tileId: 'test-tile-id',
},
dashboard: {
_id: new mongoose.Types.ObjectId(),
id: 'id-123',
name: 'My Dashboard',
tiles: [testTile],
team: 'team-123' as any,
tags: ['test'],
createdAt: new Date(),
updatedAt: new Date(),
},
attributes: {},
granularity: '5 minute',
group: overrides.group,
isGroupedAlert: false,
startTime,
endTime,
value: overrides.value ?? 10,
});
const render = (view: AlertMessageTemplateDefaultView, state: AlertState) =>
renderAlertTemplate({
alertProvider,
clickhouseClient: mockClickhouseClient,
metadata: mockMetadata,
state,
template: null,
title: 'Test Alert Title',
view,
teamWebhooksById: new Map(),
});
interface AlertCase {
thresholdType: AlertThresholdType;
threshold: number;
thresholdMax?: number; // for between-type thresholds
alertValue: number; // value that would trigger the alert
okValue: number; // value that would resolve the alert
}
const alertCases: AlertCase[] = [
{
thresholdType: AlertThresholdType.ABOVE,
threshold: 5,
alertValue: 10,
okValue: 3,
},
{
thresholdType: AlertThresholdType.ABOVE_EXCLUSIVE,
threshold: 5,
alertValue: 10,
okValue: 3,
},
{
thresholdType: AlertThresholdType.BELOW,
threshold: 5,
alertValue: 2,
okValue: 10,
},
{
thresholdType: AlertThresholdType.BELOW_OR_EQUAL,
threshold: 5,
alertValue: 3,
okValue: 10,
},
{
thresholdType: AlertThresholdType.EQUAL,
threshold: 5,
alertValue: 5,
okValue: 10,
},
{
thresholdType: AlertThresholdType.NOT_EQUAL,
threshold: 5,
alertValue: 10,
okValue: 5,
},
{
thresholdType: AlertThresholdType.BETWEEN,
threshold: 5,
thresholdMax: 7,
alertValue: 6,
okValue: 10,
},
{
thresholdType: AlertThresholdType.NOT_BETWEEN,
threshold: 5,
thresholdMax: 7,
alertValue: 12,
okValue: 6,
},
];
describe('renderAlertTemplate', () => {
describe('saved search alerts', () => {
describe('ALERT state', () => {
it.each(alertCases)(
'$thresholdType threshold=$threshold alertValue=$alertValue',
async ({ thresholdType, threshold, thresholdMax, alertValue }) => {
const result = await render(
makeSearchView({
thresholdType,
threshold,
thresholdMax,
value: alertValue,
}),
AlertState.ALERT,
);
expect(result).toMatchSnapshot();
},
);
it('with group', async () => {
const result = await render(
makeSearchView({ group: 'http' }),
AlertState.ALERT,
);
expect(result).toMatchSnapshot();
});
});
describe('OK state (resolved)', () => {
it.each(alertCases)(
'$thresholdType threshold=$threshold okValue=$okValue',
async ({ thresholdType, threshold, thresholdMax, okValue }) => {
const result = await render(
makeSearchView({
thresholdType,
threshold,
thresholdMax,
value: okValue,
}),
AlertState.OK,
);
expect(result).toMatchSnapshot();
},
);
it('with group', async () => {
const result = await render(
makeSearchView({ group: 'http' }),
AlertState.OK,
);
expect(result).toMatchSnapshot();
});
});
});
describe('tile alerts', () => {
describe('ALERT state', () => {
it.each(alertCases)(
'$thresholdType threshold=$threshold alertValue=$alertValue',
async ({ thresholdType, threshold, thresholdMax, alertValue }) => {
const result = await render(
makeTileView({
thresholdType,
threshold,
thresholdMax,
value: alertValue,
}),
AlertState.ALERT,
);
expect(result).toMatchSnapshot();
},
);
it('with group', async () => {
const result = await render(
makeTileView({ group: 'us-east-1' }),
AlertState.ALERT,
);
expect(result).toMatchSnapshot();
});
it('decimal threshold', async () => {
const result = await render(
makeTileView({
thresholdType: AlertThresholdType.ABOVE,
threshold: 1.5,
value: 10.123,
}),
AlertState.ALERT,
);
expect(result).toMatchSnapshot();
});
it('integer threshold rounds value', async () => {
const result = await render(
makeTileView({
thresholdType: AlertThresholdType.ABOVE,
threshold: 5,
value: 10.789,
}),
AlertState.ALERT,
);
expect(result).toMatchSnapshot();
});
});
describe('OK state (resolved)', () => {
it.each(alertCases)(
'$thresholdType threshold=$threshold okValue=$okValue',
async ({ thresholdType, threshold, thresholdMax, okValue }) => {
const result = await render(
makeTileView({
thresholdType,
threshold,
thresholdMax,
value: okValue,
}),
AlertState.OK,
);
expect(result).toMatchSnapshot();
},
);
it('with group', async () => {
const result = await render(
makeTileView({ group: 'us-east-1' }),
AlertState.OK,
);
expect(result).toMatchSnapshot();
});
});
});
});
describe('buildAlertMessageTemplateTitle', () => {
describe('saved search alerts', () => {
describe('ALERT state', () => {
it.each(alertCases)(
'$thresholdType threshold=$threshold alertValue=$alertValue',
({ thresholdType, threshold, alertValue }) => {
const result = buildAlertMessageTemplateTitle({
view: makeSearchView({
thresholdType,
threshold,
value: alertValue,
}),
state: AlertState.ALERT,
});
expect(result).toMatchSnapshot();
},
);
});
describe('OK state (resolved)', () => {
it.each(alertCases)(
'$thresholdType threshold=$threshold okValue=$okValue',
({ thresholdType, threshold, okValue }) => {
const result = buildAlertMessageTemplateTitle({
view: makeSearchView({ thresholdType, threshold, value: okValue }),
state: AlertState.OK,
});
expect(result).toMatchSnapshot();
},
);
});
});
describe('tile alerts', () => {
describe('ALERT state', () => {
it.each(alertCases)(
'$thresholdType threshold=$threshold alertValue=$alertValue',
({ thresholdType, threshold, thresholdMax, alertValue }) => {
const result = buildAlertMessageTemplateTitle({
view: makeTileView({
thresholdType,
threshold,
thresholdMax,
value: alertValue,
}),
state: AlertState.ALERT,
});
expect(result).toMatchSnapshot();
},
);
it('decimal threshold', () => {
const result = buildAlertMessageTemplateTitle({
view: makeTileView({
thresholdType: AlertThresholdType.ABOVE,
threshold: 1.5,
value: 10.123,
}),
state: AlertState.ALERT,
});
expect(result).toMatchSnapshot();
});
it('integer threshold rounds value', () => {
const result = buildAlertMessageTemplateTitle({
view: makeTileView({
thresholdType: AlertThresholdType.ABOVE,
threshold: 5,
value: 10.789,
}),
state: AlertState.ALERT,
});
expect(result).toMatchSnapshot();
});
});
describe('OK state (resolved)', () => {
it.each(alertCases)(
'$thresholdType threshold=$threshold okValue=$okValue',
({ thresholdType, threshold, thresholdMax, okValue }) => {
const result = buildAlertMessageTemplateTitle({
view: makeTileView({
thresholdType,
threshold,
thresholdMax,
value: okValue,
}),
state: AlertState.OK,
});
expect(result).toMatchSnapshot();
},
);
});
});
});

View file

@ -6,7 +6,11 @@ import ms from 'ms';
import * as config from '@/config';
import { createAlert } from '@/controllers/alerts';
import { createTeam } from '@/controllers/team';
import { bulkInsertLogs, getServer } from '@/fixtures';
import {
bulkInsertLogs,
getServer,
RAW_SQL_NUMBER_ALERT_TEMPLATE,
} from '@/fixtures';
import Alert, { AlertSource, AlertThresholdType } from '@/models/alert';
import AlertHistory from '@/models/alertHistory';
import Connection from '@/models/connection';
@ -244,7 +248,7 @@ describe('Single Invocation Alert Test', () => {
// Verify the message body contains the search link
const messageBody = webhookPayload.blocks[0].text.text;
expect(messageBody).toContain('lines found');
expect(messageBody).toContain('expected less than 1 lines');
expect(messageBody).toContain('meets or exceeds the threshold of 1 lines');
expect(messageBody).toContain('http://app:8080/search/');
expect(messageBody).toContain('from=');
expect(messageBody).toContain('to=');
@ -858,4 +862,137 @@ describe('Single Invocation Alert Test', () => {
expect(dashboard.tiles[1].config.name).toBe('Second Tile Name');
expect(enhancedAlert.tileId).toBe('second-tile-id');
});
it('should trigger alert for raw SQL Number chart tile', async () => {
jest.spyOn(slack, 'postMessageToWebhook').mockResolvedValue(null as any);
const team = await createTeam({ name: 'Test Team' });
const connection = await Connection.create({
team: team._id,
name: 'Test Connection',
host: config.CLICKHOUSE_HOST,
username: config.CLICKHOUSE_USER,
password: config.CLICKHOUSE_PASSWORD,
});
const webhook = await new Webhook({
team: team._id,
service: 'slack',
url: 'https://hooks.slack.com/services/test-number',
name: 'Test Webhook',
}).save();
const dashboard = await new Dashboard({
name: 'Number Chart Alert Dashboard',
team: team._id,
tiles: [
{
id: 'number-tile-1',
x: 0,
y: 0,
w: 6,
h: 4,
config: {
configType: 'sql',
displayType: 'number',
sqlTemplate: RAW_SQL_NUMBER_ALERT_TEMPLATE,
connection: connection.id,
},
},
],
}).save();
const mockUserId = new mongoose.Types.ObjectId();
const alert = await createAlert(
team._id,
{
source: AlertSource.TILE,
channel: {
type: 'webhook',
webhookId: webhook._id.toString(),
},
interval: '5m',
thresholdType: AlertThresholdType.ABOVE,
threshold: 1,
dashboardId: dashboard.id,
tileId: 'number-tile-1',
name: 'Number Chart Alert',
},
mockUserId,
);
const now = new Date('2023-11-16T22:12:00.000Z');
const eventTime = new Date(now.getTime() - ms('3m'));
// Insert logs that should be counted by the Number chart query
await bulkInsertLogs([
{
ServiceName: 'web',
Timestamp: eventTime,
SeverityText: 'error',
Body: 'Number chart error 1',
},
{
ServiceName: 'web',
Timestamp: eventTime,
SeverityText: 'error',
Body: 'Number chart error 2',
},
{
ServiceName: 'web',
Timestamp: eventTime,
SeverityText: 'error',
Body: 'Number chart error 3',
},
]);
const enhancedAlert: any = await Alert.findById(alert.id).populate([
'team',
'savedSearch',
]);
const tile = dashboard.tiles?.find((t: any) => t.id === 'number-tile-1');
const details: AlertDetails = {
alert: enhancedAlert,
source: undefined,
taskType: AlertTaskType.TILE,
tile: tile!,
dashboard,
previousMap: new Map(),
};
const clickhouseClient = new ClickhouseClient({
host: connection.host,
username: connection.username,
password: connection.password,
});
await processAlert(
now,
details,
clickhouseClient,
connection.id,
alertProvider,
new Map([[webhook.id.toString(), webhook]]),
);
// Verify alert state changed to ALERT
expect((await Alert.findById(enhancedAlert.id))!.state).toBe('ALERT');
// Verify alert history was created
const alertHistories = await AlertHistory.find({
alert: alert.id,
}).sort({ createdAt: 1 });
expect(alertHistories.length).toBe(1);
expect(alertHistories[0].state).toBe('ALERT');
expect(alertHistories[0].counts).toBe(1);
expect(alertHistories[0].lastValues.length).toBe(1);
expect(alertHistories[0].lastValues[0].count).toBe(3);
// Verify webhook was called
expect(slack.postMessageToWebhook).toHaveBeenCalledTimes(1);
});
});

View file

@ -14,28 +14,37 @@ import {
Metadata,
} from '@hyperdx/common-utils/dist/core/metadata';
import { renderChartConfig } from '@hyperdx/common-utils/dist/core/renderChartConfig';
import { aliasMapToWithClauses } from '@hyperdx/common-utils/dist/core/utils';
import {
aliasMapToWithClauses,
displayTypeSupportsRawSqlAlerts,
isTimeSeriesDisplayType,
} from '@hyperdx/common-utils/dist/core/utils';
import { timeBucketByGranularity } from '@hyperdx/common-utils/dist/core/utils';
import {
isBuilderChartConfig,
isBuilderSavedChartConfig,
isRawSqlChartConfig,
isRawSqlSavedChartConfig,
} from '@hyperdx/common-utils/dist/guards';
import {
AlertErrorType,
AlertThresholdType,
BuilderChartConfigWithOptDateRange,
ChartConfigWithOptDateRange,
DisplayType,
getSampleWeightExpression,
pickSampleWeightExpressionProps,
SourceKind,
} from '@hyperdx/common-utils/dist/types';
import * as fns from 'date-fns';
import { isString } from 'lodash';
import { isString, pick } from 'lodash';
import { ObjectId } from 'mongoose';
import mongoose from 'mongoose';
import ms from 'ms';
import { serializeError } from 'serialize-error';
import { ALERT_HISTORY_QUERY_CONCURRENCY } from '@/controllers/alertHistory';
import { AlertState, AlertThresholdType, IAlert } from '@/models/alert';
import { AlertState, IAlert, IAlertError } from '@/models/alert';
import AlertHistory, { IAlertHistory } from '@/models/alertHistory';
import { IDashboard } from '@/models/dashboard';
import { ISavedSearch } from '@/models/savedSearch';
@ -81,6 +90,17 @@ export const alertHasGroupBy = (details: AlertDetails): boolean => {
) {
return true;
}
// Without a reliable parser, it's difficult to tell if the raw sql contains a
// group by (besides the group by on the interval), so we'll assume it might
// in the case of time series charts, and assume it will not in the case of number charts.
// Group name will just be blank if there are no group by values.
if (
details.taskType === AlertTaskType.TILE &&
isRawSqlSavedChartConfig(details.tile.config)
) {
return details.tile.config.displayType !== DisplayType.Number;
}
return false;
};
@ -118,18 +138,73 @@ export async function computeAliasWithClauses(
return aliasMapToWithClauses(aliasMap);
}
export class InvalidAlertError extends Error {
constructor(message: string) {
super(message);
this.name = 'InvalidAlertError';
}
}
// For security, we do not surface raw error messages for webhook or unknown
// failures — they may leak URLs, response bodies, or other sensitive detail
// from upstream systems. QUERY_ERROR and INVALID_ALERT messages are authored
// by us (ClickHouse errors or our own validation) and are safe to display.
const HARDCODED_ALERT_ERROR_MESSAGES: Partial<Record<AlertErrorType, string>> =
{
[AlertErrorType.WEBHOOK_ERROR]:
'Failed to send webhook notification. Check the webhook configuration and destination.',
[AlertErrorType.UNKNOWN]:
'An unknown error occurred while processing the alert.',
};
const makeAlertError = (
type: AlertErrorType,
message: string,
): IAlertError => ({
timestamp: new Date(),
type,
message: (HARDCODED_ALERT_ERROR_MESSAGES[type] ?? message).slice(0, 10000),
});
const getErrorMessage = (e: unknown): string => {
if (e instanceof Error) {
return e.message;
}
return String(e);
};
export const doesExceedThreshold = (
thresholdType: AlertThresholdType,
threshold: number,
{
threshold,
thresholdType,
thresholdMax,
}: Pick<IAlert, 'thresholdType' | 'threshold' | 'thresholdMax'>,
value: number,
) => {
const isThresholdTypeAbove = thresholdType === AlertThresholdType.ABOVE;
if (isThresholdTypeAbove && value >= threshold) {
return true;
} else if (!isThresholdTypeAbove && value < threshold) {
return true;
switch (thresholdType) {
case AlertThresholdType.ABOVE:
return value >= threshold;
case AlertThresholdType.BELOW:
return value < threshold;
case AlertThresholdType.ABOVE_EXCLUSIVE:
return value > threshold;
case AlertThresholdType.BELOW_OR_EQUAL:
return value <= threshold;
case AlertThresholdType.EQUAL:
return value === threshold;
case AlertThresholdType.NOT_EQUAL:
return value !== threshold;
case AlertThresholdType.BETWEEN:
case AlertThresholdType.NOT_BETWEEN:
if (thresholdMax == null) {
throw new InvalidAlertError(
`thresholdMax is required for threshold type "${thresholdType}"`,
);
}
return thresholdType === AlertThresholdType.BETWEEN
? value >= threshold && value <= thresholdMax
: value < threshold || value > thresholdMax;
}
return false;
};
const normalizeScheduleOffsetMinutes = ({
@ -292,6 +367,7 @@ const fireChannelEvent = async ({
silenced: alert.silenced,
source: alert.source,
threshold: alert.threshold,
thresholdMax: alert.thresholdMax,
thresholdType: alert.thresholdType,
tileId: alert.tileId,
},
@ -370,9 +446,10 @@ const shouldSkipAlertCheck = (
// Skip if ANY previous history for this alert was created in the current window
return Array.from(previousMap.entries()).some(([key, history]) => {
// For grouped alerts, check any key that starts with alertId prefix
// For non-grouped alerts, check exact match with alertId
// or matches the bare alertId (empty group key case).
// For non-grouped alerts, check exact match with alertId.
const isMatchingKey = hasGroupBy
? key.startsWith(alertKeyPrefix)
? key === alert.id || key.startsWith(alertKeyPrefix)
: key === alert.id;
return (
@ -394,11 +471,11 @@ const getAlertEvaluationDateRange = (
// Find the latest createdAt among all histories for this alert
let previousCreatedAt: Date | undefined;
if (hasGroupBy) {
// For grouped alerts, find the latest createdAt among all groups
// Use the latest to avoid checking from old groups that might no longer exist
// For grouped alerts, find the latest createdAt among all groups.
// Also check the bare alertId key for the empty group key case.
const alertKeyPrefix = getAlertKeyPrefix(alert.id);
for (const [key, history] of previousMap.entries()) {
if (key.startsWith(alertKeyPrefix)) {
if (key === alert.id || key.startsWith(alertKeyPrefix)) {
if (!previousCreatedAt || history.createdAt > previousCreatedAt) {
previousCreatedAt = history.createdAt;
}
@ -430,9 +507,10 @@ const getChartConfigFromAlert = (
connection: string,
dateRange: [Date, Date],
windowSizeInMins: number,
): BuilderChartConfigWithOptDateRange | undefined => {
const { alert, source } = details;
): ChartConfigWithOptDateRange | undefined => {
const { alert } = details;
if (details.taskType === AlertTaskType.SAVED_SEARCH) {
const { source } = details;
const savedSearch = details.savedSearch;
return {
connection,
@ -463,8 +541,43 @@ const getChartConfigFromAlert = (
} else if (details.taskType === AlertTaskType.TILE) {
const tile = details.tile;
// Alerts are not supported for raw sql based charts
if (isRawSqlSavedChartConfig(tile.config)) return undefined;
// Raw SQL tiles: build a RawSqlChartConfig
if (isRawSqlSavedChartConfig(tile.config)) {
if (displayTypeSupportsRawSqlAlerts(tile.config.displayType)) {
return {
...pick(tile.config, [
'configType',
'sqlTemplate',
'displayType',
'source',
]),
connection,
dateRange,
// Only time-series charts use interval bucketing
...(isTimeSeriesDisplayType(tile.config.displayType) && {
granularity: `${windowSizeInMins} minute`,
}),
// Include source metadata for macro expansion ($__sourceTable)
...(details.source && {
from: details.source.from,
metricTables:
details.source.kind === SourceKind.Metric
? details.source.metricTables
: undefined,
}),
};
}
return undefined;
}
const { source } = details;
if (!source) {
logger.error(
{ alertId: alert.id },
'Source not found for builder tile alert',
);
return undefined;
}
// Doesn't work for metric alerts yet
if (
@ -513,9 +626,21 @@ const getChartConfigFromAlert = (
return undefined;
};
type ResponseMetadata =
| {
type: 'time_series';
timestampColumnName: string;
valueColumnNames: Set<string>;
}
| {
type: 'single_value';
valueColumnNames: Set<string>;
};
const getResponseMetadata = (
chartConfig: ChartConfigWithOptDateRange,
data: ResponseJSON<Record<string, string | number>>,
) => {
): ResponseMetadata | undefined => {
if (!data?.meta) {
return undefined;
}
@ -527,39 +652,60 @@ const getResponseMetadata = (
jsType: clickhouse.convertCHDataTypeToJSType(m.type),
})) ?? [];
const timestampColumnName = meta.find(
m => m.jsType === clickhouse.JSDataType.Date,
)?.name;
const valueColumnNames = new Set(
meta
.filter(m => m.jsType === clickhouse.JSDataType.Number)
.map(m => m.name),
);
if (timestampColumnName == null) {
logger.error({ meta }, 'Failed to find timestamp column');
return undefined;
}
if (valueColumnNames.size === 0) {
logger.error({ meta }, 'Failed to find value column');
return undefined;
}
return { timestampColumnName, valueColumnNames };
// Raw SQL charts with Number display type don't use interval parameters, so they cannot be treated as timeseries.
// Number-type Builder Charts are rendered as time-series, to maintain legacy behavior for existing alerts.
if (
isRawSqlChartConfig(chartConfig) &&
chartConfig.displayType === DisplayType.Number
) {
return { type: 'single_value', valueColumnNames };
} else {
const timestampColumnName = meta.find(
m => m.jsType === clickhouse.JSDataType.Date,
)?.name;
if (timestampColumnName == null) {
logger.error({ meta }, 'Failed to find timestamp column');
return undefined;
}
return { type: 'time_series', timestampColumnName, valueColumnNames };
}
};
/**
* Parses the following from the given alert query result:
* - `value`: the numeric value to compare against the alert threshold, taken
* from the last column in the result which is included in valueColumnNames
* - `extraFields`: an array of strings representing the names and values of
* each column in the result which is neither the timestampColumnName nor a
* valueColumnName, formatted as "columnName:value".
*/
const parseAlertData = (
data: Record<string, string | number>,
meta: { timestampColumnName: string; valueColumnNames: Set<string> },
meta: ResponseMetadata,
) => {
let value: number | null = null;
const extraFields: string[] = [];
for (const [k, v] of Object.entries(data)) {
if (meta.valueColumnNames.has(k)) {
// Due to output_format_json_quote_64bit_integers=1, 64-bit integers will be returned as strings.
// Parse them as integers to ensure correct threshold comparison.
// Floats are not returned as strings (unless output_format_json_quote_64bit_floats=1, which is not the default).
value = isString(v) ? parseInt(v) : v;
} else if (k !== meta.timestampColumnName) {
} else if (meta.type !== 'time_series' || k !== meta.timestampColumnName) {
extraFields.push(`${k}:${v}`);
}
}
@ -575,7 +721,11 @@ export const processAlert = async (
alertProvider: AlertProvider,
teamWebhooksById: Map<string, IWebhook>,
) => {
const { alert, source, previousMap } = details;
const { alert, previousMap } = details;
const source = 'source' in details ? details.source : undefined;
// Errors collected during this execution. Webhook errors accumulate here; query
// and validation errors are recorded via recordAlertErrors before returning.
const executionErrors: IAlertError[] = [];
try {
const windowSizeInMins = ms(alert.interval) / 60000;
const scheduleStartAt = normalizeScheduleStartAt({
@ -680,10 +830,18 @@ export const processAlert = async (
// so we render the saved search's select separately to discover aliases
// and inject them as WITH clauses into the alert query.
if (details.taskType === AlertTaskType.SAVED_SEARCH) {
if (!isBuilderChartConfig(chartConfig)) {
logger.error({
chartConfig,
message:
'Found non-builder chart config for saved search alert, cannot compute WITH clauses',
});
throw new Error('Expected builder chart config for saved search alert');
}
try {
const withClauses = await computeAliasWithClauses(
details.savedSearch,
source,
details.source,
metadata,
);
if (withClauses) {
@ -700,25 +858,51 @@ export const processAlert = async (
// Optimize chart config with materialized views, if available.
// materializedViews exists on Log and Trace sources.
const mvSource =
source.kind === SourceKind.Log || source.kind === SourceKind.Trace
source?.kind === SourceKind.Log || source?.kind === SourceKind.Trace
? source
: undefined;
const optimizedChartConfig = mvSource?.materializedViews?.length
? await tryOptimizeConfigWithMaterializedView(
chartConfig,
metadata,
clickhouseClient,
undefined,
mvSource,
)
: chartConfig;
const optimizedChartConfig =
isBuilderChartConfig(chartConfig) && mvSource?.materializedViews?.length
? await tryOptimizeConfigWithMaterializedView(
chartConfig,
metadata,
clickhouseClient,
undefined,
mvSource,
)
: chartConfig;
// Query for alert data
const checksData = await clickhouseClient.queryChartConfig({
config: optimizedChartConfig,
metadata,
querySettings: source.querySettings,
});
// Readonly = 2 means the query is readonly but can still specify query settings.
// This is done only for Raw SQL configs because it carries a minor risk of conflict with
// existing settings (which may have readonly = 1) and is not required for builder
// chart configs, which are always rendered as select statements.
const clickHouseSettings = isRawSqlChartConfig(optimizedChartConfig)
? { readonly: '2' }
: {};
// Query for alert data. If the query fails, record the error and exit
// without touching alert state or creating an AlertHistory.
let checksData;
try {
checksData = await clickhouseClient.queryChartConfig({
config: optimizedChartConfig,
metadata,
opts: { clickhouse_settings: clickHouseSettings },
querySettings: source?.querySettings,
});
} catch (e) {
logger.error(
{
alertId: alert.id,
error: serializeError(e),
},
'Alert query failed, skipping state/history update',
);
await alertProvider.recordAlertErrors(alert.id, [
makeAlertError(AlertErrorType.QUERY_ERROR, getErrorMessage(e)),
]);
return;
}
logger.info(
{
@ -797,21 +981,84 @@ export const processAlert = async (
{ alertId: alert.id, group, error: serializeError(e) },
'Failed to fire channel event',
);
executionErrors.push(
makeAlertError(AlertErrorType.WEBHOOK_ERROR, getErrorMessage(e)),
);
}
};
const sendNotificationIfResolved = async (
previousHistory: AggregatedAlertHistory | undefined,
currentHistory: IAlertHistory,
groupKey: string,
) => {
if (
previousHistory?.state === AlertState.ALERT &&
currentHistory.state === AlertState.OK
) {
const lastValue =
currentHistory.lastValues[currentHistory.lastValues.length - 1];
await trySendNotification({
state: AlertState.OK,
group: groupKey,
totalCount: lastValue?.count || 0,
startTime: lastValue?.startTime || nowInMinsRoundDown,
});
}
};
const meta = getResponseMetadata(chartConfig, checksData);
if (!meta) {
logger.error({ alertId: alert.id }, 'Failed to get response metadata');
return;
}
// single_value type (Raw SQL Number charts) returns a single value with no
// timestamp column, and are assumed to not have groups.
if (meta.type === 'single_value') {
// Use the date range end as the alert timestamp.
const alertTimestamp = dateRange[1];
const history = getOrCreateHistory('');
// The value is taken from the last numeric column of the first row.
// The value defaults to 0.
const value =
checksData.data.length > 0
? (parseAlertData(checksData.data[0], meta).value ?? 0)
: 0;
history.lastValues.push({ count: value, startTime: alertTimestamp });
if (doesExceedThreshold(alert, value)) {
history.state = AlertState.ALERT;
history.counts += 1;
await trySendNotification({
state: AlertState.ALERT,
group: '',
totalCount: value,
startTime: alertTimestamp,
});
}
// Auto-resolve
const previous = previousMap.get(computeHistoryMapKey(alert.id, ''));
await sendNotificationIfResolved(previous, history, '');
const historyRecords = Array.from(histories.values());
await alertProvider.updateAlertState(
alert.id,
historyRecords,
executionErrors,
);
return;
}
// ── Time-series path (Line/StackedBar charts) ──
const expectedBuckets = timeBucketByGranularity(
dateRange[0],
dateRange[1],
`${windowSizeInMins} minute`,
);
const meta = getResponseMetadata(checksData);
if (!meta) {
logger.error({ alertId: alert.id }, 'Failed to get response metadata');
return;
}
// Group data by time bucket (grouped alerts may have multiple entries per time bucket)
const checkDataByBucket = new Map<
number,
@ -838,12 +1085,7 @@ export const processAlert = async (
'No data returned from ClickHouse for time bucket',
);
// Empty periods are filled with a 0 values.
const zeroValueIsAlert = doesExceedThreshold(
alert.thresholdType,
alert.threshold,
0,
);
const zeroValueIsAlert = doesExceedThreshold(alert, 0);
const hasAlertsInPreviousMap = previousMap
.values()
@ -884,7 +1126,7 @@ export const processAlert = async (
const groupKey = hasGroupBy ? extraFields.join(', ') : '';
const history = getOrCreateHistory(groupKey);
if (doesExceedThreshold(alert.thresholdType, alert.threshold, value)) {
if (doesExceedThreshold(alert, value)) {
history.state = AlertState.ALERT;
await trySendNotification({
state: AlertState.ALERT,
@ -912,7 +1154,7 @@ export const processAlert = async (
if (
previousHistory.state === AlertState.ALERT &&
!histories.has(groupKey) &&
!doesExceedThreshold(alert.thresholdType, alert.threshold, 0)
!doesExceedThreshold(alert, 0)
) {
logger.info(
{
@ -936,24 +1178,16 @@ export const processAlert = async (
for (const [groupKey, history] of histories.entries()) {
const previousKey = computeHistoryMapKey(alert.id, groupKey);
const groupPrevious = previousMap.get(previousKey);
if (
groupPrevious?.state === AlertState.ALERT &&
history.state === AlertState.OK
) {
const lastValue = history.lastValues[history.lastValues.length - 1];
await trySendNotification({
state: AlertState.OK,
group: groupKey,
totalCount: lastValue?.count || 0,
startTime: lastValue?.startTime || nowInMinsRoundDown,
});
}
await sendNotificationIfResolved(groupPrevious, history, groupKey);
}
// Save all history records and update alert state
const historyRecords = Array.from(histories.values());
await alertProvider.updateAlertState(alert.id, historyRecords);
await alertProvider.updateAlertState(
alert.id,
historyRecords,
executionErrors,
);
} catch (e) {
// Uncomment this for better error messages locally
// console.error(e);
@ -964,6 +1198,25 @@ export const processAlert = async (
},
'Failed to process alert',
);
// Record error without touching state/history.
const message = getErrorMessage(e);
const type =
e instanceof InvalidAlertError
? AlertErrorType.INVALID_ALERT
: AlertErrorType.UNKNOWN;
try {
await alertProvider.recordAlertErrors(alert.id, [
makeAlertError(type, message),
]);
} catch (recordErr) {
logger.error(
{
alertId: alert.id,
error: serializeError(recordErr),
},
'Failed to persist alert execution error',
);
}
}
};

View file

@ -193,13 +193,14 @@ describe('DefaultAlertProvider', () => {
// Validate source is proper ISource object
const alertSource = result[0].alerts[0].source;
expect(alertSource.connection).toBe(connection.id); // Should be ObjectId, not populated IConnection
expect(alertSource.name).toBe('Test Source');
expect(alertSource.kind).toBe('log');
expect(alertSource.team).toBeDefined();
expect(alertSource.from?.databaseName).toBe('default');
expect(alertSource.from?.tableName).toBe('logs');
expect(alertSource.timestampValueExpression).toBe('timestamp');
expect(alertSource).toBeDefined();
expect(alertSource!.connection).toBe(connection.id); // Should be ObjectId, not populated IConnection
expect(alertSource!.name).toBe('Test Source');
expect(alertSource!.kind).toBe('log');
expect(alertSource!.team).toBeDefined();
expect(alertSource!.from?.databaseName).toBe('default');
expect(alertSource!.from?.tableName).toBe('logs');
expect(alertSource!.timestampValueExpression).toBe('timestamp');
// Ensure it's a plain object, not a mongoose document
expect((alertSource as any).toObject).toBeUndefined(); // mongoose documents have toObject method

View file

@ -1,4 +1,5 @@
import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node';
import { displayTypeSupportsRawSqlAlerts } from '@hyperdx/common-utils/dist/core/utils';
import { isRawSqlSavedChartConfig } from '@hyperdx/common-utils/dist/guards';
import { Tile } from '@hyperdx/common-utils/dist/types';
import mongoose from 'mongoose';
@ -8,7 +9,12 @@ import { URLSearchParams } from 'url';
import * as config from '@/config';
import { LOCAL_APP_TEAM } from '@/controllers/team';
import { connectDB, mongooseConnection, ObjectId } from '@/models';
import Alert, { AlertSource, AlertState, type IAlert } from '@/models/alert';
import Alert, {
AlertSource,
AlertState,
type IAlert,
type IAlertError,
} from '@/models/alert';
import AlertHistory, { IAlertHistory } from '@/models/alertHistory';
import Connection, { IConnection } from '@/models/connection';
import Dashboard from '@/models/dashboard';
@ -108,13 +114,56 @@ async function getTileDetails(
}
if (isRawSqlSavedChartConfig(tile.config)) {
logger.warn({
tileId,
dashboardId: dashboard._id,
alertId: alert.id,
message: 'skipping alert with raw sql chart config, not supported',
});
return [];
if (!displayTypeSupportsRawSqlAlerts(tile.config.displayType)) {
logger.warn({
tileId,
dashboardId: dashboard._id,
alertId: alert.id,
message:
'skipping alert with raw sql chart config, only line/bar display types are supported',
});
return [];
}
// Raw SQL tiles store connection ID directly on the config
const connection = await Connection.findOne({
_id: tile.config.connection,
team: alert.team,
}).select('+password');
if (!connection) {
logger.error({
message: 'connection not found for raw sql tile',
connectionId: tile.config.connection,
tileId,
dashboardId: dashboard._id,
alertId: alert.id,
});
return [];
}
// Optionally look up source for filter/macro metadata
let source: ISource | undefined;
if (tile.config.source) {
const sourceDoc = await Source.findOne({
_id: tile.config.source,
team: alert.team,
});
if (sourceDoc) {
source = sourceDoc.toObject();
}
}
return [
connection,
{
alert,
source,
taskType: AlertTaskType.TILE,
tile,
dashboard,
},
];
}
const source = await Source.findOne({
@ -288,7 +337,11 @@ export default class DefaultAlertProvider implements AlertProvider {
return url.toString();
}
async updateAlertState(alertId: string, histories: IAlertHistory[]) {
async updateAlertState(
alertId: string,
histories: IAlertHistory[],
errors: IAlertError[],
) {
// Save history records first (in parallel), then update alert state
// Use Promise.allSettled to handle partial failures gracefully
const historyResults = await Promise.allSettled(
@ -324,10 +377,17 @@ export default class DefaultAlertProvider implements AlertProvider {
? AlertState.ALERT
: AlertState.OK;
// Update alert state based on successfully saved histories
// Update alert state + errors based on this execution
await Alert.updateOne(
{ _id: new mongoose.Types.ObjectId(alertId) },
{ $set: { state: finalState } },
{ $set: { state: finalState, executionErrors: errors } },
);
}
async recordAlertErrors(alertId: string, errors: IAlertError[]) {
await Alert.updateOne(
{ _id: new mongoose.Types.ObjectId(alertId) },
{ $set: { executionErrors: errors } },
);
}

View file

@ -3,7 +3,7 @@ import { Tile } from '@hyperdx/common-utils/dist/types';
import _ from 'lodash';
import { ObjectId } from '@/models';
import { IAlert } from '@/models/alert';
import { IAlert, IAlertError } from '@/models/alert';
import { IAlertHistory } from '@/models/alertHistory';
import { IConnection } from '@/models/connection';
import { IDashboard } from '@/models/dashboard';
@ -32,15 +32,16 @@ export type PopulatedAlertChannel = { type: 'webhook' } & { channel: IWebhook };
// the are required when the type is set accordingly.
export type AlertDetails = {
alert: IAlert;
source: ISource;
previousMap: Map<string, AggregatedAlertHistory>; // Map of alertId||group -> history for group-by alerts
} & (
| {
taskType: AlertTaskType.SAVED_SEARCH;
source: ISource;
savedSearch: Omit<ISavedSearch, 'source'>;
}
| {
taskType: AlertTaskType.TILE;
source?: ISource;
tile: Tile;
dashboard: IDashboard;
}
@ -78,8 +79,20 @@ export interface AlertProvider {
* Save the given AlertHistory records and update the associated alert's state.
* Uses Promise.allSettled to handle partial failures gracefully.
* The alert state is determined from successfully saved histories, or falls back to all histories if all saves fail.
* Also replaces the alert's `executionErrors` field with the provided errors from the current execution.
*/
updateAlertState(alertId: string, histories: IAlertHistory[]): Promise<void>;
updateAlertState(
alertId: string,
histories: IAlertHistory[],
errors: IAlertError[],
): Promise<void>;
/**
* Replace the alert's `executionErrors` field without changing state or creating history.
* Use this when an error prevents the normal state/history update from running
* (e.g. a ClickHouse query error).
*/
recordAlertErrors(alertId: string, errors: IAlertError[]): Promise<void>;
/** Fetch all webhooks for the given team, returning a map of webhook ID to webhook */
getWebhooks(teamId: string | ObjectId): Promise<Map<string, IWebhook>>;

View file

@ -8,8 +8,10 @@ import {
} from '@hyperdx/common-utils/dist/core/utils';
import {
AlertChannelType,
AlertThresholdType,
ChartConfigWithOptDateRange,
DisplayType,
isRangeThresholdType,
pickSampleWeightExpressionProps,
SourceKind,
WebhookService,
@ -24,7 +26,7 @@ import { z } from 'zod';
import * as config from '@/config';
import { AlertInput } from '@/controllers/alerts';
import { AlertSource, AlertState, AlertThresholdType } from '@/models/alert';
import { AlertSource, AlertState } from '@/models/alert';
import { IDashboard } from '@/models/dashboard';
import { ISavedSearch } from '@/models/savedSearch';
import { ISource } from '@/models/source';
@ -42,6 +44,58 @@ import { truncateString } from '@/utils/common';
import logger from '@/utils/logger';
import * as slack from '@/utils/slack';
const describeThresholdViolation = (
thresholdType: AlertThresholdType,
): string => {
switch (thresholdType) {
case AlertThresholdType.ABOVE:
return 'meets or exceeds';
case AlertThresholdType.ABOVE_EXCLUSIVE:
return 'exceeds';
case AlertThresholdType.BELOW:
return 'falls below';
case AlertThresholdType.BELOW_OR_EQUAL:
return 'falls to or below';
case AlertThresholdType.EQUAL:
return 'equals';
case AlertThresholdType.NOT_EQUAL:
return 'does not equal';
case AlertThresholdType.BETWEEN:
return 'falls between';
case AlertThresholdType.NOT_BETWEEN:
return 'falls outside';
}
};
const describeThresholdResolution = (
thresholdType: AlertThresholdType,
): string => {
switch (thresholdType) {
case AlertThresholdType.ABOVE:
return 'falls below';
case AlertThresholdType.ABOVE_EXCLUSIVE:
return 'falls to or below';
case AlertThresholdType.BELOW:
return 'meets or exceeds';
case AlertThresholdType.BELOW_OR_EQUAL:
return 'exceeds';
case AlertThresholdType.EQUAL:
return 'does not equal';
case AlertThresholdType.NOT_EQUAL:
return 'equals';
case AlertThresholdType.BETWEEN:
return 'falls outside';
case AlertThresholdType.NOT_BETWEEN:
return 'falls between';
}
};
const describeThreshold = (alert: AlertInput): string => {
return isRangeThresholdType(alert.thresholdType)
? `${alert.threshold} and ${alert.thresholdMax ?? '?'}`
: `${alert.threshold}`;
};
const MAX_MESSAGE_LENGTH = 500;
const NOTIFY_FN_NAME = '__hdx_notify_channel__';
const IS_MATCH_FN_NAME = 'is_match';
@ -278,7 +332,7 @@ export const handleSendGenericWebhook = async (
},
'Failed to compile generic webhook body',
);
return;
throw new Error('Failed to build webhook request body', { cause: e });
}
try {
@ -300,6 +354,8 @@ export const handleSendGenericWebhook = async (
},
'Failed to send generic webhook message',
);
// rethrow so that it can be recorded in alert errors
throw e;
}
};
@ -376,14 +432,10 @@ export const buildAlertMessageTemplateTitle = ({
const baseTitle = template
? handlebars.compile(template)(view)
: `Alert for "${tile.config.name}" in "${dashboard.name}" - ${formattedValue} ${
doesExceedThreshold(alert.thresholdType, alert.threshold, value)
? alert.thresholdType === AlertThresholdType.ABOVE
? 'exceeds'
: 'falls below'
: alert.thresholdType === AlertThresholdType.ABOVE
? 'falls below'
: 'exceeds'
} ${alert.threshold}`;
doesExceedThreshold(alert, value)
? describeThresholdViolation(alert.thresholdType)
: describeThresholdResolution(alert.thresholdType)
} ${describeThreshold(alert)}`;
return `${emoji}${baseTitle}`;
}
@ -424,7 +476,7 @@ const getPopulatedChannel = (
channelType: AlertChannelType,
channelIdOrNamePrefix: string,
teamWebhooksById: Map<string, IWebhook>,
): PopulatedAlertChannel | undefined => {
): PopulatedAlertChannel => {
switch (channelType) {
case 'webhook': {
const webhook =
@ -438,13 +490,15 @@ const getPopulatedChannel = (
},
'webhook not found',
);
return undefined;
throw new Error(
`Webhook not found. The webhook may have been deleted — update the alert's notification channel.`,
);
}
return { type: 'webhook', channel: webhook };
}
default: {
logger.error({ channelType }, 'Unsupported alert channel type');
return undefined;
throw new Error('Unsupported alert destination');
}
}
};
@ -649,11 +703,7 @@ ${targetTemplate}`;
}
rawTemplateBody = `${group ? `Group: "${group}"` : ''}
${value} lines found, expected ${
alert.thresholdType === AlertThresholdType.ABOVE
? 'less than'
: 'greater than'
} ${alert.threshold} lines\n${timeRangeMessage}
${value} lines found, which ${describeThresholdViolation(alert.thresholdType)} the threshold of ${describeThreshold(alert)} lines\n${timeRangeMessage}
${targetTemplate}
\`\`\`
${truncatedResults}
@ -665,14 +715,10 @@ ${truncatedResults}
const formattedValue = formatValueToMatchThreshold(value, alert.threshold);
rawTemplateBody = `${group ? `Group: "${group}"` : ''}
${formattedValue} ${
doesExceedThreshold(alert.thresholdType, alert.threshold, value)
? alert.thresholdType === AlertThresholdType.ABOVE
? 'exceeds'
: 'falls below'
: alert.thresholdType === AlertThresholdType.ABOVE
? 'falls below'
: 'exceeds'
} ${alert.threshold}\n${timeRangeMessage}
doesExceedThreshold(alert, value)
? describeThresholdViolation(alert.thresholdType)
: describeThresholdResolution(alert.thresholdType)
} ${describeThreshold(alert)}\n${timeRangeMessage}
${targetTemplate}`;
}

View file

@ -1,41 +1,41 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
// Jest Snapshot v1, https://jestjs.io/docs/snapshot-testing
exports[`logParser mapObjectToKeyValuePairs 1`] = `
Object {
"bool.names": Array [
{
"bool.names": [
"foo2",
"good.burrito.is",
],
"bool.values": Array [
"bool.values": [
0,
1,
],
"number.names": Array [
"number.names": [
"foo1",
],
"number.values": Array [
"number.values": [
123,
],
"string.names": Array [
"string.names": [
"foo",
"nested.foo",
"array1",
"array2",
],
"string.values": Array [
"string.values": [
"123",
"bar",
"[456]",
"[\\"foo1\\",{\\"foo2\\":\\"bar2\\"},[{\\"foo3\\":\\"bar3\\"}]]",
"["foo1",{"foo2":"bar2"},[{"foo3":"bar3"}]]",
],
}
`;
exports[`logParser mapObjectToKeyValuePairs 2`] = `
Object {
"bool.names": Array [],
"bool.values": Array [],
"number.names": Array [
{
"bool.names": [],
"bool.values": [],
"number.names": [
"foo0",
"foo1",
"foo2",
@ -1061,7 +1061,7 @@ Object {
"foo1022",
"foo1023",
],
"number.values": Array [
"number.values": [
0,
1,
2,
@ -2087,7 +2087,7 @@ Object {
1022,
1023,
],
"string.names": Array [],
"string.values": Array [],
"string.names": [],
"string.values": [],
}
`;

View file

@ -0,0 +1,121 @@
import { trimToolResponse } from '../trimToolResponse';
describe('trimToolResponse', () => {
describe('small data (within maxSize)', () => {
it('should return primitive values unchanged', () => {
expect(trimToolResponse(42)).toBe(42);
expect(trimToolResponse('hello')).toBe('hello');
expect(trimToolResponse(null)).toBeNull();
expect(trimToolResponse(true)).toBe(true);
});
it('should return small arrays unchanged', () => {
const data = [1, 2, 3, 4, 5];
expect(trimToolResponse(data)).toEqual(data);
});
it('should return small objects unchanged', () => {
const data = { a: 1, b: 'hello', c: [1, 2, 3] };
expect(trimToolResponse(data)).toEqual(data);
});
});
describe('large arrays', () => {
it('should trim large arrays to fit within maxSize', () => {
// Create an array that exceeds maxSize
const largeArray = Array.from({ length: 500 }, (_, i) => ({
id: i,
data: 'x'.repeat(200),
}));
const result = trimToolResponse(largeArray, 5000);
expect(Array.isArray(result)).toBe(true);
expect(result.length).toBeLessThan(largeArray.length);
expect(result.length).toBeGreaterThanOrEqual(10); // minimum 10 items
expect(JSON.stringify(result).length).toBeLessThanOrEqual(5000);
});
it('should keep at least 10 items', () => {
const largeArray = Array.from({ length: 100 }, (_, i) => ({
id: i,
data: 'x'.repeat(500),
}));
// maxSize so small even 10 items may exceed it, but we keep 10 minimum
const result = trimToolResponse(largeArray, 100);
expect(Array.isArray(result)).toBe(true);
expect(result.length).toBeGreaterThanOrEqual(10);
});
it('should not trim arrays that fit within maxSize', () => {
const smallArray = [1, 2, 3, 4, 5];
const result = trimToolResponse(smallArray, 50000);
expect(result).toEqual(smallArray);
});
});
describe('large objects', () => {
it('should trim large objects to fit within maxSize', () => {
const largeObj: Record<string, string> = {};
for (let i = 0; i < 100; i++) {
largeObj[`key_${i}`] = 'x'.repeat(200);
}
const result = trimToolResponse(largeObj, 5000);
// The trimmed result must be smaller than the original
expect(JSON.stringify(result).length).toBeLessThan(
JSON.stringify(largeObj).length,
);
// All keys should still be present (values are trimmed, not dropped)
expect(
Object.keys(result).filter(k => k !== '__hdx_trimmed'),
).toHaveLength(100);
// The sentinel flag should be set to indicate trimming occurred
expect(result.__hdx_trimmed).toBe(true);
});
it('should not trim objects that fit within maxSize', () => {
const obj = { a: 1, b: 2 };
const result = trimToolResponse(obj, 50000);
expect(result).toEqual(obj);
});
});
describe('getAIMetadata structure', () => {
it('should handle objects with allFieldsWithKeys and keyValues', () => {
const metadataObj = {
allFieldsWithKeys: Array.from({ length: 200 }, (_, i) => ({
field: `field_${i}`,
key: `key_${i}`,
extra: 'x'.repeat(100),
})),
keyValues: Object.fromEntries(
Array.from({ length: 200 }, (_, i) => [`kv_${i}`, 'x'.repeat(100)]),
),
otherProp: 'preserved',
};
const result = trimToolResponse(metadataObj, 5000);
expect(result).toHaveProperty('allFieldsWithKeys');
expect(result).toHaveProperty('keyValues');
expect(result).toHaveProperty('otherProp', 'preserved');
expect(Array.isArray(result.allFieldsWithKeys)).toBe(true);
expect(typeof result.keyValues).toBe('object');
});
});
describe('default maxSize', () => {
it('should use 50000 as default maxSize', () => {
// Create data just over default size
const data = Array.from({ length: 1000 }, (_, i) => ({
id: i,
payload: 'x'.repeat(100),
}));
const resultDefault = trimToolResponse(data);
const resultExplicit = trimToolResponse(data, 50000);
// Both should produce the same result
expect(resultDefault.length).toBe(resultExplicit.length);
});
});
});

View file

@ -1,18 +1,20 @@
import {
AlertErrorType,
AlertThresholdType,
BuilderSavedChartConfig,
DashboardFilter,
DisplayType,
SavedChartConfig,
} from '@hyperdx/common-utils/dist/types';
import { omit } from 'lodash';
import { FlattenMaps, LeanDocument } from 'mongoose';
import type { ObjectId } from '@/models';
import {
AlertChannel,
AlertDocument,
AlertInterval,
AlertState,
AlertThresholdType,
IAlert,
} from '@/models/alert';
import type { DashboardDocument } from '@/models/dashboard';
import { SeriesTile } from '@/routers/external-api/v2/utils/dashboards';
@ -228,6 +230,7 @@ export type ExternalAlert = {
name?: string | null;
message?: string | null;
threshold: number;
thresholdMax?: number;
interval: AlertInterval;
scheduleOffsetMinutes?: number;
scheduleStartAt?: string | null;
@ -245,13 +248,16 @@ export type ExternalAlert = {
at: string;
until: string;
};
executionErrors?: {
timestamp: string;
type: AlertErrorType;
message: string;
}[];
createdAt?: string;
updatedAt?: string;
};
type AlertDocumentObject =
| AlertDocument
| FlattenMaps<LeanDocument<AlertDocument>>;
type AlertDocumentObject = IAlert & { _id: ObjectId };
function hasCreatedAt(
alert: AlertDocumentObject,
@ -295,6 +301,19 @@ function transformSilencedToExternalSilenced(
: undefined;
}
function transformErrorsToExternalErrors(
errors: AlertDocumentObject['executionErrors'],
): ExternalAlert['executionErrors'] {
return errors?.map(err => ({
timestamp:
err.timestamp instanceof Date
? err.timestamp.toISOString()
: String(err.timestamp),
type: err.type,
message: err.message,
}));
}
export function translateAlertDocumentToExternalAlert(
alert: AlertDocument,
): ExternalAlert {
@ -309,6 +328,7 @@ export function translateAlertDocumentToExternalAlert(
name: alertObj.name,
message: alertObj.message,
threshold: alertObj.threshold,
thresholdMax: alertObj.thresholdMax,
interval: alertObj.interval,
...(alertObj.scheduleOffsetMinutes != null && {
scheduleOffsetMinutes: alertObj.scheduleOffsetMinutes,
@ -324,6 +344,7 @@ export function translateAlertDocumentToExternalAlert(
savedSearchId: alertObj.savedSearch?.toString(),
groupBy: alertObj.groupBy,
silenced: transformSilencedToExternalSilenced(alertObj.silenced),
executionErrors: transformErrorsToExternalErrors(alertObj.executionErrors),
createdAt: hasCreatedAt(alertObj)
? alertObj.createdAt.toISOString()
: undefined,

View file

@ -1,5 +1,10 @@
import express from 'express';
import rateLimit, { Options } from 'express-rate-limit';
export const rateLimiterKeyGenerator = (req: express.Request): string => {
return req.headers.authorization ?? req.ip ?? 'unknown';
};
export default (config?: Partial<Options>) => {
return rateLimit({
...config,

View file

@ -9,7 +9,7 @@ type JsonStringifiable = { toJSON(): string };
* toJSON(): string). This allows passing raw Mongoose data to sendJson()
* while keeping type inference from the typed Express response.
*/
type PreSerialized<T> = T extends string
export type PreSerialized<T> = T extends string
? string | JsonStringifiable
: T extends (infer U)[]
? PreSerialized<U>[]

View file

@ -0,0 +1,109 @@
import logger from '@/utils/logger';
/**
* Trims large data structures to prevent "Request Entity Too Large" errors
* when multiple tool calls accumulate data in the conversation history.
*/
export function trimToolResponse(data: any, maxSize: number = 50000): any {
const serialized = JSON.stringify(data);
// If data is within acceptable size, return as-is
if (serialized.length <= maxSize) {
return data;
}
logger.warn(
`Tool response too large, trimming data. Original Size: ${serialized.length}, Max Size: ${maxSize}`,
);
// Handle different data structures
if (Array.isArray(data)) {
return trimArray(data, maxSize);
}
if (typeof data === 'object' && data !== null) {
return trimObject(data, maxSize);
}
return data;
}
function trimArray(arr: any[], maxSize: number): any[] {
// Keep reducing array size until it fits
let result = [...arr];
let resultSize = JSON.stringify(result).length;
while (resultSize > maxSize && result.length > 10) {
// Keep at least 10 items
const newLength = Math.max(10, Math.floor(result.length * 0.7));
result = result.slice(0, newLength);
resultSize = JSON.stringify(result).length;
}
// If we're still over budget (e.g. a single item exceeds maxSize), truncate
// individual oversized items so the array itself stays within the limit.
if (resultSize > maxSize) {
result = result.map(item => {
const itemStr = JSON.stringify(item);
if (itemStr.length > maxSize) {
logger.info(
`Trimming oversized array item (${itemStr.length} bytes > ${maxSize} limit)`,
);
if (typeof item === 'object' && item !== null) {
return trimObject(item, maxSize);
}
// Scalar that is itself too large — return a truncation marker
return { __hdx_trimmed: true, originalSize: itemStr.length };
}
return item;
});
}
if (result.length < arr.length) {
logger.info(`Trimmed array from ${arr.length} to ${result.length} items`);
}
return result;
}
// Keys in trimObject come exclusively from Object.entries() on internal tool
// response data — never from user-supplied HTTP input — so bracket-notation
// writes are not an injection risk; see inline eslint-disable comments below.
function trimObject(obj: any, maxSize: number): any {
const entries = Object.entries(obj);
if (entries.length === 0) return obj;
const result: any = {};
// Give each key an equal share of the budget so that no single large value
// crowds out the rest (e.g. a large array at key[0] eating all the budget
// before key[1] gets a chance to appear).
const perKeyBudget = Math.floor(maxSize / entries.length);
let trimmed = false;
for (const [key, value] of entries) {
const valueStr = JSON.stringify(value);
if (valueStr.length <= perKeyBudget) {
result[key] = value; // eslint-disable-line security/detect-object-injection
} else {
logger.info(
`Trimming oversized object value at key "${key}" (${valueStr.length} bytes > ${perKeyBudget} per-key budget)`,
);
if (Array.isArray(value)) {
result[key] = trimArray(value, perKeyBudget); // eslint-disable-line security/detect-object-injection
} else if (typeof value === 'object' && value !== null) {
result[key] = trimObject(value, perKeyBudget); // eslint-disable-line security/detect-object-injection
} else {
result[key] = { __hdx_trimmed: true, originalSize: valueStr.length }; // eslint-disable-line security/detect-object-injection
}
trimmed = true;
}
}
if (trimmed) {
result.__hdx_trimmed = true;
}
return result;
}

View file

@ -1,17 +1,19 @@
import {
AggregateFunctionSchema,
AlertThresholdType,
DashboardFilterSchema,
MetricsDataType,
NumberFormatSchema,
scheduleStartAtSchema,
SearchConditionLanguageSchema as whereLanguageSchema,
validateAlertScheduleOffsetMinutes,
validateAlertThresholdMax,
WebhookService,
} from '@hyperdx/common-utils/dist/types';
import { Types } from 'mongoose';
import { z } from 'zod';
import { AlertSource, AlertThresholdType } from '@/models/alert';
import { AlertSource } from '@/models/alert';
export const objectIdSchema = z.string().refine(val => {
return Types.ObjectId.isValid(val);
@ -510,12 +512,14 @@ export const alertSchema = z
scheduleStartAt: scheduleStartAtSchema,
threshold: z.number(),
thresholdType: z.nativeEnum(AlertThresholdType),
thresholdMax: z.number().optional(),
source: z.nativeEnum(AlertSource).default(AlertSource.SAVED_SEARCH),
name: z.string().min(1).max(512).nullish(),
message: z.string().min(1).max(4096).nullish(),
})
.and(zSavedSearchAlert.or(zTileAlert))
.superRefine(validateAlertScheduleOffsetMinutes);
.superRefine(validateAlertScheduleOffsetMinutes)
.superRefine(validateAlertThresholdMax);
// ==============================
// Webhooks

View file

@ -5,6 +5,7 @@
"paths": {
"@/*": ["./*"]
},
"types": ["jest", "node"],
"outDir": "build",
"isolatedModules": true,
"skipLibCheck": true,

View file

@ -1,5 +1,64 @@
# @hyperdx/app
## 2.24.0
### Minor Changes
- 5885d479: Introduces Shared Filters, enabling teams to pin and surface common filters across all members.
- 0bfec148: Upgrade Mantine from v7 to v9 and remove react-hook-form-mantine dependency
### Patch Changes
- 1fada918: feat: Support alerts on Raw SQL Number Charts
- c4a1311e: fix: Fix "Copy entire row as JSON" button crashing on rows with non-string values
- a5869f0e: Dedupe source validation issue toasts so repeated source refetches update a single notification instead of stacking duplicates.
- 7953c028: feat: Add between-type alert thresholds
- d3a61f9b: feat: Add additional alert threshold types
- 5149fabd: feat: Add Python Runtime Metrics dashboard template
- 085f3074: feat: Implement alerting for Raw SQL-based dashboard tiles
- 739fe140: fix: time selector always resets to 00:00
- 3c057720: feat: Show alert execution errors in the UI
- 6ff1ba60: feat: Add alert history + ack to alert editor
- 4ca1d472: Allow manually constructed /trace URLs to land in the existing search experience with the trace viewer opened from URL state. This keeps trace deep links user-friendly while reusing the search page for source selection, not-found handling, and trace inspection.
- Updated dependencies [418f70c5]
- Updated dependencies [1fada918]
- Updated dependencies [7953c028]
- Updated dependencies [d3a61f9b]
- Updated dependencies [5885d479]
- Updated dependencies [cc714f90]
- Updated dependencies [085f3074]
- Updated dependencies [3c057720]
- Updated dependencies [6ff1ba60]
- @hyperdx/common-utils@0.18.0
## 2.23.2
### Patch Changes
- 0daa5299: feat: Generate stable source IDs in local mode
## 2.23.1
### Patch Changes
- 7d1a8e54: fix: Show sidebar favorites empty state when none are starred yet
- 800689ac: feat: Add reusable EmptyState component and adopt it across pages for consistent empty/no-data states
- 2570ff84: fix: Change K8s CPU chart format from percentage to number to support both old and new OTel collector metric names
- ad71dc2e: feat: Add keyboard shortcuts modal from the Help menu
- New **Keyboard shortcuts** item opens a modal documenting app shortcuts (command palette ⌘/Ctrl+K, search focus, time picker, tables, traces, dashboards, and more).
- Help menu items ordered by importance (documentation and setup before shortcuts and community).
- Shortcuts modal uses a readable width, row dividers, and **or** vs **+** labels so alternative keys are not confused with key chords.
- 1bcca2cd: feat: Add alert icons to dashboard list page
- 52986a94: Fix bug when accessing session replay panel from search page
- ffc961c6: fix: Add error message and edit button when tile source is missing
- 3ffafced: feat: show error details in search event patterns
- 61db3e8b: refactor: Create TileAlertEditor component
- f8d2edde: feat: Show created/updated metadata for saved searches and dashboards
- Updated dependencies [24767c58]
- @hyperdx/common-utils@0.17.1
## 2.23.0
### Minor Changes

View file

@ -91,6 +91,7 @@ export default [
'next-env.d.ts',
'playwright-report/**',
'.next/**',
'.next-e2e/**',
'.storybook/**',
'node_modules/**',
'out/**',
@ -121,8 +122,28 @@ export default [
...nextPlugin.configs.recommended.rules,
...nextPlugin.configs['core-web-vitals'].rules,
...reactHooksPlugin.configs.recommended.rules,
...eslintReactPlugin.configs['recommended-type-checked'].rules,
// Non-default react-hooks rules
'react-hooks/set-state-in-render': 'error',
'react-hooks/set-state-in-effect': 'warn',
'react-hooks/exhaustive-deps': 'error',
// Disable rules from @eslint-react that have equivalent rules enabled in eslint-plugin-react-hooks
'@eslint-react/rules-of-hooks': 'off',
'@eslint-react/component-hook-factories': 'off',
'@eslint-react/exhaustive-deps': 'off',
'@eslint-react/error-boundaries': 'off',
'@eslint-react/immutability': 'off',
'@eslint-react/purity': 'off',
'@eslint-react/refs': 'off',
'@eslint-react/set-state-in-effect': 'off',
'@eslint-react/set-state-in-render': 'off',
'@eslint-react/no-nested-component-definitions': 'off',
'@eslint-react/no-nested-lazy-component-declarations': 'off',
'@eslint-react/unsupported-syntax': 'off',
'@eslint-react/use-memo': 'off',
'react-hook-form/no-use-watch': 'error',
'@eslint-react/no-unstable-default-props': 'error',
'@typescript-eslint/ban-ts-comment': 'warn',
@ -205,6 +226,7 @@ export default [
rules: {
// Drop date rules — new Date() / Date.now() are fine in tests
'no-restricted-syntax': ['error', ...UI_SYNTAX_RESTRICTIONS],
'@eslint-react/component-hook-factories': 'off',
},
},
{

Some files were not shown because too many files have changed in this diff Show more