From 68ef3d6f97d897fdcb9e157b9161659cd975f4a4 Mon Sep 17 00:00:00 2001 From: Alex Fedotyev <61838744+alex-fedotyev@users.noreply.github.com> Date: Thu, 5 Mar 2026 08:52:54 -0800 Subject: [PATCH] feat: deterministic sampling with adaptive sample size (#1849) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Closes #1827 Replaces non-deterministic `ORDER BY rand()` with deterministic `cityHash64(SpanId)` sampling and introduces sampling configuration constants. ### What this PR does - **Deterministic sampling**: `ORDER BY cityHash64(SpanId)` instead of `rand()` — same data always produces the same sample, so results are stable across re-renders - **Named constants and helper**: `SAMPLE_SIZE` and `getStableSampleExpression(spanIdExpression)` replace the hardcoded `1000` and `'rand()'` in query configs - **Adaptive sizing foundation**: `computeEffectiveSampleSize()` function with `MIN_SAMPLE_SIZE`/`MAX_SAMPLE_SIZE`/`SAMPLE_RATIO` constants, exported and tested (6 unit tests) ### What this PR does NOT do (follow-up) - **Count query for adaptive sizing**: Wiring `computeEffectiveSampleSize` into the actual queries requires adding a lightweight `count()` query. This is deferred to keep this PR focused on the deterministic sampling change. - **Dynamic column detection**: The stable sample expression is built from the source's `spanIdExpression`, which `DBSearchHeatmapChart` passes to `DBDeltaChart` as an optional prop; span IDs are trace-specific. Event Deltas currently only renders on the traces search page where `SpanId` is always present. If the feature expands to logs/metrics, each source will need to supply a suitable stable expression (documented in code comment). 
## Test plan - [ ] Same data + same hover always highlights the same heatmap cells (deterministic) - [ ] Run `npx jest src/components/__tests__/deltaChartSampling.test.ts` — 6 tests pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- .changeset/sampling-improvements.md | 5 ++ packages/app/src/components/DBDeltaChart.tsx | 19 +++++-- .../Search/DBSearchHeatmapChart.tsx | 1 + .../__tests__/deltaChartSampling.test.ts | 57 +++++++++++++++++++ .../app/src/components/deltaChartUtils.ts | 40 +++++++++++++ 5 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 .changeset/sampling-improvements.md create mode 100644 packages/app/src/components/__tests__/deltaChartSampling.test.ts diff --git a/.changeset/sampling-improvements.md b/.changeset/sampling-improvements.md new file mode 100644 index 00000000..304caaf4 --- /dev/null +++ b/.changeset/sampling-improvements.md @@ -0,0 +1,5 @@ +--- +"@hyperdx/app": patch +--- + +feat: deterministic sampling with adaptive sample size for Event Deltas diff --git a/packages/app/src/components/DBDeltaChart.tsx b/packages/app/src/components/DBDeltaChart.tsx index fd5bd68d..d38d44ff 100644 --- a/packages/app/src/components/DBDeltaChart.tsx +++ b/packages/app/src/components/DBDeltaChart.tsx @@ -23,9 +23,11 @@ import { getFirstTimestampValueExpression } from '@/source'; import { SQLPreview } from './ChartSQLPreview'; import { getPropertyStatistics, + getStableSampleExpression, isDenylisted, isHighCardinality, mergeValueStatisticsMaps, + SAMPLE_SIZE, } from './deltaChartUtils'; import { CHART_GAP, @@ -42,6 +44,7 @@ export default function DBDeltaChart({ xMax, yMin, yMax, + spanIdExpression, }: { config: ChartConfigWithDateRange; valueExpr: string; @@ -49,10 +52,14 @@ export default function DBDeltaChart({ xMax: number; yMin: number; yMax: number; + spanIdExpression?: string; }) { // Determine if the value expression uses aggregate functions const isAggregate = isAggregateFunction(valueExpr); + // Build 
deterministic ORDER BY expression from source's spanIdExpression + const stableSampleExpr = getStableSampleExpression(spanIdExpression); + // Get the timestamp expression from config const timestampExpr = getFirstTimestampValueExpression( config.timestampValueExpression, @@ -136,8 +143,8 @@ export default function DBDeltaChart({ ] : []), ], - orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }], - limit: { limit: 1000 }, + orderBy: [{ ordering: 'DESC', valueExpression: stableSampleExpr }], + limit: { limit: SAMPLE_SIZE }, }, }, ]; @@ -191,8 +198,8 @@ export default function DBDeltaChart({ with: buildWithClauses(true), select: '*', filters: buildFilters(true), - orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }], - limit: { limit: 1000 }, + orderBy: [{ ordering: 'DESC', valueExpression: stableSampleExpr }], + limit: { limit: SAMPLE_SIZE }, }); const { data: inlierData } = useQueriedChartConfig({ @@ -200,8 +207,8 @@ export default function DBDeltaChart({ with: buildWithClauses(false), select: '*', filters: buildFilters(false), - orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }], - limit: { limit: 1000 }, + orderBy: [{ ordering: 'DESC', valueExpression: stableSampleExpr }], + limit: { limit: SAMPLE_SIZE }, }); // Column metadata for field classification (from ClickHouse response) diff --git a/packages/app/src/components/Search/DBSearchHeatmapChart.tsx b/packages/app/src/components/Search/DBSearchHeatmapChart.tsx index 1b710ea1..fea4425c 100644 --- a/packages/app/src/components/Search/DBSearchHeatmapChart.tsx +++ b/packages/app/src/components/Search/DBSearchHeatmapChart.tsx @@ -119,6 +119,7 @@ export function DBSearchHeatmapChart({ xMax={fields.xMax} yMin={fields.yMin} yMax={fields.yMax} + spanIdExpression={source.spanIdExpression} /> ) : (