mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
## Summary Adds a pattern mining feature to the CLI, accessible via `Shift+P`. This mirrors the web app's Pattern Table functionality but runs entirely in TypeScript — no Pyodide/Python WASM needed. **Linear:** https://linear.app/hyperdx/issue/HDX-3964 ## What changed ### 1. Drain library in common-utils (`packages/common-utils/src/drain/`) Ported the [browser-drain](https://github.com/DeploySentinel/browser-drain) TypeScript library into `@hyperdx/common-utils`. This is a pure TypeScript implementation of the Drain3 log template mining algorithm, including: - `TemplateMiner` / `TemplateMinerConfig` — main API - `Drain` — core algorithm with prefix tree and LRU cluster cache - `LogMasker` — regex-based token masking (IPs, numbers, etc.) - `LruCache` — custom LRU cache matching Python Drain3's eviction semantics - 11 Jest tests ported from the original `node:test` suite ### 2. CLI pattern view (`packages/cli/src/components/EventViewer/`) **Keybinding:** `Shift+P` toggles pattern view (pauses follow mode, restores on exit) **Data flow (mirrors web app's `useGroupedPatterns`):** - Issues `SELECT ... ORDER BY rand() LIMIT 100000` to randomly sample up to 100K events - Issues parallel `SELECT count()` to get true total event count - Feeds sampled log bodies through the TypeScript `TemplateMiner` - Estimates pattern counts via `sampleMultiplier = totalCount / sampledRowCount` - Computes time-bucketed trend data per pattern **UI:** - Pattern list with columns: Est. Count (with `~` prefix), Pattern - `l`/`Enter` expands a pattern to show its sample events (full table columns) - `h`/`Esc` returns to pattern list - `j/k/G/g/Ctrl+D/Ctrl+U` navigation throughout - Loading spinner while sampling query runs **Alias fix:** Pattern and count queries compute `WITH` clauses from the source's `defaultTableSelectExpression` so Lucene searches using aliases (e.g. `level:error` where `level` is an alias for `SeverityText`) resolve correctly. 
### New files - `packages/common-utils/src/drain/` — 7 source files + barrel index - `packages/common-utils/src/__tests__/drain.test.ts` - `packages/cli/src/components/EventViewer/usePatternData.ts` - `packages/cli/src/components/EventViewer/PatternView.tsx` - `packages/cli/src/components/EventViewer/PatternSamplesView.tsx` ### Modified files - `packages/cli/src/api/eventQuery.ts` — added `buildPatternSampleQuery`, `buildTotalCountQuery`, `buildAliasWithClauses` - `packages/cli/src/components/EventViewer/EventViewer.tsx` — wired in pattern state + rendering - `packages/cli/src/components/EventViewer/useKeybindings.ts` — added P, l, h keybindings + pattern/sample navigation - `packages/cli/src/components/EventViewer/SubComponents.tsx` — added P to help screen ### Demo https://github.com/user-attachments/assets/50a2edfc-8891-43ae-ab86-b96fca778c66
333 lines
9.3 KiB
TypeScript
333 lines
9.3 KiB
TypeScript
/**
|
|
* Fetches a random sample of events from ClickHouse, mines patterns
|
|
* using the Drain algorithm, and estimates total counts using a
|
|
* sampleMultiplier — mirroring the web frontend's useGroupedPatterns.
|
|
*/
|
|
import { useState, useEffect, useCallback, useRef } from 'react';
|
|
|
|
import {
|
|
TemplateMiner,
|
|
TemplateMinerConfig,
|
|
} from '@hyperdx/common-utils/dist/drain';
|
|
import type { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
|
|
import { convertDateRangeToGranularityString } from '@hyperdx/common-utils/dist/core/utils';
|
|
|
|
import type { SourceResponse, ProxyClickhouseClient } from '@/api/client';
|
|
import {
|
|
buildPatternSampleQuery,
|
|
buildTotalCountQuery,
|
|
} from '@/api/eventQuery';
|
|
import { getEventBody } from '@/shared/source';
|
|
|
|
import type { EventRow } from './types';
|
|
import { flatten } from './utils';
|
|
|
|
// ---- Constants -----------------------------------------------------

// Maximum number of rows randomly sampled for pattern mining (used as
// `sampleLimit` for buildPatternSampleQuery below).
// NOTE(review): the PR description mentions sampling up to 100K events,
// but this caps the sample at 10K — confirm which is intended.
const SAMPLES = 10_000;
|
|
|
|
// ---- Time bucketing utilities --------------------------------------
|
|
|
|
/** Parse a granularity string like "5 minute" into seconds. */
|
|
function granularityToSeconds(granularity: string): number {
|
|
const [num, unit] = granularity.split(' ');
|
|
const n = parseInt(num, 10);
|
|
switch (unit) {
|
|
case 'second':
|
|
return n;
|
|
case 'minute':
|
|
return n * 60;
|
|
case 'hour':
|
|
return n * 3600;
|
|
case 'day':
|
|
return n * 86400;
|
|
default:
|
|
return n * 60;
|
|
}
|
|
}
|
|
|
|
/** Round a timestamp down to the start of its granularity bucket. */
|
|
function toStartOfBucket(ts: number, granularityMs: number): number {
|
|
return Math.floor(ts / granularityMs) * granularityMs;
|
|
}
|
|
|
|
/** Generate all bucket start timestamps between start and end. */
|
|
function generateBuckets(
|
|
startMs: number,
|
|
endMs: number,
|
|
granularityMs: number,
|
|
): number[] {
|
|
const buckets: number[] = [];
|
|
let current = toStartOfBucket(startMs, granularityMs);
|
|
while (current < endMs) {
|
|
buckets.push(current);
|
|
current += granularityMs;
|
|
}
|
|
return buckets;
|
|
}
|
|
|
|
// ---- Types ---------------------------------------------------------

/** One point of a pattern's time-bucketed trend series. */
export interface TrendBucket {
  /** Bucket start timestamp (ms since epoch). */
  ts: number;
  /** Estimated event count in this bucket (sample count * sampleMultiplier). */
  count: number;
}

/** A mined log pattern together with the sampled rows that matched it. */
export interface PatternGroup {
  /** Drain cluster id, stringified. */
  id: string;
  /** The mined template text for this cluster. */
  pattern: string;
  /** Raw count within the sample */
  count: number;
  /** Estimated total count (count * sampleMultiplier), prefixed with ~ in display */
  estimatedCount: number;
  /** The sampled event rows belonging to this cluster. */
  samples: EventRow[];
  /** Time-bucketed trend data for sparkline */
  trend: TrendBucket[];
}

/** Inputs for usePatternData. */
export interface UsePatternDataParams {
  clickhouseClient: ProxyClickhouseClient;
  metadata: Metadata;
  source: SourceResponse;
  /** The submitted search query used to filter sampled events. */
  submittedQuery: string;
  startTime: Date;
  endTime: Date;
  /** Only fetch when true (i.e., pattern view is open) */
  enabled: boolean;
}

/** Result of usePatternData. */
export interface UsePatternDataReturn {
  /** Pattern groups, sorted by estimatedCount descending. */
  patterns: PatternGroup[];
  /** True while the sample/count queries and mining are in flight. */
  loading: boolean;
  /** Last fetch error, or null when the last fetch succeeded. */
  error: Error | null;
  /** Total matching event count from the count query; null before first fetch. */
  totalCount: number | null;
  /** Number of rows actually returned by the sample query. */
  sampledRowCount: number;
}
|
|
|
|
// ---- Hook ----------------------------------------------------------

/**
 * Samples events from ClickHouse, mines log templates with the Drain
 * TemplateMiner, and returns pattern groups with estimated total counts
 * and per-bucket trend data.
 *
 * Fetches only while `enabled` is true, and skips re-fetching when the
 * (source, query, time range) identity is unchanged since the last fetch.
 */
export function usePatternData({
  clickhouseClient,
  metadata,
  source,
  submittedQuery,
  startTime,
  endTime,
  enabled,
}: UsePatternDataParams): UsePatternDataReturn {
  const [patterns, setPatterns] = useState<PatternGroup[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<Error | null>(null);
  const [totalCount, setTotalCount] = useState<number | null>(null);
  const [sampledRowCount, setSampledRowCount] = useState(0);

  // Track the last query params to avoid redundant fetches
  const lastFetchRef = useRef<string>('');

  // Log-body column from the source config; undefined when the source
  // does not define one (the mining loop then falls back to the last
  // column of the query result — see effectiveBodyColumn below).
  const bodyColumn = (() => {
    const expr = getEventBody(source);
    if (expr) return expr;
    return undefined;
  })();

  const fetchPatterns = useCallback(async () => {
    // Identity of this fetch: any change in these params forces a re-fetch.
    const fetchKey = JSON.stringify({
      source: source.id,
      submittedQuery,
      startTime: startTime.getTime(),
      endTime: endTime.getTime(),
    });

    // Skip if we already fetched for these exact params
    if (lastFetchRef.current === fetchKey) return;
    lastFetchRef.current = fetchKey;

    setLoading(true);
    setError(null);

    try {
      // Build both SQL statements in parallel (the builders are async).
      const [sampleChSql, countChSql] = await Promise.all([
        buildPatternSampleQuery(
          {
            source,
            searchQuery: submittedQuery,
            startTime,
            endTime,
            sampleLimit: SAMPLES,
          },
          metadata,
        ),
        buildTotalCountQuery(
          { source, searchQuery: submittedQuery, startTime, endTime },
          metadata,
        ),
      ]);

      // Run the random-sample query and the total-count query in parallel.
      const [sampleResult, countResult] = await Promise.all([
        clickhouseClient.query({
          query: sampleChSql.sql,
          query_params: sampleChSql.params,
          format: 'JSON',
          connectionId: source.connection,
        }),
        clickhouseClient.query({
          query: countChSql.sql,
          query_params: countChSql.params,
          format: 'JSON',
          connectionId: source.connection,
        }),
      ]);

      const sampleJson = (await sampleResult.json()) as { data: EventRow[] };
      const countJson = (await countResult.json()) as {
        data: Array<Record<string, string | number>>;
      };

      const sampleRows = sampleJson.data ?? [];
      // NOTE(review): assumes the count query aliases count() AS `total` —
      // verify against buildTotalCountQuery.
      const total = Number(countJson.data?.[0]?.total ?? 0);

      setTotalCount(total);
      setSampledRowCount(sampleRows.length);

      // No sampled rows: publish an empty pattern list and stop early.
      if (sampleRows.length === 0) {
        setPatterns([]);
        setLoading(false);
        return;
      }

      // Determine columns from the result keys
      const resultKeys = Object.keys(sampleRows[0]);
      // NOTE(review): the fallback assumes the sample query selects the
      // body expression last — confirm against buildPatternSampleQuery.
      const effectiveBodyColumn =
        bodyColumn ?? resultKeys[resultKeys.length - 1];
      // Use the source's timestamp expression, falling back to the first column
      // (the 'TimestampTime' default presumably matches the OTel schema —
      // TODO confirm).
      const tsExpr = source.timestampValueExpression ?? 'TimestampTime';
      const tsColumn = resultKeys.find(k => k === tsExpr) ?? resultKeys[0];

      // Compute granularity for trend buckets (24 target buckets across
      // the selected range).
      const granularity = convertDateRangeToGranularityString(
        [startTime, endTime],
        24,
      );
      const granularityMs = granularityToSeconds(granularity) * 1000;
      const allBuckets = generateBuckets(
        startTime.getTime(),
        endTime.getTime(),
        granularityMs,
      );

      // Mine patterns
      const config = new TemplateMinerConfig();
      const miner = new TemplateMiner(config);

      // First pass: feed every body through the miner and record which
      // cluster each row landed in, plus its timestamp in ms.
      const clustered: Array<{
        clusterId: number;
        row: EventRow;
        tsMs: number;
      }> = [];
      for (const row of sampleRows) {
        const body = row[effectiveBodyColumn];
        const text = body != null ? flatten(String(body)) : '';
        const result = miner.addLogMessage(text);
        const tsRaw = row[tsColumn];
        // Rows without a parseable timestamp fall back to the range start.
        const tsMs =
          tsRaw != null
            ? new Date(String(tsRaw)).getTime()
            : startTime.getTime();
        clustered.push({ clusterId: result.clusterId, row, tsMs });
      }

      // Group by cluster ID
      const groups = new Map<
        number,
        {
          rows: EventRow[];
          template: string;
          bucketCounts: Map<number, number>;
        }
      >();

      // Second pass (after all rows were added, so templates are final):
      // accumulate rows and per-bucket counts per cluster.
      for (const { clusterId, row, tsMs } of clustered) {
        const bucket = toStartOfBucket(tsMs, granularityMs);
        const existing = groups.get(clusterId);
        if (existing) {
          existing.rows.push(row);
          existing.bucketCounts.set(
            bucket,
            (existing.bucketCounts.get(bucket) ?? 0) + 1,
          );
        } else {
          // First row seen for this cluster: look up its final template.
          // NOTE(review): the 'fallback' second argument presumably selects
          // the match strategy — verify against the TemplateMiner API.
          const body = row[effectiveBodyColumn];
          const text = body != null ? flatten(String(body)) : '';
          const match = miner.match(text, 'fallback');
          const bucketCounts = new Map<number, number>();
          bucketCounts.set(bucket, 1);
          groups.set(clusterId, {
            rows: [row],
            template: match?.getTemplate() ?? text,
            bucketCounts,
          });
        }
      }

      // Compute sampleMultiplier: scales sample counts up to estimates of
      // the true totals (1 when either count is unknown/zero).
      const sampleMultiplier =
        total > 0 && sampleRows.length > 0 ? total / sampleRows.length : 1;

      // Convert to sorted array with estimated counts and trend data
      const result: PatternGroup[] = [];
      for (const [id, { rows, template, bucketCounts }] of groups) {
        // Densify the trend: every bucket in the range, zero-filled.
        const trend: TrendBucket[] = allBuckets.map(bucketTs => ({
          ts: bucketTs,
          count: Math.round(
            (bucketCounts.get(bucketTs) ?? 0) * sampleMultiplier,
          ),
        }));

        result.push({
          id: String(id),
          pattern: template,
          count: rows.length,
          // Clamp to at least 1: a cluster with any sample row represents
          // at least one real event.
          estimatedCount: Math.max(
            Math.round(rows.length * sampleMultiplier),
            1,
          ),
          samples: rows,
          trend,
        });
      }

      result.sort((a, b) => b.estimatedCount - a.estimatedCount);
      setPatterns(result);
    } catch (err: unknown) {
      setError(err instanceof Error ? err : new Error(String(err)));
      // Clear the fetch key so a retry will re-fetch
      lastFetchRef.current = '';
    } finally {
      setLoading(false);
    }
  }, [
    clickhouseClient,
    metadata,
    source,
    submittedQuery,
    startTime,
    endTime,
    bodyColumn,
  ]);

  // Kick off a fetch whenever the view is enabled (fetchPatterns itself
  // dedupes identical params via lastFetchRef).
  useEffect(() => {
    if (enabled) {
      fetchPatterns();
    }
  }, [enabled, fetchPatterns]);

  // Reset the fetch key when the view closes so reopening re-fetches
  // fresh data. Note: this does NOT clear `patterns` — the previous
  // results remain until the next fetch replaces them.
  useEffect(() => {
    if (!enabled) {
      lastFetchRef.current = '';
    }
  }, [enabled]);

  return { patterns, loading, error, totalCount, sampledRowCount };
}
|