hyperdx/packages/cli/src/components/EventViewer/usePatternData.ts


[HDX-3964] Add event pattern mining to CLI (Shift+P) (#2106)

## Summary

Adds a pattern mining feature to the CLI, accessible via `Shift+P`. This mirrors the web app's Pattern Table functionality but runs entirely in TypeScript; no Pyodide/Python WASM is needed.

**Linear:** https://linear.app/hyperdx/issue/HDX-3964

## What changed

### 1. Drain library in common-utils (`packages/common-utils/src/drain/`)

Ported the [browser-drain](https://github.com/DeploySentinel/browser-drain) TypeScript library into `@hyperdx/common-utils`. This is a pure TypeScript implementation of the Drain3 log template mining algorithm, including:

- `TemplateMiner` / `TemplateMinerConfig`: main API
- `Drain`: core algorithm with prefix tree and LRU cluster cache
- `LogMasker`: regex-based token masking (IPs, numbers, etc.)
- `LruCache`: custom LRU cache matching Python Drain3's eviction semantics
- 11 Jest tests ported from the original `node:test` suite

### 2. CLI pattern view (`packages/cli/src/components/EventViewer/`)

**Keybinding:** `Shift+P` toggles the pattern view (pauses follow mode, restores it on exit)

**Data flow (mirrors web app's `useGroupedPatterns`):**

- Issues `SELECT ... ORDER BY rand() LIMIT 100000` to randomly sample up to 100K events
- Issues a parallel `SELECT count()` to get the true total event count
- Feeds sampled log bodies through the TypeScript `TemplateMiner`
- Estimates pattern counts via `sampleMultiplier = totalCount / sampledRowCount`
- Computes time-bucketed trend data per pattern

**UI:**

- Pattern list with columns: Est. Count (with `~` prefix), Pattern
- `l`/`Enter` expands a pattern to show its sample events (full table columns)
- `h`/`Esc` returns to the pattern list
- `j/k/G/g/Ctrl+D/Ctrl+U` navigation throughout
- Loading spinner while the sampling query runs

**Alias fix:** Pattern and count queries compute `WITH` clauses from the source's `defaultTableSelectExpression` so Lucene searches using aliases (e.g. `level:error`, where `level` is an alias for `SeverityText`) resolve correctly.

### New files

- `packages/common-utils/src/drain/`: 7 source files + barrel index
- `packages/common-utils/src/__tests__/drain.test.ts`
- `packages/cli/src/components/EventViewer/usePatternData.ts`
- `packages/cli/src/components/EventViewer/PatternView.tsx`
- `packages/cli/src/components/EventViewer/PatternSamplesView.tsx`

### Modified files

- `packages/cli/src/api/eventQuery.ts`: added `buildPatternSampleQuery`, `buildTotalCountQuery`, `buildAliasWithClauses`
- `packages/cli/src/components/EventViewer/EventViewer.tsx`: wired in pattern state + rendering
- `packages/cli/src/components/EventViewer/useKeybindings.ts`: added `P`, `l`, `h` keybindings + pattern/sample navigation
- `packages/cli/src/components/EventViewer/SubComponents.tsx`: added `P` to the help screen

### Demo

https://github.com/user-attachments/assets/50a2edfc-8891-43ae-ab86-b96fca778c66
2026-04-14 18:03:56 +00:00
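
As a quick orientation before the source below, here is a minimal sketch of how a consuming component might call the hook; `isPatternViewOpen` and the surrounding variables are illustrative stand-ins, not the actual `PatternView` wiring:

```ts
// Hypothetical usage sketch; prop values are examples only.
const { patterns, loading, error, totalCount, sampledRowCount } =
  usePatternData({
    clickhouseClient,
    metadata,
    source,
    submittedQuery: 'level:error', // Lucene query; aliases resolve via WITH clauses
    startTime: new Date(Date.now() - 60 * 60 * 1000), // last hour
    endTime: new Date(),
    enabled: isPatternViewOpen, // e.g. toggled by Shift+P
  });
```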
/**
 * Fetches a random sample of events from ClickHouse, mines patterns
 * using the Drain algorithm, and estimates total counts using a
 * sampleMultiplier mirroring the web frontend's useGroupedPatterns.
 */
import { useState, useEffect, useCallback, useRef } from 'react';
import {
  TemplateMiner,
  TemplateMinerConfig,
} from '@hyperdx/common-utils/dist/drain';
import type { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
import { convertDateRangeToGranularityString } from '@hyperdx/common-utils/dist/core/utils';
import type { SourceResponse, ProxyClickhouseClient } from '@/api/client';
import {
  buildPatternSampleQuery,
  buildTotalCountQuery,
} from '@/api/eventQuery';
import { getEventBody } from '@/shared/source';
import type { EventRow } from './types';
import { flatten } from './utils';

// ---- Constants -----------------------------------------------------

const SAMPLES = 10_000;

// ---- Time bucketing utilities --------------------------------------

/** Parse a granularity string like "5 minute" into seconds. */
function granularityToSeconds(granularity: string): number {
  const [num, unit] = granularity.split(' ');
  const n = parseInt(num, 10);
  switch (unit) {
    case 'second':
      return n;
    case 'minute':
      return n * 60;
    case 'hour':
      return n * 3600;
    case 'day':
      return n * 86400;
    default:
      return n * 60;
  }
}
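// Illustrative examples: granularityToSeconds('5 minute') === 300 and
// granularityToSeconds('1 hour') === 3600. Unhandled units fall back to
// minutes, so granularityToSeconds('1 week') === 60.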

/** Round a timestamp down to the start of its granularity bucket. */
function toStartOfBucket(ts: number, granularityMs: number): number {
  return Math.floor(ts / granularityMs) * granularityMs;
}
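// Illustrative example: with 1-minute buckets (granularityMs = 60_000),
// toStartOfBucket(1_700_000_030_000, 60_000) === 1_699_999_980_000.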

/** Generate all bucket start timestamps between start and end. */
function generateBuckets(
  startMs: number,
  endMs: number,
  granularityMs: number,
): number[] {
  const buckets: number[] = [];
  let current = toStartOfBucket(startMs, granularityMs);
  while (current < endMs) {
    buckets.push(current);
    current += granularityMs;
  }
  return buckets;
}
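// Illustrative example: generateBuckets(0, 180_000, 60_000) yields
// [0, 60_000, 120_000], i.e. one start timestamp per minute bucket.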

// ---- Types ---------------------------------------------------------

export interface TrendBucket {
  ts: number;
  count: number;
}

export interface PatternGroup {
  id: string;
  pattern: string;
  /** Raw count within the sample */
  count: number;
  /** Estimated total count (count * sampleMultiplier), prefixed with ~ in display */
  estimatedCount: number;
  samples: EventRow[];
  /** Time-bucketed trend data for sparkline */
  trend: TrendBucket[];
}

export interface UsePatternDataParams {
  clickhouseClient: ProxyClickhouseClient;
  metadata: Metadata;
  source: SourceResponse;
  submittedQuery: string;
  startTime: Date;
  endTime: Date;
  /** Only fetch when true (i.e., pattern view is open) */
  enabled: boolean;
}

export interface UsePatternDataReturn {
  patterns: PatternGroup[];
  loading: boolean;
  error: Error | null;
  totalCount: number | null;
  sampledRowCount: number;
}

// ---- Hook ----------------------------------------------------------

export function usePatternData({
  clickhouseClient,
  metadata,
  source,
  submittedQuery,
  startTime,
  endTime,
  enabled,
}: UsePatternDataParams): UsePatternDataReturn {
  const [patterns, setPatterns] = useState<PatternGroup[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<Error | null>(null);
  const [totalCount, setTotalCount] = useState<number | null>(null);
  const [sampledRowCount, setSampledRowCount] = useState(0);

  // Track the last query params to avoid redundant fetches
  const lastFetchRef = useRef<string>('');

  // Body column expression from the source config, if one is set
  const bodyColumn = getEventBody(source) || undefined;

  const fetchPatterns = useCallback(async () => {
    const fetchKey = JSON.stringify({
      source: source.id,
      submittedQuery,
      startTime: startTime.getTime(),
      endTime: endTime.getTime(),
    });
    // Skip if we already fetched for these exact params
    if (lastFetchRef.current === fetchKey) return;
    lastFetchRef.current = fetchKey;

    setLoading(true);
    setError(null);
    try {
      // Build both queries in parallel
      const [sampleChSql, countChSql] = await Promise.all([
        buildPatternSampleQuery(
          {
            source,
            searchQuery: submittedQuery,
            startTime,
            endTime,
            sampleLimit: SAMPLES,
          },
          metadata,
        ),
        buildTotalCountQuery(
          { source, searchQuery: submittedQuery, startTime, endTime },
          metadata,
        ),
      ]);

      // Fire both queries in parallel
      const [sampleResult, countResult] = await Promise.all([
        clickhouseClient.query({
          query: sampleChSql.sql,
          query_params: sampleChSql.params,
          format: 'JSON',
          connectionId: source.connection,
        }),
        clickhouseClient.query({
          query: countChSql.sql,
          query_params: countChSql.params,
          format: 'JSON',
          connectionId: source.connection,
        }),
      ]);

      const sampleJson = (await sampleResult.json()) as { data: EventRow[] };
      const countJson = (await countResult.json()) as {
        data: Array<Record<string, string | number>>;
      };
      const sampleRows = sampleJson.data ?? [];
      const total = Number(countJson.data?.[0]?.total ?? 0);
      setTotalCount(total);
      setSampledRowCount(sampleRows.length);

      if (sampleRows.length === 0) {
        setPatterns([]);
        return;
      }

      // Determine columns from the result keys
      const resultKeys = Object.keys(sampleRows[0]);
      const effectiveBodyColumn =
        bodyColumn ?? resultKeys[resultKeys.length - 1];
      // Use the source's timestamp expression, falling back to the first column
      const tsExpr = source.timestampValueExpression ?? 'TimestampTime';
      const tsColumn = resultKeys.find(k => k === tsExpr) ?? resultKeys[0];

      // Compute granularity for trend buckets
      const granularity = convertDateRangeToGranularityString(
        [startTime, endTime],
        24,
      );
      const granularityMs = granularityToSeconds(granularity) * 1000;
      const allBuckets = generateBuckets(
        startTime.getTime(),
        endTime.getTime(),
        granularityMs,
      );

      // Mine patterns
      const config = new TemplateMinerConfig();
      const miner = new TemplateMiner(config);
      const clustered: Array<{
        clusterId: number;
        row: EventRow;
        tsMs: number;
      }> = [];
      for (const row of sampleRows) {
        const body = row[effectiveBodyColumn];
        const text = body != null ? flatten(String(body)) : '';
        const result = miner.addLogMessage(text);
        const tsRaw = row[tsColumn];
        const tsMs =
          tsRaw != null
            ? new Date(String(tsRaw)).getTime()
            : startTime.getTime();
        clustered.push({ clusterId: result.clusterId, row, tsMs });
      }

      // Group by cluster ID
      const groups = new Map<
        number,
        {
          rows: EventRow[];
          template: string;
          bucketCounts: Map<number, number>;
        }
      >();
      for (const { clusterId, row, tsMs } of clustered) {
        const bucket = toStartOfBucket(tsMs, granularityMs);
        const existing = groups.get(clusterId);
        if (existing) {
          existing.rows.push(row);
          existing.bucketCounts.set(
            bucket,
            (existing.bucketCounts.get(bucket) ?? 0) + 1,
          );
        } else {
          const body = row[effectiveBodyColumn];
          const text = body != null ? flatten(String(body)) : '';
          const match = miner.match(text, 'fallback');
          const bucketCounts = new Map<number, number>();
          bucketCounts.set(bucket, 1);
          groups.set(clusterId, {
            rows: [row],
            template: match?.getTemplate() ?? text,
            bucketCounts,
          });
        }
      }

      // Compute sampleMultiplier
      const sampleMultiplier =
        total > 0 && sampleRows.length > 0 ? total / sampleRows.length : 1;
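      // Worked example (illustrative): total = 1_000_000 with 10_000
      // sampled rows gives sampleMultiplier = 100, so a cluster of 42
      // sampled rows is reported as an estimated ~4200 events.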

      // Convert to sorted array with estimated counts and trend data
      const result: PatternGroup[] = [];
      for (const [id, { rows, template, bucketCounts }] of groups) {
        const trend: TrendBucket[] = allBuckets.map(bucketTs => ({
          ts: bucketTs,
          count: Math.round(
            (bucketCounts.get(bucketTs) ?? 0) * sampleMultiplier,
          ),
        }));
        result.push({
          id: String(id),
          pattern: template,
          count: rows.length,
          estimatedCount: Math.max(
            Math.round(rows.length * sampleMultiplier),
            1,
          ),
          samples: rows,
          trend,
        });
      }
      result.sort((a, b) => b.estimatedCount - a.estimatedCount);
      setPatterns(result);
    } catch (err: unknown) {
      setError(err instanceof Error ? err : new Error(String(err)));
      // Clear the fetch key so a retry will re-fetch
      lastFetchRef.current = '';
    } finally {
      setLoading(false);
    }
  }, [
    clickhouseClient,
    metadata,
    source,
    submittedQuery,
    startTime,
    endTime,
    bodyColumn,
  ]);

  useEffect(() => {
    if (enabled) {
      fetchPatterns();
    }
  }, [enabled, fetchPatterns]);

  // Reset the fetch key when the view closes so reopening re-fetches
  useEffect(() => {
    if (!enabled) {
      lastFetchRef.current = '';
    }
  }, [enabled]);

  return { patterns, loading, error, totalCount, sampledRowCount };
}