[HDX-3964] Add event pattern mining to CLI (Shift+P) (#2106)

## Summary

Adds a pattern mining feature to the CLI, accessible via `Shift+P`. This mirrors the web app's Pattern Table functionality but runs entirely in TypeScript — no Pyodide/Python WASM needed.

**Linear:** https://linear.app/hyperdx/issue/HDX-3964

## What changed

### 1. Drain library in common-utils (`packages/common-utils/src/drain/`)

Ported the [browser-drain](https://github.com/DeploySentinel/browser-drain) TypeScript library into `@hyperdx/common-utils`. This is a pure TypeScript implementation of the Drain3 log template mining algorithm, including:

- `TemplateMiner` / `TemplateMinerConfig` — main API
- `Drain` — core algorithm with prefix tree and LRU cluster cache
- `LogMasker` — regex-based token masking (IPs, numbers, etc.)
- `LruCache` — custom LRU cache matching Python Drain3's eviction semantics
- 11 Jest tests ported from the original `node:test` suite

### 2. CLI pattern view (`packages/cli/src/components/EventViewer/`)

**Keybinding:** `Shift+P` toggles pattern view (pauses follow mode, restores on exit)

**Data flow (mirrors web app's `useGroupedPatterns`):**
- Issues `SELECT ... ORDER BY rand() LIMIT <sampleLimit>` to randomly sample events (the query builder defaults to 100K; the CLI hook currently requests 10K)
- Issues parallel `SELECT count()` to get true total event count
- Feeds sampled log bodies through the TypeScript `TemplateMiner`
- Estimates pattern counts via `sampleMultiplier = totalCount / sampledRowCount`
- Computes time-bucketed trend data per pattern

**UI:**
- Pattern list with columns: Est. Count (with `~` prefix), Pattern
- `l`/`Enter` expands a pattern to show its sample events (full table columns)
- `h`/`Esc` returns to pattern list
- `j/k/G/g/Ctrl+D/Ctrl+U` navigation throughout
- Loading spinner while sampling query runs

**Alias fix:** Pattern and count queries compute `WITH` clauses from the source's `defaultTableSelectExpression` so Lucene searches using aliases (e.g. `level:error` where `level` is an alias for `SeverityText`) resolve correctly.

### New files
- `packages/common-utils/src/drain/` — 7 source files + barrel index
- `packages/common-utils/src/__tests__/drain.test.ts`
- `packages/cli/src/components/EventViewer/usePatternData.ts`
- `packages/cli/src/components/EventViewer/PatternView.tsx`
- `packages/cli/src/components/EventViewer/PatternSamplesView.tsx`

### Modified files
- `packages/cli/src/api/eventQuery.ts` — added `buildPatternSampleQuery`, `buildTotalCountQuery`, `buildAliasWithClauses`
- `packages/cli/src/components/EventViewer/EventViewer.tsx` — wired in pattern state + rendering
- `packages/cli/src/components/EventViewer/useKeybindings.ts` — added P, l, h keybindings + pattern/sample navigation
- `packages/cli/src/components/EventViewer/SubComponents.tsx` — added P to help screen

### Demo

https://github.com/user-attachments/assets/50a2edfc-8891-43ae-ab86-b96fca778c66
This commit is contained in:
Warren Lee 2026-04-14 11:03:56 -07:00 committed by GitHub
parent 7a49c74835
commit 418f70c516
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 2119 additions and 5 deletions

View file

@ -0,0 +1,5 @@
---
"@hyperdx/cli": patch
---
Add event pattern mining view (Shift+P) with sampled estimation and drill-down

View file

@ -0,0 +1,5 @@
---
"@hyperdx/common-utils": patch
---
Add Drain log template mining library (ported from browser-drain)

View file

@ -42,6 +42,7 @@
"run:clickhouse": "nx run @hyperdx/app:run:clickhouse",
"dev": "sh -c '. ./scripts/dev-env.sh && yarn build:common-utils && dotenvx run --convention=nextjs -- docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml up -d && yarn app:dev; dotenvx run --convention=nextjs -- docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml down'",
"dev:local": "IS_LOCAL_APP_MODE='DANGEROUSLY_is_local_app_mode💀' yarn dev",
"cli:dev": "yarn workspace @hyperdx/cli dev",
"dev:down": "sh -c '. ./scripts/dev-env.sh && docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml down && sh ./scripts/dev-kill-ports.sh'",
"dev:compose": "sh -c '. ./scripts/dev-env.sh && docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml'",
"knip": "knip",

View file

@ -10,6 +10,7 @@ import type {
import { chSqlToAliasMap } from '@hyperdx/common-utils/dist/clickhouse';
import { renderChartConfig } from '@hyperdx/common-utils/dist/core/renderChartConfig';
import type { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
import { aliasMapToWithClauses } from '@hyperdx/common-utils/dist/core/utils';
import { DisplayType } from '@hyperdx/common-utils/dist/types';
import type {
BuilderChartConfigWithDateRange,
@ -106,6 +107,144 @@ export async function buildEventSearchQuery(
return renderChartConfig(config, metadata, source.querySettings);
}
// ---- Alias WITH clauses from source select --------------------------
/**
 * Derive `WITH` clauses from the source's default select expression.
 *
 * Sources may alias columns (e.g. `SeverityText as level`); Lucene
 * searches such as `level:error` then reference the alias inside the
 * WHERE clause, so each alias is surfaced via a `WITH <expr> AS <alias>`
 * clause extracted from a rendered probe query.
 */
async function buildAliasWithClauses(
  source: SourceResponse,
  metadata: Metadata,
): Promise<BuilderChartConfigWithDateRange['with']> {
  const selectExpr = source.defaultTableSelectExpression;
  if (!selectExpr) return undefined;

  // Render a throwaway query (LIMIT 0, empty date range) whose only
  // purpose is to let us read back the alias map of the select list.
  const probeConfig: BuilderChartConfigWithDateRange = {
    displayType: DisplayType.Search,
    select: selectExpr,
    from: source.from,
    where: '',
    connection: source.connection,
    timestampValueExpression:
      source.timestampValueExpression ?? 'TimestampTime',
    implicitColumnExpression: source.implicitColumnExpression,
    limit: { limit: 0 },
    dateRange: [new Date(), new Date()],
  };
  const probeSql = await renderChartConfig(
    probeConfig,
    metadata,
    source.querySettings,
  );
  return aliasMapToWithClauses(chSqlToAliasMap(probeSql));
}
// ---- Pattern sampling query -----------------------------------------
/** Options for `buildPatternSampleQuery`. */
export interface PatternSampleQueryOptions {
  source: SourceResponse;
  /** Optional Lucene search restricting which events are sampled. */
  searchQuery?: string;
  startTime: Date;
  endTime: Date;
  /** Number of random rows to sample (default 100_000) */
  sampleLimit?: number;
}
/**
 * Build a query that randomly samples events for pattern mining.
 * Selects the body column and timestamp, ordered by rand().
 *
 * @param opts - Source, optional search, time range, and sample cap.
 * @param metadata - ClickHouse metadata used by the query renderer.
 * @returns The rendered ChSql (SQL text plus bound params).
 */
export async function buildPatternSampleQuery(
  opts: PatternSampleQueryOptions,
  metadata: Metadata,
): Promise<ChSql> {
  const {
    source,
    searchQuery = '',
    startTime,
    endTime,
    sampleLimit = 100_000,
  } = opts;
  const tsExpr = source.timestampValueExpression ?? 'TimestampTime';
  // Use the same select as the main event table so sample rows have all columns
  let selectExpr = source.defaultTableSelectExpression ?? '';
  if (!selectExpr && source.kind === 'trace') {
    // Trace sources have no default select expression; synthesize one.
    selectExpr = buildTraceSelectExpression(source);
  }
  // Compute alias WITH clauses so search aliases (e.g. `level`) resolve in WHERE
  const aliasWith = searchQuery
    ? await buildAliasWithClauses(source, metadata)
    : undefined;
  const config: BuilderChartConfigWithDateRange = {
    displayType: DisplayType.Search,
    select: selectExpr,
    from: source.from,
    where: searchQuery,
    // An empty search renders as a plain SQL no-op predicate.
    whereLanguage: searchQuery ? 'lucene' : 'sql',
    connection: source.connection,
    timestampValueExpression: tsExpr,
    implicitColumnExpression: source.implicitColumnExpression,
    // rand() ordering yields a uniform random sample within the range.
    orderBy: 'rand()',
    limit: { limit: sampleLimit },
    dateRange: [startTime, endTime],
    ...(aliasWith ? { with: aliasWith } : {}),
  };
  return renderChartConfig(config, metadata, source.querySettings);
}
// ---- Total count query ----------------------------------------------
/** Options for `buildTotalCountQuery`. */
export interface TotalCountQueryOptions {
  source: SourceResponse;
  /** Optional Lucene search restricting which events are counted. */
  searchQuery?: string;
  startTime: Date;
  endTime: Date;
}
/**
 * Build a query to get the total count of events matching the search.
 * The result is the numerator of the sample multiplier used to estimate
 * per-pattern totals from a random sample.
 */
export async function buildTotalCountQuery(
  opts: TotalCountQueryOptions,
  metadata: Metadata,
): Promise<ChSql> {
  const { source, searchQuery = '', startTime, endTime } = opts;
  const hasSearch = searchQuery.length > 0;

  // Lucene searches may reference select-list aliases (e.g. `level`);
  // surface them as WITH clauses so they resolve inside WHERE.
  const aliasWith = hasSearch
    ? await buildAliasWithClauses(source, metadata)
    : undefined;

  const countConfig: BuilderChartConfigWithDateRange = {
    displayType: DisplayType.Table,
    select: 'count() as total',
    from: source.from,
    where: searchQuery,
    whereLanguage: hasSearch ? 'lucene' : 'sql',
    connection: source.connection,
    timestampValueExpression:
      source.timestampValueExpression ?? 'TimestampTime',
    implicitColumnExpression: source.implicitColumnExpression,
    limit: { limit: 1 },
    dateRange: [startTime, endTime],
    ...(aliasWith ? { with: aliasWith } : {}),
  };
  return renderChartConfig(countConfig, metadata, source.querySettings);
}
// ---- Full row fetch (SELECT *) -------------------------------------
// ---- Trace waterfall queries ----------------------------------------

View file

@ -15,8 +15,11 @@ import {
SqlPreviewScreen,
} from './SubComponents';
import { TableView } from './TableView';
import { PatternView } from './PatternView';
import { PatternSamplesView } from './PatternSamplesView';
import { DetailPanel } from './DetailPanel';
import { useEventData } from './useEventData';
import { usePatternData } from './usePatternData';
import { useKeybindings } from './useKeybindings';
export default function EventViewer({
@ -76,6 +79,12 @@ export default function EventViewer({
const [traceSelectedNode, setTraceSelectedNode] = useState<SpanNode | null>(
null,
);
const [showPatterns, setShowPatterns] = useState(false);
const [patternSelectedRow, setPatternSelectedRow] = useState(0);
const [patternScrollOffset, setPatternScrollOffset] = useState(0);
const [expandedPattern, setExpandedPattern] = useState<number | null>(null);
const [sampleSelectedRow, setSampleSelectedRow] = useState(0);
const [sampleScrollOffset, setSampleScrollOffset] = useState(0);
const [timeRange, setTimeRange] = useState<TimeRange>(() => {
const now = new Date();
return { start: new Date(now.getTime() - 60 * 60 * 1000), end: now };
@ -111,6 +120,23 @@ export default function EventViewer({
expandedRow,
});
// ---- Pattern mining -----------------------------------------------
const {
patterns,
loading: patternsLoading,
error: patternsError,
totalCount: patternsTotalCount,
} = usePatternData({
clickhouseClient,
metadata,
source,
submittedQuery,
startTime: timeRange.start,
endTime: timeRange.end,
enabled: showPatterns,
});
// ---- Derived values ----------------------------------------------
const columns = useMemo(
@ -171,11 +197,22 @@ export default function EventViewer({
focusDetailSearch,
showHelp,
showSql,
showPatterns,
expandedRow,
detailTab,
traceDetailExpanded,
selectedRow,
scrollOffset,
patternSelectedRow,
patternScrollOffset,
patternCount: patterns.length,
expandedPattern,
sampleSelectedRow,
sampleScrollOffset,
sampleCount:
expandedPattern !== null
? (patterns[expandedPattern]?.samples.length ?? 0)
: 0,
isFollowing,
hasMore,
events,
@ -198,7 +235,13 @@ export default function EventViewer({
setFocusDetailSearch,
setShowHelp,
setShowSql,
setShowPatterns,
setSqlScrollOffset,
setPatternSelectedRow,
setPatternScrollOffset,
setExpandedPattern,
setSampleSelectedRow,
setSampleScrollOffset,
setSelectedRow,
setScrollOffset,
setExpandedRow,
@ -313,6 +356,27 @@ export default function EventViewer({
onTraceChSqlChange={setTraceChSql}
onTraceSelectedNodeChange={setTraceSelectedNode}
/>
) : showPatterns &&
expandedPattern !== null &&
patterns[expandedPattern] ? (
<PatternSamplesView
pattern={patterns[expandedPattern]}
columns={columns}
selectedRow={sampleSelectedRow}
scrollOffset={sampleScrollOffset}
maxRows={maxRows}
wrapLines={wrapLines}
/>
) : showPatterns ? (
<PatternView
patterns={patterns}
selectedRow={patternSelectedRow}
scrollOffset={patternScrollOffset}
maxRows={maxRows}
loading={patternsLoading}
error={patternsError}
wrapLines={wrapLines}
/>
) : (
<TableView
columns={columns}
@ -328,13 +392,35 @@ export default function EventViewer({
)}
<Footer
rowCount={events.length}
cursorPos={scrollOffset + selectedRow + 1}
rowCount={
showPatterns && expandedPattern !== null
? (patterns[expandedPattern]?.samples.length ?? 0)
: showPatterns
? patterns.length
: events.length
}
cursorPos={
showPatterns && expandedPattern !== null
? sampleScrollOffset + sampleSelectedRow + 1
: showPatterns
? patternScrollOffset + patternSelectedRow + 1
: scrollOffset + selectedRow + 1
}
wrapLines={wrapLines}
isFollowing={isFollowing}
loadingMore={loadingMore}
paginationError={paginationError}
scrollInfo={expandedRow !== null ? `Ctrl+D/U to scroll` : undefined}
scrollInfo={
expandedRow !== null
? 'Ctrl+D/U to scroll'
: showPatterns && expandedPattern !== null
? '[SAMPLES] h to go back'
: showPatterns
? patternsLoading
? '[PATTERNS] Sampling…'
: `[PATTERNS] ${patterns.length} patterns${patternsTotalCount != null ? ` from ~${patternsTotalCount.toLocaleString()} events` : ''} — l to expand, P/Esc to close`
: undefined
}
/>
</Box>
</Box>

View file

@ -0,0 +1,101 @@
import React from 'react';
import { Box, Text } from 'ink';
import type { PatternGroup } from './usePatternData';
import type { Column, EventRow } from './types';
import { formatDynamicRow } from './utils';
import { TableHeader } from './SubComponents';
// ---- Types ---------------------------------------------------------
type PatternSamplesViewProps = {
  /** The expanded pattern whose sampled events are shown. */
  pattern: PatternGroup;
  /** Table columns shared with the main event table view. */
  columns: Column[];
  /** Highlighted row index within the visible window. */
  selectedRow: number;
  /** Number of sample rows scrolled past above the window. */
  scrollOffset: number;
  /** Total rows available to the view (header rows are carved out). */
  maxRows: number;
  /** When true, cells wrap instead of truncating. */
  wrapLines: boolean;
};
// ---- Component -----------------------------------------------------
/**
 * Drill-down view for a single mined pattern: a header showing the
 * pattern template plus estimated/sampled counts, followed by a table of
 * the pattern's sampled events using the same columns as the main event
 * table.
 */
export function PatternSamplesView({
  pattern,
  columns,
  selectedRow,
  scrollOffset,
  maxRows,
  wrapLines,
}: PatternSamplesViewProps) {
  // Reserve 3 rows for the pattern header (pattern text + count + blank line)
  const tableMaxRows = Math.max(1, maxRows - 3);
  const samples = pattern.samples;
  // Window of samples currently on screen.
  const visible = samples.slice(scrollOffset, scrollOffset + tableMaxRows);
  // Blank rows needed to keep the box at a constant height.
  const emptyRows = tableMaxRows - visible.length;
  return (
    <Box flexDirection="column" marginTop={1} height={maxRows + 1}>
      {/* Pattern header */}
      <Box>
        <Text bold color="green">
          Pattern:{' '}
        </Text>
        <Text wrap="truncate">{pattern.pattern}</Text>
      </Box>
      <Box>
        <Text dimColor>
          ~{pattern.estimatedCount.toLocaleString()} estimated events (
          {pattern.count.toLocaleString()} sampled) h to go back
        </Text>
      </Box>
      <Text> </Text>
      {/* Sample events table */}
      <TableHeader columns={columns} />
      {visible.map((row: EventRow, i: number) => {
        // selectedRow indexes into the visible window, not all samples.
        const isSelected = i === selectedRow;
        const formatted = formatDynamicRow(row, columns);
        return (
          <Box key={i} overflowX="hidden">
            <Box width={2}>
              <Text color="cyan" bold>
                {isSelected ? '▸' : ' '}
              </Text>
            </Box>
            {formatted.cells.map((cell: string, ci: number) => (
              <Box
                key={ci}
                width={columns[ci]?.width ?? '10%'}
                overflowX={wrapLines ? undefined : 'hidden'}
              >
                <Text
                  wrap={wrapLines ? 'wrap' : 'truncate'}
                  color={
                    isSelected
                      ? 'cyan'
                      : ci === 0
                        ? 'gray'
                        : formatted.severityColor && ci === 1
                          ? formatted.severityColor
                          : undefined
                  }
                  bold={isSelected}
                  dimColor={ci === 0 && !isSelected}
                  inverse={isSelected}
                >
                  {cell}
                </Text>
              </Box>
            ))}
          </Box>
        );
      })}
      {emptyRows > 0 &&
        Array.from({ length: emptyRows }).map((_, i) => (
          <Text key={`pad-${i}`}> </Text>
        ))}
    </Box>
  );
}

View file

@ -0,0 +1,101 @@
import React from 'react';
import { Box, Text } from 'ink';
import Spinner from 'ink-spinner';
import type { PatternGroup } from './usePatternData';
// ---- Types ---------------------------------------------------------
type PatternViewProps = {
  /** Mined patterns (sorted by estimated count in the data hook). */
  patterns: PatternGroup[];
  /** Highlighted row index within the visible window. */
  selectedRow: number;
  /** Number of patterns scrolled past above the window. */
  scrollOffset: number;
  /** Rows available for the list body (excludes the column header). */
  maxRows: number;
  /** True while the sampling query / mining pipeline is in flight. */
  loading: boolean;
  /** Error from the sampling pipeline, if any. */
  error: Error | null;
  /** When true, cells wrap instead of truncating. */
  wrapLines: boolean;
};
// ---- Component -----------------------------------------------------
/**
 * Pattern list view: one row per mined pattern showing the estimated
 * total count (sample count × multiplier, hence the `~` prefix) and the
 * pattern template. Shows a spinner while sampling, an error line on
 * failure, and an empty-state message when no patterns were mined.
 */
export function PatternView({
  patterns,
  selectedRow,
  scrollOffset,
  maxRows,
  loading,
  error,
  wrapLines,
}: PatternViewProps) {
  const visible = patterns.slice(scrollOffset, scrollOffset + maxRows);
  // A status line (spinner / error / "No patterns found.") occupies one
  // row when present; subtract it from the padding so the fixed-height
  // box is filled exactly. Previously the error and empty states padded
  // with the full maxRows and overflowed the box by one row, while the
  // loading state (which suppresses padding) did not.
  const hasStatusLine = loading || error != null || visible.length === 0;
  const emptyRows = Math.max(
    0,
    maxRows - visible.length - (hasStatusLine ? 1 : 0),
  );
  return (
    <Box flexDirection="column" marginTop={1} height={maxRows + 1}>
      {/* Header */}
      <Box overflowX="hidden">
        <Box width="12%">
          <Text bold dimColor wrap="truncate">
            Est. Count
          </Text>
        </Box>
        <Box flexGrow={1}>
          <Text bold dimColor wrap="truncate">
            Pattern
          </Text>
        </Box>
      </Box>
      {loading ? (
        <Text>
          <Spinner type="dots" /> Sampling events and mining patterns
        </Text>
      ) : error ? (
        <Text color="red">Error: {error.message}</Text>
      ) : visible.length === 0 ? (
        <Text dimColor>No patterns found.</Text>
      ) : null}
      {!loading &&
        !error &&
        visible.map((p, i) => {
          // selectedRow indexes into the visible window, not all patterns.
          const isSelected = i === selectedRow;
          return (
            <Box key={p.id} overflowX="hidden">
              <Box width={2}>
                <Text color="cyan" bold>
                  {isSelected ? '▸' : ' '}
                </Text>
              </Box>
              <Box width="12%" overflowX={wrapLines ? undefined : 'hidden'}>
                <Text
                  color={isSelected ? 'cyan' : 'yellow'}
                  bold={isSelected}
                  inverse={isSelected}
                  wrap={wrapLines ? 'wrap' : 'truncate'}
                >
                  ~{p.estimatedCount.toLocaleString()}
                </Text>
              </Box>
              <Box flexGrow={1} overflowX={wrapLines ? undefined : 'hidden'}>
                <Text
                  color={isSelected ? 'cyan' : undefined}
                  bold={isSelected}
                  inverse={isSelected}
                  wrap={wrapLines ? 'wrap' : 'truncate'}
                >
                  {p.pattern}
                </Text>
              </Box>
            </Box>
          );
        })}
      {emptyRows > 0 &&
        !loading &&
        Array.from({ length: emptyRows }).map((_, i) => (
          <Text key={`pad-${i}`}> </Text>
        ))}
    </Box>
  );
}

View file

@ -184,6 +184,7 @@ export const HelpScreen = React.memo(function HelpScreen() {
['Shift+Tab', 'Previous source / saved search'],
['t', 'Edit time range in $EDITOR'],
['s', 'Edit select clause in $EDITOR'],
['P (Shift+P)', 'Show event patterns'],
['D', 'Show generated SQL'],
['f', 'Toggle follow mode (live tail)'],
['o', 'Open trace in browser'],

View file

@ -22,11 +22,19 @@ export interface KeybindingParams {
focusDetailSearch: boolean;
showHelp: boolean;
showSql: boolean;
showPatterns: boolean;
expandedRow: number | null;
detailTab: 'overview' | 'columns' | 'trace';
traceDetailExpanded: boolean;
selectedRow: number;
scrollOffset: number;
patternSelectedRow: number;
patternScrollOffset: number;
patternCount: number;
expandedPattern: number | null;
sampleSelectedRow: number;
sampleScrollOffset: number;
sampleCount: number;
isFollowing: boolean;
hasMore: boolean;
events: EventRow[];
@ -58,7 +66,13 @@ export interface KeybindingParams {
setFocusDetailSearch: React.Dispatch<React.SetStateAction<boolean>>;
setShowHelp: React.Dispatch<React.SetStateAction<boolean>>;
setShowSql: React.Dispatch<React.SetStateAction<boolean>>;
setShowPatterns: React.Dispatch<React.SetStateAction<boolean>>;
setSqlScrollOffset: React.Dispatch<React.SetStateAction<number>>;
setPatternSelectedRow: React.Dispatch<React.SetStateAction<number>>;
setPatternScrollOffset: React.Dispatch<React.SetStateAction<number>>;
setExpandedPattern: React.Dispatch<React.SetStateAction<number | null>>;
setSampleSelectedRow: React.Dispatch<React.SetStateAction<number>>;
setSampleScrollOffset: React.Dispatch<React.SetStateAction<number>>;
setSelectedRow: React.Dispatch<React.SetStateAction<number>>;
setScrollOffset: React.Dispatch<React.SetStateAction<number>>;
setExpandedRow: React.Dispatch<React.SetStateAction<number | null>>;
@ -93,11 +107,19 @@ export function useKeybindings(params: KeybindingParams): void {
focusDetailSearch,
showHelp,
showSql,
showPatterns,
expandedRow,
detailTab,
traceDetailExpanded,
selectedRow,
scrollOffset,
patternSelectedRow,
patternScrollOffset,
patternCount,
expandedPattern,
sampleSelectedRow,
sampleScrollOffset,
sampleCount,
isFollowing,
hasMore,
events,
@ -120,7 +142,13 @@ export function useKeybindings(params: KeybindingParams): void {
setFocusDetailSearch,
setShowHelp,
setShowSql,
setShowPatterns,
setSqlScrollOffset,
setPatternSelectedRow,
setPatternScrollOffset,
setExpandedPattern,
setSampleSelectedRow,
setSampleScrollOffset,
setSelectedRow,
setScrollOffset,
setExpandedRow,
@ -213,6 +241,155 @@ export function useKeybindings(params: KeybindingParams): void {
}
return;
}
// Pattern samples navigation (expanded pattern)
if (showPatterns && expandedPattern !== null && expandedRow === null) {
if (input === 'h' || key.escape) {
setExpandedPattern(null);
return;
}
if (input === 'j' || key.downArrow) {
setSampleSelectedRow(r => {
const next = r + 1;
const visibleCount = Math.min(
sampleCount - sampleScrollOffset,
maxRows,
);
if (next >= maxRows) {
setSampleScrollOffset(o =>
Math.min(o + 1, Math.max(0, sampleCount - maxRows)),
);
return r;
}
return Math.min(next, visibleCount - 1);
});
return;
}
if (input === 'k' || key.upArrow) {
setSampleSelectedRow(r => {
const next = r - 1;
if (next < 0) {
setSampleScrollOffset(o => Math.max(0, o - 1));
return 0;
}
return next;
});
return;
}
if (input === 'G') {
const maxOffset = Math.max(0, sampleCount - maxRows);
setSampleScrollOffset(maxOffset);
setSampleSelectedRow(Math.min(sampleCount - 1, maxRows - 1));
return;
}
if (input === 'g') {
setSampleScrollOffset(0);
setSampleSelectedRow(0);
return;
}
if (key.ctrl && input === 'd') {
const half = Math.floor(maxRows / 2);
const maxOffset = Math.max(0, sampleCount - maxRows);
setSampleScrollOffset(o => Math.min(o + half, maxOffset));
return;
}
if (key.ctrl && input === 'u') {
const half = Math.floor(maxRows / 2);
setSampleScrollOffset(o => Math.max(0, o - half));
return;
}
if (input === 'w') {
setWrapLines(w => !w);
return;
}
if (input === 'q') process.exit(0);
return;
}
// Pattern view navigation
if (showPatterns && expandedRow === null) {
if (input === 'P') {
setShowPatterns(false);
if (wasFollowingRef.current) {
setIsFollowing(true);
}
return;
}
if (key.escape) {
setShowPatterns(false);
if (wasFollowingRef.current) {
setIsFollowing(true);
}
return;
}
if (key.return || input === 'l') {
setExpandedPattern(patternScrollOffset + patternSelectedRow);
setSampleSelectedRow(0);
setSampleScrollOffset(0);
return;
}
if (input === 'j' || key.downArrow) {
setPatternSelectedRow(r => {
const next = r + 1;
const visibleCount = Math.min(
patternCount - patternScrollOffset,
maxRows,
);
if (next >= maxRows) {
setPatternScrollOffset(o =>
Math.min(o + 1, Math.max(0, patternCount - maxRows)),
);
return r;
}
return Math.min(next, visibleCount - 1);
});
return;
}
if (input === 'k' || key.upArrow) {
setPatternSelectedRow(r => {
const next = r - 1;
if (next < 0) {
setPatternScrollOffset(o => Math.max(0, o - 1));
return 0;
}
return next;
});
return;
}
if (input === 'G') {
const maxOffset = Math.max(0, patternCount - maxRows);
setPatternScrollOffset(maxOffset);
setPatternSelectedRow(Math.min(patternCount - 1, maxRows - 1));
return;
}
if (input === 'g') {
setPatternScrollOffset(0);
setPatternSelectedRow(0);
return;
}
if (key.ctrl && input === 'd') {
const half = Math.floor(maxRows / 2);
const maxOffset = Math.max(0, patternCount - maxRows);
setPatternScrollOffset(o => Math.min(o + half, maxOffset));
return;
}
if (key.ctrl && input === 'u') {
const half = Math.floor(maxRows / 2);
setPatternScrollOffset(o => Math.max(0, o - half));
return;
}
if (input === 'w') {
setWrapLines(w => !w);
return;
}
if (input === '/') {
setFocusSearch(true);
return;
}
if (input === 'q') process.exit(0);
return;
}
// ---- Trace tab keybindings ----------------------------------------
if (expandedRow !== null && detailTab === 'trace') {
// When detail view is expanded (full-page Event Details):
@ -388,6 +565,14 @@ export function useKeybindings(params: KeybindingParams): void {
handleTabSwitch(key.shift ? -1 : 1);
return;
}
if (input === 'P') {
wasFollowingRef.current = isFollowing;
setIsFollowing(false);
setShowPatterns(true);
setPatternSelectedRow(0);
setPatternScrollOffset(0);
return;
}
if (input === 'A' && onOpenAlerts) {
onOpenAlerts();
return;

View file

@ -0,0 +1,333 @@
/**
* Fetches a random sample of events from ClickHouse, mines patterns
* using the Drain algorithm, and estimates total counts using a
* sampleMultiplier mirroring the web frontend's useGroupedPatterns.
*/
import { useState, useEffect, useCallback, useRef } from 'react';
import {
TemplateMiner,
TemplateMinerConfig,
} from '@hyperdx/common-utils/dist/drain';
import type { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
import { convertDateRangeToGranularityString } from '@hyperdx/common-utils/dist/core/utils';
import type { SourceResponse, ProxyClickhouseClient } from '@/api/client';
import {
buildPatternSampleQuery,
buildTotalCountQuery,
} from '@/api/eventQuery';
import { getEventBody } from '@/shared/source';
import type { EventRow } from './types';
import { flatten } from './utils';
// ---- Constants -----------------------------------------------------
const SAMPLES = 10_000;
// ---- Time bucketing utilities --------------------------------------
/** Convert a granularity string such as "5 minute" into a second count. */
function granularityToSeconds(granularity: string): number {
  const [countPart, unitPart] = granularity.split(' ');
  const count = parseInt(countPart, 10);
  // Seconds per unit; unrecognized units fall back to minutes.
  const unitSeconds: Record<string, number> = {
    second: 1,
    minute: 60,
    hour: 3600,
    day: 86400,
  };
  return count * (unitSeconds[unitPart] ?? 60);
}
/** Snap a millisecond timestamp down to the start of its bucket. */
function toStartOfBucket(ts: number, granularityMs: number): number {
  return Math.floor(ts / granularityMs) * granularityMs;
}
/** List every bucket-start timestamp covering [startMs, endMs). */
function generateBuckets(
  startMs: number,
  endMs: number,
  granularityMs: number,
): number[] {
  const buckets: number[] = [];
  for (
    let ts = toStartOfBucket(startMs, granularityMs);
    ts < endMs;
    ts += granularityMs
  ) {
    buckets.push(ts);
  }
  return buckets;
}
// ---- Types ---------------------------------------------------------
/** One point of time-bucketed trend data for a pattern. */
export interface TrendBucket {
  /** Bucket start, in ms since the epoch. */
  ts: number;
  /** Estimated event count in this bucket (sample count × multiplier). */
  count: number;
}
/** A mined log pattern with its sampled rows and count estimates. */
export interface PatternGroup {
  /** Drain cluster ID, stringified. */
  id: string;
  /** The mined template, e.g. "user <*> logged in". */
  pattern: string;
  /** Raw count within the sample */
  count: number;
  /** Estimated total count (count * sampleMultiplier), prefixed with ~ in display */
  estimatedCount: number;
  /** The sampled event rows assigned to this cluster. */
  samples: EventRow[];
  /** Time-bucketed trend data for sparkline */
  trend: TrendBucket[];
}
/** Inputs to {@link usePatternData}. */
export interface UsePatternDataParams {
  clickhouseClient: ProxyClickhouseClient;
  metadata: Metadata;
  source: SourceResponse;
  /** The currently submitted search (may be empty). */
  submittedQuery: string;
  startTime: Date;
  endTime: Date;
  /** Only fetch when true (i.e., pattern view is open) */
  enabled: boolean;
}
/** State exposed by {@link usePatternData}. */
export interface UsePatternDataReturn {
  patterns: PatternGroup[];
  loading: boolean;
  error: Error | null;
  /** True total matching events, from the parallel count query. */
  totalCount: number | null;
  /** How many rows the sampling query actually returned. */
  sampledRowCount: number;
}
// ---- Hook ----------------------------------------------------------
/**
 * React hook powering the CLI pattern view.
 *
 * When `enabled`, fires two ClickHouse queries in parallel — a random
 * sample (ORDER BY rand(), capped at SAMPLES rows) and a total count —
 * then mines the sampled log bodies with the Drain `TemplateMiner`,
 * groups rows by cluster ID, and scales per-cluster counts by
 * `total / sampledRowCount` to estimate true pattern frequencies.
 * Per-pattern trend data is bucketed at a granularity derived from the
 * date range.
 *
 * NOTE(review): responses are applied without an abort or staleness
 * check — if params change while a fetch is in flight, the earlier
 * response can still land after the newer one; confirm this is
 * acceptable for the CLI.
 */
export function usePatternData({
  clickhouseClient,
  metadata,
  source,
  submittedQuery,
  startTime,
  endTime,
  enabled,
}: UsePatternDataParams): UsePatternDataReturn {
  const [patterns, setPatterns] = useState<PatternGroup[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<Error | null>(null);
  const [totalCount, setTotalCount] = useState<number | null>(null);
  const [sampledRowCount, setSampledRowCount] = useState(0);
  // Track the last query params to avoid redundant fetches
  const lastFetchRef = useRef<string>('');
  // Column holding the log body; undefined means "fall back to the last
  // result column" below when the sample rows come back.
  const bodyColumn = (() => {
    const expr = getEventBody(source);
    if (expr) return expr;
    return undefined;
  })();
  const fetchPatterns = useCallback(async () => {
    // Identity of this fetch: same source/search/range ⇒ same key.
    const fetchKey = JSON.stringify({
      source: source.id,
      submittedQuery,
      startTime: startTime.getTime(),
      endTime: endTime.getTime(),
    });
    // Skip if we already fetched for these exact params
    if (lastFetchRef.current === fetchKey) return;
    lastFetchRef.current = fetchKey;
    setLoading(true);
    setError(null);
    try {
      // Fire both queries in parallel
      const [sampleChSql, countChSql] = await Promise.all([
        buildPatternSampleQuery(
          {
            source,
            searchQuery: submittedQuery,
            startTime,
            endTime,
            sampleLimit: SAMPLES,
          },
          metadata,
        ),
        buildTotalCountQuery(
          { source, searchQuery: submittedQuery, startTime, endTime },
          metadata,
        ),
      ]);
      // Execute both rendered queries in parallel as well.
      const [sampleResult, countResult] = await Promise.all([
        clickhouseClient.query({
          query: sampleChSql.sql,
          query_params: sampleChSql.params,
          format: 'JSON',
          connectionId: source.connection,
        }),
        clickhouseClient.query({
          query: countChSql.sql,
          query_params: countChSql.params,
          format: 'JSON',
          connectionId: source.connection,
        }),
      ]);
      const sampleJson = (await sampleResult.json()) as { data: EventRow[] };
      const countJson = (await countResult.json()) as {
        data: Array<Record<string, string | number>>;
      };
      const sampleRows = sampleJson.data ?? [];
      const total = Number(countJson.data?.[0]?.total ?? 0);
      setTotalCount(total);
      setSampledRowCount(sampleRows.length);
      if (sampleRows.length === 0) {
        setPatterns([]);
        setLoading(false);
        return;
      }
      // Determine columns from the result keys
      const resultKeys = Object.keys(sampleRows[0]);
      const effectiveBodyColumn =
        bodyColumn ?? resultKeys[resultKeys.length - 1];
      // Use the source's timestamp expression, falling back to the first column
      const tsExpr = source.timestampValueExpression ?? 'TimestampTime';
      const tsColumn = resultKeys.find(k => k === tsExpr) ?? resultKeys[0];
      // Compute granularity for trend buckets
      const granularity = convertDateRangeToGranularityString(
        [startTime, endTime],
        24,
      );
      const granularityMs = granularityToSeconds(granularity) * 1000;
      const allBuckets = generateBuckets(
        startTime.getTime(),
        endTime.getTime(),
        granularityMs,
      );
      // Mine patterns: feed every sampled body through Drain, recording
      // the cluster each row lands in plus its timestamp.
      const config = new TemplateMinerConfig();
      const miner = new TemplateMiner(config);
      const clustered: Array<{
        clusterId: number;
        row: EventRow;
        tsMs: number;
      }> = [];
      for (const row of sampleRows) {
        const body = row[effectiveBodyColumn];
        const text = body != null ? flatten(String(body)) : '';
        const result = miner.addLogMessage(text);
        const tsRaw = row[tsColumn];
        // Rows without a parseable timestamp fall into the first bucket.
        const tsMs =
          tsRaw != null
            ? new Date(String(tsRaw)).getTime()
            : startTime.getTime();
        clustered.push({ clusterId: result.clusterId, row, tsMs });
      }
      // Group by cluster ID
      const groups = new Map<
        number,
        {
          rows: EventRow[];
          template: string;
          bucketCounts: Map<number, number>;
        }
      >();
      for (const { clusterId, row, tsMs } of clustered) {
        const bucket = toStartOfBucket(tsMs, granularityMs);
        const existing = groups.get(clusterId);
        if (existing) {
          existing.rows.push(row);
          existing.bucketCounts.set(
            bucket,
            (existing.bucketCounts.get(bucket) ?? 0) + 1,
          );
        } else {
          const body = row[effectiveBodyColumn];
          const text = body != null ? flatten(String(body)) : '';
          // Re-match the first row to get the cluster's final template
          // (presumably 'fallback' selects Drain's fallback match
          // strategy — confirm against the drain library).
          const match = miner.match(text, 'fallback');
          const bucketCounts = new Map<number, number>();
          bucketCounts.set(bucket, 1);
          groups.set(clusterId, {
            rows: [row],
            template: match?.getTemplate() ?? text,
            bucketCounts,
          });
        }
      }
      // Compute sampleMultiplier
      const sampleMultiplier =
        total > 0 && sampleRows.length > 0 ? total / sampleRows.length : 1;
      // Convert to sorted array with estimated counts and trend data
      const result: PatternGroup[] = [];
      for (const [id, { rows, template, bucketCounts }] of groups) {
        const trend: TrendBucket[] = allBuckets.map(bucketTs => ({
          ts: bucketTs,
          count: Math.round(
            (bucketCounts.get(bucketTs) ?? 0) * sampleMultiplier,
          ),
        }));
        result.push({
          id: String(id),
          pattern: template,
          count: rows.length,
          // Floor at 1: any pattern seen in the sample exists at least once.
          estimatedCount: Math.max(
            Math.round(rows.length * sampleMultiplier),
            1,
          ),
          samples: rows,
          trend,
        });
      }
      result.sort((a, b) => b.estimatedCount - a.estimatedCount);
      setPatterns(result);
    } catch (err: unknown) {
      setError(err instanceof Error ? err : new Error(String(err)));
      // Clear the fetch key so a retry will re-fetch
      lastFetchRef.current = '';
    } finally {
      setLoading(false);
    }
  }, [
    clickhouseClient,
    metadata,
    source,
    submittedQuery,
    startTime,
    endTime,
    bodyColumn,
  ]);
  useEffect(() => {
    if (enabled) {
      fetchPatterns();
    }
  }, [enabled, fetchPatterns]);
  // Reset the fetch key when the view closes so reopening re-fetches.
  // Note: `patterns` itself is intentionally NOT cleared here, so the
  // previous result is still displayed while the refresh is in flight.
  useEffect(() => {
    if (!enabled) {
      lastFetchRef.current = '';
    }
  }, [enabled]);
  return { patterns, loading, error, totalCount, sampledRowCount };
}

View file

@ -284,7 +284,13 @@ export default function TraceWaterfall({
<Box width={labelWidth} overflowX="hidden">
<Text
wrap="truncate"
color={isHighlighted ? 'white' : isLog ? 'green' : statusColor}
color={
isHighlighted
? 'white'
: isLog
? (statusColor ?? 'green')
: statusColor
}
bold={!!statusColor}
inverse={isHighlighted}
>

View file

@ -51,7 +51,12 @@ export function getStatusColor(node: SpanNode): 'red' | 'yellow' | undefined {
}
/**
 * Picks the waterfall bar color for a node.
 * Logs are colored by severity text; spans by OTel-style StatusCode.
 * Fix: removed a leftover `if (node.kind === 'log') return 'green';` guard
 * that made the severity-aware log branch below unreachable.
 */
export function getBarColor(node: SpanNode): string {
  if (node.kind === 'log') {
    const sev = node.StatusCode?.toLowerCase();
    if (sev === 'error' || sev === 'fatal' || sev === 'critical') return 'red';
    if (sev === 'warn' || sev === 'warning') return 'yellow';
    return 'green';
  }
  // Spans: '2'/'Error' = error status, '1' highlighted yellow.
  // NOTE(review): OTel numeric '1' is STATUS_CODE_OK — confirm yellow is intended.
  if (node.StatusCode === '2' || node.StatusCode === 'Error') return 'red';
  if (node.StatusCode === '1') return 'yellow';
  return 'cyan';

View file

@ -0,0 +1,287 @@
import { TemplateMinerConfig } from '../drain/config';
import { Drain } from '../drain/drain';
import { TemplateMiner } from '../drain/template-miner';
// Tests for the core Drain clusterer, ported from the Python Drain3 suite.
// Deviations from the Python expectations are noted inline.
describe('Drain', () => {
  // Messages with fewer tokens than the tree depth still cluster correctly,
  // keyed by exact token content; a repeat add reports no change.
  it('test_add_shorter_than_depth_message', () => {
    const model = new Drain(4);
    let [cluster, changeType] = model.addLogMessage('hello');
    expect(changeType).toBe('cluster_created');
    [cluster, changeType] = model.addLogMessage('hello');
    expect(changeType).toBe('none');
    [cluster, changeType] = model.addLogMessage('otherword');
    expect(changeType).toBe('cluster_created');
    expect(model.clusterCount).toBe(2);
  });
  // Default similarity threshold (0.4): similar lines merge into one cluster
  // and the returned template accumulates '<*>' wildcards as it converges.
  it('test_add_log_message', () => {
    const model = new Drain();
    const entries = `
Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth]
Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth]
Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2
Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2
Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2
Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth]`
      .split('\n')
      .filter(l => l.trim().length > 0);
    // Expected template as returned after each add (templates evolve).
    const expected = `
Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth]
Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth]
Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2
Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2
Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2
Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth]`
      .split('\n')
      .filter(l => l.trim().length > 0)
      .map(l => l.trim());
    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }
    expect(actual).toEqual(expected);
    // Python test reports 8 because splitlines() includes 2 empty entries from
    // triple-quote; we only feed the 6 real log lines, so total is 6.
    expect(model.getTotalClusterSize()).toBe(6);
  });
  // Higher threshold (0.75): the two 'preauth' variants are no longer similar
  // enough to merge, so they stay as distinct clusters with intact templates.
  it('test_add_log_message_sim_75', () => {
    const model = new Drain(4, 0.75, 100);
    const entries = `
Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth]
Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth]
Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2
Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2
Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2
Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth]`
      .split('\n')
      .filter(l => l.trim().length > 0);
    const expected = `
Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth]
Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth]
Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2
Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2
Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2
Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth]`
      .split('\n')
      .filter(l => l.trim().length > 0)
      .map(l => l.trim());
    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }
    expect(actual).toEqual(expected);
    expect(model.getTotalClusterSize()).toBe(6);
  });
  // maxClusters = 1: each new cluster evicts the previous one, so 'A format 3'
  // starts fresh instead of matching the (evicted) 'A format <*>' cluster.
  it('test_max_clusters', () => {
    const model = new Drain(4, 0.4, 100, 1);
    const entries = `
A format 1
A format 2
B format 1
B format 2
A format 3`
      .split('\n')
      .filter(l => l.trim().length > 0);
    const expected = [
      'A format 1',
      'A format <*>',
      'B format 1',
      'B format <*>',
      'A format 3',
    ];
    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }
    expect(actual).toEqual(expected);
    // Only the surviving cluster's size is counted after evictions.
    expect(model.getTotalClusterSize()).toBe(1);
  });
  // LRU eviction where the evicted clusters live under different leaf nodes
  // of the prefix tree (first token differs).
  it('test_max_clusters_lru_multiple_leaf_nodes', () => {
    const model = new Drain(4, 0.4, 100, 2, [], '*');
    const entries = [
      'A A A',
      'A A B',
      'B A A',
      'B A B',
      'C A A',
      'C A B',
      'B A A',
      'A A A',
    ];
    const expected = [
      'A A A',
      'A A *',
      'B A A',
      'B A *',
      'C A A',
      'C A *',
      'B A *',
      'A A A',
    ];
    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }
    expect(actual).toEqual(expected);
    expect(model.getTotalClusterSize()).toBe(4);
  });
  // LRU eviction where all clusters share one leaf node (same leading token):
  // stale ids must be pruned from the leaf when new clusters register.
  it('test_max_clusters_lru_single_leaf_node', () => {
    const model = new Drain(4, 0.4, 100, 2, [], '*');
    const entries = [
      'A A A',
      'A A B',
      'A B A',
      'A B B',
      'A C A',
      'A C B',
      'A B A',
      'A A A',
    ];
    const expected = [
      'A A A',
      'A A *',
      'A B A',
      'A B *',
      'A C A',
      'A C *',
      'A B *',
      'A A A',
    ];
    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }
    expect(actual).toEqual(expected);
  });
  // match() requires full similarity and never learns: near-misses on a
  // wildcard position match, but any literal-token mismatch returns null.
  it('test_match_only', () => {
    const model = new Drain();
    model.addLogMessage('aa aa aa');
    model.addLogMessage('aa aa bb');
    model.addLogMessage('aa aa cc');
    model.addLogMessage('xx yy zz');
    let c = model.match('aa aa tt');
    expect(c).not.toBeNull();
    expect(c!.clusterId).toBe(1);
    c = model.match('xx yy zz');
    expect(c).not.toBeNull();
    expect(c!.clusterId).toBe(2);
    c = model.match('xx yy rr');
    expect(c).toBeNull();
    c = model.match('nothing');
    expect(c).toBeNull();
  });
  // createTemplate merges two equal-length token lists position-wise,
  // replacing mismatches with the wildcard; unequal lengths throw.
  it('test_create_template', () => {
    const model = new Drain(4, 0.4, 100, null, [], '*');
    const seq1 = ['aa', 'bb', 'dd'];
    const seq2 = ['aa', 'bb', 'cc'];
    let template = model.createTemplate(seq1, seq2);
    expect(template).toEqual(['aa', 'bb', '*']);
    template = model.createTemplate(seq1, seq1);
    expect(template).toEqual(seq1);
    expect(() => model.createTemplate(seq1, ['aa'])).toThrow();
  });
});
// Tests for the TemplateMiner facade: masking, matching without learning,
// and parameter extraction layered on top of the Drain clusterer.
describe('TemplateMiner', () => {
  it('add_log_message with masking', () => {
    // IP and number masking rules with custom '<:' / ':>' delimiters.
    const ipRule = {
      pattern:
        '((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)',
      maskWith: 'IP',
    };
    const numRule = {
      pattern: '((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)',
      maskWith: 'NUM',
    };
    const config = new TemplateMinerConfig();
    config.maskingInstructions = [ipRule, numRule];
    config.maskPrefix = '<:';
    config.maskSuffix = ':>';
    const miner = new TemplateMiner(config);
    const first = miner.addLogMessage('connected to 10.0.0.1');
    expect(first.changeType).toBe('cluster_created');
    expect(first.clusterId).toBe(1);
    expect(first.templateMined).toContain('<:IP:>');
    // A second IP-bearing line masks to the identical content: no change.
    const second = miner.addLogMessage('connected to 192.168.0.1');
    expect(second.changeType).toBe('none');
    expect(second.clusterId).toBe(1);
  });
  it('match after training', () => {
    const miner = new TemplateMiner(new TemplateMinerConfig());
    for (const line of ['user alice logged in', 'user bob logged in']) {
      miner.addLogMessage(line);
    }
    const hit = miner.match('user charlie logged in');
    expect(hit).not.toBeNull();
    expect(hit!.clusterId).toBe(1);
    const miss = miner.match('something completely different');
    expect(miss).toBeNull();
  });
  it('extract_parameters', () => {
    const config = new TemplateMinerConfig();
    config.maskingInstructions = [
      {
        pattern: '((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)',
        maskWith: 'NUM',
      },
    ];
    const miner = new TemplateMiner(config);
    for (const line of [
      'user johndoe logged in 11 minutes ago',
      'user janedoe logged in 5 minutes ago',
    ]) {
      miner.addLogMessage(line);
    }
    const mined = miner.addLogMessage('user bob logged in 3 minutes ago');
    const extracted = miner.extractParameters(
      mined.templateMined,
      'user bob logged in 3 minutes ago',
      false,
    );
    expect(extracted).not.toBeNull();
    expect(extracted!.length).toBeGreaterThan(0);
  });
});

View file

@ -0,0 +1,58 @@
/** Plain-object form of one masking rule, as held by TemplateMinerConfig. */
export interface MaskingInstructionConfig {
  /** Regex source applied (globally) to each log line. */
  pattern: string;
  /** Mask name substituted, wrapped in maskPrefix/maskSuffix, for each match. */
  maskWith: string;
}
/**
 * Configuration for TemplateMiner, mirroring Drain3's snake_case options.
 * Field defaults match the values assigned below.
 */
export class TemplateMinerConfig {
  /** Depth of the Drain prefix tree (Drain requires at least 3). */
  drainDepth: number = 4;
  /** Similarity threshold for merging into an existing cluster. */
  drainSimTh: number = 0.4;
  /** Max children per tree node before new tokens fall back to the wildcard. */
  drainMaxChildren: number = 100;
  /** Cap on stored clusters (LRU-evicted); null = unbounded. */
  drainMaxClusters: number | null = null;
  /** Extra substrings treated as token separators besides whitespace. */
  drainExtraDelimiters: string[] = [];
  maskPrefix: string = '<';
  maskSuffix: string = '>';
  maskingInstructions: MaskingInstructionConfig[] = [];
  /** When true, digit-bearing tokens are parametrized in the tree. */
  parametrizeNumericTokens: boolean = true;
  /** Capacity of the parameter-extraction regex cache. */
  parameterExtractionCacheCapacity: number = 3000;
  /**
   * Builds a config from a Drain3-style snake_case JSON object.
   * Ill-typed or missing fields keep their defaults. Masking-instruction
   * entries accept either snake_case (regex_pattern / mask_with) or
   * camelCase keys; entries without both fields as strings are dropped
   * (previously they produced entries with undefined pattern/maskWith).
   */
  static fromJSON(json: Record<string, unknown>): TemplateMinerConfig {
    const config = new TemplateMinerConfig();
    if (typeof json.drain_depth === 'number')
      config.drainDepth = json.drain_depth;
    if (typeof json.drain_sim_th === 'number')
      config.drainSimTh = json.drain_sim_th;
    if (typeof json.drain_max_children === 'number')
      config.drainMaxChildren = json.drain_max_children;
    if (
      typeof json.drain_max_clusters === 'number' ||
      json.drain_max_clusters === null
    )
      config.drainMaxClusters = json.drain_max_clusters;
    if (Array.isArray(json.drain_extra_delimiters)) {
      config.drainExtraDelimiters = (
        json.drain_extra_delimiters as unknown[]
      ).filter((v): v is string => typeof v === 'string');
    }
    if (typeof json.mask_prefix === 'string')
      config.maskPrefix = json.mask_prefix;
    if (typeof json.mask_suffix === 'string')
      config.maskSuffix = json.mask_suffix;
    if (typeof json.parametrize_numeric_tokens === 'boolean')
      config.parametrizeNumericTokens = json.parametrize_numeric_tokens;
    if (typeof json.parameter_extraction_cache_capacity === 'number')
      config.parameterExtractionCacheCapacity =
        json.parameter_extraction_cache_capacity;
    if (Array.isArray(json.masking_instructions)) {
      const instructions: MaskingInstructionConfig[] = [];
      for (const item of json.masking_instructions as unknown[]) {
        if (typeof item !== 'object' || item === null) continue;
        const mi = item as Record<string, unknown>;
        const pattern = mi.regex_pattern ?? mi.pattern;
        const maskWith = mi.mask_with ?? mi.maskWith;
        // Skip malformed entries instead of creating undefined-field rules.
        if (typeof pattern === 'string' && typeof maskWith === 'string') {
          instructions.push({ pattern, maskWith });
        }
      }
      config.maskingInstructions = instructions;
    }
    return config;
  }
}

View file

@ -0,0 +1,370 @@
import { LogCluster } from './log-cluster';
import { LruCache } from './lru-cache';
import { Node } from './node';
/**
 * Core Drain log-template clustering algorithm (TypeScript port of Drain3's
 * Drain class).
 *
 * Tokenized messages are routed through a fixed-depth prefix tree: the first
 * level keys on token count, inner levels key on leading tokens (with
 * `paramStr` as a wildcard child), and the reached node holds candidate
 * cluster ids that are ranked token-wise by getSeqDistance.
 */
export class Drain {
  logClusterDepth: number;
  // Levels usable for token keys: total depth minus root and token-count level.
  private maxNodeDepth: number;
  simTh: number;
  maxChildren: number;
  rootNode: Node;
  extraDelimiters: string[];
  maxClusters: number | null;
  // Wildcard placeholder used in templates (default '<*>').
  paramStr: string;
  parametrizeNumericTokens: boolean;
  // Exactly one store is non-null: a plain Map when maxClusters is null,
  // otherwise an LRU cache bounded at maxClusters.
  private unlimitedStore: Map<number, LogCluster> | null;
  private limitedStore: LruCache<LogCluster> | null;
  // Monotonic id source; ids are never reused, even after LRU eviction.
  clustersCounter: number;
  /**
   * @param depth total template depth; must be at least 3
   * @param simTh similarity threshold for joining an existing cluster
   * @param maxChildren max children per tree node before wildcard fallback
   * @param maxClusters cap on stored clusters (LRU-evicted), null = unbounded
   * @param extraDelimiters substrings converted to spaces before tokenizing
   * @param paramStr wildcard placeholder token
   * @param parametrizeNumericTokens route digit-bearing tokens to the wildcard child
   */
  constructor(
    depth: number = 4,
    simTh: number = 0.4,
    maxChildren: number = 100,
    maxClusters: number | null = null,
    extraDelimiters: string[] = [],
    paramStr: string = '<*>',
    parametrizeNumericTokens: boolean = true,
  ) {
    if (depth < 3) {
      throw new Error('depth argument must be at least 3');
    }
    this.logClusterDepth = depth;
    this.maxNodeDepth = depth - 2;
    this.simTh = simTh;
    this.maxChildren = maxChildren;
    this.rootNode = new Node();
    this.extraDelimiters = extraDelimiters;
    this.maxClusters = maxClusters;
    this.paramStr = paramStr;
    this.parametrizeNumericTokens = parametrizeNumericTokens;
    this.clustersCounter = 0;
    if (maxClusters !== null) {
      this.unlimitedStore = null;
      this.limitedStore = new LruCache<LogCluster>(maxClusters);
    } else {
      this.unlimitedStore = new Map<number, LogCluster>();
      this.limitedStore = null;
    }
  }
  /** Number of clusters currently stored (after any LRU evictions). */
  get clusterCount(): number {
    if (this.unlimitedStore) return this.unlimitedStore.size;
    return this.limitedStore!.size;
  }
  /** Sum of `size` over all stored clusters (evicted clusters not counted). */
  getTotalClusterSize(): number {
    let total = 0;
    if (this.unlimitedStore) {
      for (const c of this.unlimitedStore.values()) total += c.size;
    } else {
      for (const c of this.limitedStore!.values()) total += c.size;
    }
    return total;
  }
  // Store lookup that does NOT refresh LRU recency (used while ranking).
  private clusterPeek(id: number): LogCluster | undefined {
    if (this.unlimitedStore) return this.unlimitedStore.get(id);
    return this.limitedStore!.peek(id);
  }
  // Store lookup that marks the cluster as most recently used.
  private clusterGet(id: number): LogCluster | undefined {
    if (this.unlimitedStore) return this.unlimitedStore.get(id);
    return this.limitedStore!.get(id);
  }
  private clusterContains(id: number): boolean {
    if (this.unlimitedStore) return this.unlimitedStore.has(id);
    return this.limitedStore!.has(id);
  }
  // Insert a cluster; in the bounded store this may evict the LRU cluster.
  private clusterInsert(id: number, cluster: LogCluster): void {
    if (this.unlimitedStore) {
      this.unlimitedStore.set(id, cluster);
    } else {
      this.limitedStore!.put(id, cluster);
    }
  }
  // True when the string contains any ASCII digit (char codes 48..57).
  private static hasNumbers(s: string): boolean {
    for (let i = 0; i < s.length; i++) {
      const c = s.charCodeAt(i);
      if (c >= 48 && c <= 57) return true;
    }
    return false;
  }
  /** Tokenizes content: extra delimiters become spaces, then split on whitespace. */
  getContentAsTokens(content: string): string[] {
    let c = content.trim();
    for (const delimiter of this.extraDelimiters) {
      c = c.split(delimiter).join(' ');
    }
    if (c.length === 0) return [];
    return c.split(/\s+/);
  }
  /**
   * Similarity of a template (seq1) against a token list (seq2) of equal
   * length. Returns [similarity in 0..1, wildcard count in seq1]. Wildcards
   * count as matches only when includeParams is true.
   * @throws when the sequences differ in length
   */
  getSeqDistance(
    seq1: string[],
    seq2: string[],
    includeParams: boolean,
  ): [number, number] {
    if (seq1.length !== seq2.length) {
      throw new Error('seq1 and seq2 must have equal length');
    }
    // Empty sequences are defined as a perfect match.
    if (seq1.length === 0) return [1.0, 0];
    let simTokens = 0;
    let paramCount = 0;
    for (let i = 0; i < seq1.length; i++) {
      if (seq1[i] === this.paramStr) {
        paramCount++;
        continue;
      }
      if (seq1[i] === seq2[i]) {
        simTokens++;
      }
    }
    if (includeParams) {
      simTokens += paramCount;
    }
    return [simTokens / seq1.length, paramCount];
  }
  /**
   * Position-wise merge of two equal-length token lists; mismatching
   * positions become the wildcard.
   * @throws when the sequences differ in length
   */
  createTemplate(seq1: string[], seq2: string[]): string[] {
    if (seq1.length !== seq2.length) {
      throw new Error('seq1 and seq2 must have equal length');
    }
    return seq1.map((t1, i) => (t1 === seq2[i] ? seq2[i] : this.paramStr));
  }
  /**
   * Picks the best-matching cluster among candidate ids, breaking similarity
   * ties by higher wildcard count. Returns null when nothing reaches simTh.
   */
  private fastMatch(
    clusterIds: number[],
    tokens: string[],
    simTh: number,
    includeParams: boolean,
  ): number | null {
    let maxSim = -1;
    let maxParamCount = -1;
    let maxClusterId: number | null = null;
    for (const cid of clusterIds) {
      // peek: ranking candidates must not perturb LRU order.
      const cluster = this.clusterPeek(cid);
      if (!cluster) continue;
      const [curSim, paramCount] = this.getSeqDistance(
        cluster.logTemplateTokens,
        tokens,
        includeParams,
      );
      if (
        curSim > maxSim ||
        (curSim === maxSim && paramCount > maxParamCount)
      ) {
        maxSim = curSim;
        maxParamCount = paramCount;
        maxClusterId = cid;
      }
    }
    if (maxSim >= simTh) {
      return maxClusterId;
    }
    return null;
  }
  /**
   * Walks the prefix tree — token-count level first, then leading tokens
   * with the wildcard child as fallback — and runs fastMatch on the reached
   * node's candidate ids. Returns null when no path or no match exists.
   */
  private treeSearch(
    tokens: string[],
    simTh: number,
    includeParams: boolean,
  ): number | null {
    const tokenCount = tokens.length;
    const tokenCountStr = String(tokenCount);
    const firstNode = this.rootNode.keyToChildNode.get(tokenCountStr);
    if (!firstNode) return null;
    // Empty messages map directly to the single cluster under count "0".
    if (tokenCount === 0) {
      const firstId = firstNode.clusterIds[0];
      return firstId !== undefined ? firstId : null;
    }
    let curNode: Node = firstNode;
    let curNodeDepth = 1;
    for (const token of tokens) {
      // Stop descending at max depth or when tokens run out.
      if (curNodeDepth >= this.maxNodeDepth) break;
      if (curNodeDepth >= tokenCount) break;
      const child: Node | undefined = curNode.keyToChildNode.get(token);
      if (child) {
        curNode = child;
      } else {
        const wildcardChild: Node | undefined = curNode.keyToChildNode.get(
          this.paramStr,
        );
        if (wildcardChild) {
          curNode = wildcardChild;
        } else {
          return null;
        }
      }
      curNodeDepth++;
    }
    return this.fastMatch(curNode.clusterIds, tokens, simTh, includeParams);
  }
  /** Registers a (new or updated) template's path in the prefix tree. */
  private addSeqToPrefixTree(
    clusterId: number,
    templateTokens: string[],
  ): void {
    const tokenCount = templateTokens.length;
    const tokenCountStr = String(tokenCount);
    if (!this.rootNode.keyToChildNode.has(tokenCountStr)) {
      this.rootNode.keyToChildNode.set(tokenCountStr, new Node());
    }
    let curNode = this.rootNode.keyToChildNode.get(tokenCountStr)!;
    // Empty template: the count node itself holds the single cluster id.
    if (tokenCount === 0) {
      curNode.clusterIds = [clusterId];
      return;
    }
    let currentDepth = 1;
    for (const token of templateTokens) {
      if (currentDepth >= this.maxNodeDepth || currentDepth >= tokenCount) {
        // Leaf position: prune ids whose clusters were LRU-evicted, then
        // append this cluster.
        const newClusterIds = curNode.clusterIds.filter(cid =>
          this.clusterContains(cid),
        );
        newClusterIds.push(clusterId);
        curNode.clusterIds = newClusterIds;
        break;
      }
      if (!curNode.keyToChildNode.has(token)) {
        if (this.parametrizeNumericTokens && Drain.hasNumbers(token)) {
          // Digit-bearing tokens are routed through the wildcard child.
          if (!curNode.keyToChildNode.has(this.paramStr)) {
            curNode.keyToChildNode.set(this.paramStr, new Node());
          }
          curNode = curNode.keyToChildNode.get(this.paramStr)!;
        } else if (curNode.keyToChildNode.has(this.paramStr)) {
          // Wildcard already exists: add a literal child only while under
          // the maxChildren budget, else reuse the wildcard.
          if (curNode.keyToChildNode.size < this.maxChildren) {
            curNode.keyToChildNode.set(token, new Node());
            curNode = curNode.keyToChildNode.get(token)!;
          } else {
            curNode = curNode.keyToChildNode.get(this.paramStr)!;
          }
        } else {
          // No wildcard yet: reserve the last slot for it.
          if (curNode.keyToChildNode.size + 1 < this.maxChildren) {
            curNode.keyToChildNode.set(token, new Node());
            curNode = curNode.keyToChildNode.get(token)!;
          } else if (curNode.keyToChildNode.size + 1 === this.maxChildren) {
            curNode.keyToChildNode.set(this.paramStr, new Node());
            curNode = curNode.keyToChildNode.get(this.paramStr)!;
          } else {
            curNode = curNode.keyToChildNode.get(this.paramStr)!;
          }
        }
      } else {
        curNode = curNode.keyToChildNode.get(token)!;
      }
      currentDepth++;
    }
  }
  /**
   * Learns one message. Returns the matched-or-created cluster plus a change
   * type: 'cluster_created', 'cluster_template_changed', or 'none'.
   */
  addLogMessage(content: string): [LogCluster, string] {
    const contentTokens = this.getContentAsTokens(content);
    const matchClusterId = this.treeSearch(contentTokens, this.simTh, false);
    if (matchClusterId === null) {
      this.clustersCounter++;
      const clusterId = this.clustersCounter;
      const cluster = new LogCluster(contentTokens, clusterId);
      this.clusterInsert(clusterId, cluster);
      this.addSeqToPrefixTree(clusterId, contentTokens);
      return [cluster, 'cluster_created'];
    }
    const existingCluster = this.clusterPeek(matchClusterId)!;
    // Merge the message into the template; mismatches become wildcards.
    const newTemplateTokens = this.createTemplate(
      contentTokens,
      existingCluster.logTemplateTokens,
    );
    const updateType =
      newTemplateTokens.length === existingCluster.logTemplateTokens.length &&
      newTemplateTokens.every(
        (t, i) => t === existingCluster.logTemplateTokens[i],
      )
        ? 'none'
        : 'cluster_template_changed';
    existingCluster.logTemplateTokens = newTemplateTokens;
    existingCluster.size += 1;
    // Touch to update LRU ordering
    this.clusterGet(matchClusterId);
    return [existingCluster, updateType];
  }
  // Collects every cluster id stored anywhere under the given token count.
  private getClustersIdsForSeqLen(seqLen: number): number[] {
    const collectRecursive = (node: Node, ids: number[]): void => {
      ids.push(...node.clusterIds);
      for (const child of node.keyToChildNode.values()) {
        collectRecursive(child, ids);
      }
    };
    const key = String(seqLen);
    const node = this.rootNode.keyToChildNode.get(key);
    if (!node) return [];
    const ids: number[] = [];
    collectRecursive(node, ids);
    return ids;
  }
  /**
   * Matches a message without learning; requires full similarity (wildcards
   * included, simTh = 1.0). Strategies: 'never' = tree search only;
   * 'always' = linear scan over all same-length clusters; 'fallback' = tree
   * search first, linear scan on miss.
   * @throws on an unrecognized strategy name
   */
  match(
    content: string,
    fullSearchStrategy: string = 'never',
  ): LogCluster | null {
    if (!['always', 'never', 'fallback'].includes(fullSearchStrategy)) {
      throw new Error(`Invalid full_search_strategy: ${fullSearchStrategy}`);
    }
    const contentTokens = this.getContentAsTokens(content);
    const requiredSimTh = 1.0;
    const fullSearch = (): LogCluster | null => {
      const allIds = this.getClustersIdsForSeqLen(contentTokens.length);
      const matchedId = this.fastMatch(
        allIds,
        contentTokens,
        requiredSimTh,
        true,
      );
      if (matchedId === null) return null;
      return this.clusterPeek(matchedId) ?? null;
    };
    if (fullSearchStrategy === 'always') {
      return fullSearch();
    }
    const matchId = this.treeSearch(contentTokens, requiredSimTh, true);
    if (matchId !== null) {
      return this.clusterPeek(matchId) ?? null;
    }
    if (fullSearchStrategy === 'never') {
      return null;
    }
    return fullSearch();
  }
}

View file

@ -0,0 +1,9 @@
// Public entry point for the Drain log-template mining port.
// Re-exports the full API surface: the TemplateMiner facade, the core Drain
// algorithm, and the supporting masking / cache / tree building blocks.
export type { MaskingInstructionConfig } from './config';
export { TemplateMinerConfig } from './config';
export { Drain } from './drain';
export { LogCluster } from './log-cluster';
export { LruCache } from './lru-cache';
export { LogMasker, MaskingInstruction } from './masking';
export { Node } from './node';
export type { AddLogMessageResult, ExtractedParameter } from './template-miner';
export { TemplateMiner } from './template-miner';

View file

@ -0,0 +1,19 @@
/**
 * A mined log cluster: the current (wildcard-bearing) template tokens plus
 * the number of messages absorbed into it so far.
 */
export class LogCluster {
  logTemplateTokens: string[];
  clusterId: number;
  size: number;
  constructor(logTemplateTokens: string[], clusterId: number) {
    // Defensive copy: later template merges must not mutate the caller's array.
    this.logTemplateTokens = logTemplateTokens.slice();
    this.clusterId = clusterId;
    this.size = 1;
  }
  /** The template as a human-readable string, tokens joined by spaces. */
  getTemplate(): string {
    return this.logTemplateTokens.join(' ');
  }
  /** Debug representation: padded id and size, then the template. */
  toString(): string {
    const paddedId = String(this.clusterId).padEnd(5);
    const paddedSize = String(this.size).padEnd(10);
    return `ID=${paddedId} : size=${paddedSize}: ${this.getTemplate()}`;
  }
}

View file

@ -0,0 +1,154 @@
/**
 * Internal doubly-linked-list node for LruCache. `prev` and `next` are slot
 * indices into the cache's entries array (null at the list ends).
 */
interface LruEntry<V> {
  key: number;
  value: V;
  prev: number | null;
  next: number | null;
}
/**
 * LRU cache with separate peek (no eviction update) and get (eviction update) methods.
 * This mirrors the behavior of Python's cachetools.LRUCache used in Drain3,
 * where LogClusterCache.get() bypasses eviction ordering.
 *
 * Implementation: entries live in a slot array forming an intrusive doubly
 * linked list (prev/next are slot indices). `map` maps key -> slot, `head`
 * is the most recently used entry and `tail` the next eviction candidate.
 * Vacated slots are recycled LIFO via `freeSlots`.
 */
export class LruCache<V> {
  private capacity: number;
  // key -> slot index into `entries`.
  private map: Map<number, number> = new Map();
  private entries: (LruEntry<V> | null)[] = [];
  // Indices of vacated slots, reused by allocSlot before growing the array.
  private freeSlots: number[] = [];
  private head: number | null = null; // most recently used
  private tail: number | null = null; // least recently used
  constructor(capacity: number) {
    this.capacity = capacity;
  }
  get size(): number {
    return this.map.size;
  }
  has(key: number): boolean {
    return this.map.has(key);
  }
  /** Read value without updating recency (used in fast_match). */
  peek(key: number): V | undefined {
    const slot = this.map.get(key);
    if (slot === undefined) return undefined;
    return this.entries[slot]?.value;
  }
  /** Read value and mark as most recently used. */
  get(key: number): V | undefined {
    const slot = this.map.get(key);
    if (slot === undefined) return undefined;
    this.moveToHead(slot);
    return this.entries[slot]?.value;
  }
  /** Insert or update. Returns the evicted [key, value] if cache was at capacity. */
  put(key: number, value: V): [number, V] | undefined {
    const existingSlot = this.map.get(key);
    if (existingSlot !== undefined) {
      // Update in place; an update counts as a use, so refresh recency.
      const entry = this.entries[existingSlot]!;
      entry.value = value;
      this.moveToHead(existingSlot);
      return undefined;
    }
    let evicted: [number, V] | undefined;
    if (this.map.size >= this.capacity) {
      evicted = this.evictTail();
    }
    // Link the new entry in at the head of the recency list.
    const slot = this.allocSlot({
      key,
      value,
      prev: null,
      next: this.head,
    });
    if (this.head !== null) {
      this.entries[this.head]!.prev = slot;
    }
    this.head = slot;
    if (this.tail === null) {
      this.tail = slot;
    }
    this.map.set(key, slot);
    return evicted;
  }
  /** Remove a key, returning its value (undefined when absent). */
  remove(key: number): V | undefined {
    const slot = this.map.get(key);
    if (slot === undefined) return undefined;
    this.map.delete(key);
    this.unlink(slot);
    const entry = this.entries[slot]!;
    this.entries[slot] = null;
    this.freeSlots.push(slot);
    return entry.value;
  }
  /** All live values, in slot order (NOT recency order). */
  values(): V[] {
    const result: V[] = [];
    for (const entry of this.entries) {
      if (entry !== null) {
        result.push(entry.value);
      }
    }
    return result;
  }
  // Place an entry in a recycled slot if available, else append a new one.
  private allocSlot(entry: LruEntry<V>): number {
    if (this.freeSlots.length > 0) {
      const slot = this.freeSlots.pop()!;
      this.entries[slot] = entry;
      return slot;
    }
    this.entries.push(entry);
    return this.entries.length - 1;
  }
  // Detach a slot from the recency list, fixing head/tail as needed.
  private unlink(slot: number): void {
    const entry = this.entries[slot]!;
    if (entry.prev !== null) {
      this.entries[entry.prev]!.next = entry.next;
    } else {
      this.head = entry.next;
    }
    if (entry.next !== null) {
      this.entries[entry.next]!.prev = entry.prev;
    } else {
      this.tail = entry.prev;
    }
  }
  // Re-link a slot at the head (most recently used) position.
  private moveToHead(slot: number): void {
    if (this.head === slot) return;
    this.unlink(slot);
    const entry = this.entries[slot]!;
    entry.prev = null;
    entry.next = this.head;
    if (this.head !== null) {
      this.entries[this.head]!.prev = slot;
    }
    this.head = slot;
    // Defensive: with >= 2 entries (guaranteed by the early return above),
    // unlink leaves tail non-null, so this is not expected to trigger.
    if (this.tail === null) {
      this.tail = slot;
    }
  }
  // Drop the least recently used entry and return its [key, value].
  private evictTail(): [number, V] | undefined {
    if (this.tail === null) return undefined;
    const tailSlot = this.tail;
    const entry = this.entries[tailSlot]!;
    const key = entry.key;
    const value = entry.value;
    this.map.delete(key);
    this.unlink(tailSlot);
    this.entries[tailSlot] = null;
    this.freeSlots.push(tailSlot);
    return [key, value];
  }
}

View file

@ -0,0 +1,60 @@
/**
 * One masking rule: every match of `pattern` in a log line is replaced by
 * maskPrefix + maskWith + maskSuffix (e.g. "<IP>").
 */
export class MaskingInstruction {
  private regex: RegExp;
  private _pattern: string;
  maskWith: string;
  constructor(pattern: string, maskWith: string) {
    this._pattern = pattern;
    this.regex = new RegExp(pattern, 'g');
    this.maskWith = maskWith;
  }
  /** The original regex source string. */
  get pattern(): string {
    return this._pattern;
  }
  /** Replaces every regex match in `content` with the delimited mask name. */
  mask(content: string, maskPrefix: string, maskSuffix: string): string {
    const replacement = maskPrefix + this.maskWith + maskSuffix;
    this.regex.lastIndex = 0; // defensive; replace() resets it for /g regexes
    // Use a replacer function so '$'-sequences in the mask name or delimiters
    // (e.g. "$&", "$'") are inserted literally instead of being expanded as
    // String.replace substitution patterns.
    return content.replace(this.regex, () => replacement);
  }
}
/**
 * Applies an ordered list of MaskingInstructions to log content and indexes
 * them by mask name for parameter-extraction lookups.
 */
export class LogMasker {
  maskPrefix: string;
  maskSuffix: string;
  private _maskingInstructions: MaskingInstruction[];
  private maskNameToInstructions: Map<string, MaskingInstruction[]>;
  constructor(
    maskingInstructions: MaskingInstruction[],
    maskPrefix: string,
    maskSuffix: string,
  ) {
    this.maskPrefix = maskPrefix;
    this.maskSuffix = maskSuffix;
    this._maskingInstructions = maskingInstructions;
    // Bucket the instructions by their mask name, preserving order.
    this.maskNameToInstructions = new Map();
    for (const instruction of maskingInstructions) {
      const bucket = this.maskNameToInstructions.get(instruction.maskWith);
      if (bucket) {
        bucket.push(instruction);
      } else {
        this.maskNameToInstructions.set(instruction.maskWith, [instruction]);
      }
    }
  }
  /** Runs every masking instruction over the content, in order. */
  mask(content: string): string {
    return this._maskingInstructions.reduce(
      (masked, instruction) =>
        instruction.mask(masked, this.maskPrefix, this.maskSuffix),
      content,
    );
  }
  /** Distinct mask names known to this masker (insertion order). */
  get maskNames(): string[] {
    return [...this.maskNameToInstructions.keys()];
  }
  /** All instructions registered under a mask name ([] when unknown). */
  instructionsByMaskName(maskName: string): MaskingInstruction[] {
    return this.maskNameToInstructions.get(maskName) ?? [];
  }
}

View file

@ -0,0 +1,9 @@
/**
 * One node of the Drain prefix tree: children keyed by token (or by the
 * token-count string at the first level), plus candidate cluster ids held
 * at leaf position.
 */
export class Node {
  keyToChildNode: Map<string, Node> = new Map();
  clusterIds: number[] = [];
}

View file

@ -0,0 +1,180 @@
import { TemplateMinerConfig } from './config';
import { Drain } from './drain';
import { LogCluster } from './log-cluster';
import { LruCache } from './lru-cache';
import { LogMasker, MaskingInstruction } from './masking';
/** A concrete value that a mined template abstracted behind a mask placeholder. */
export interface ExtractedParameter {
  value: string;
  /** Mask name that produced the placeholder ('*' for the generic wildcard). */
  maskName: string;
}
/** Summary returned by TemplateMiner.addLogMessage for one processed line. */
export interface AddLogMessageResult {
  /** One of 'cluster_created', 'cluster_template_changed', or 'none'. */
  changeType: string;
  clusterId: number;
  /** Messages absorbed so far by the matched/created cluster. */
  clusterSize: number;
  /** The cluster's template after this message was merged in. */
  templateMined: string;
  /** Total clusters currently stored in the Drain model. */
  clusterCount: number;
}
/**
 * High-level Drain3 front end: masks raw log lines (LogMasker), clusters
 * them (Drain), and can extract the concrete values that a mined template
 * abstracted behind mask placeholders.
 */
export class TemplateMiner {
  drain: Drain;
  private masker: LogMasker;
  // Compiled parameter-extraction regexes, keyed by simpleHash(template|exact).
  // Each entry also stores the un-hashed `key` so a 32-bit hash collision is
  // detected and treated as a miss instead of returning the wrong regex.
  private extractionCache: LruCache<{
    key: string;
    regex: string;
    paramMap: Map<string, string>;
  }>;
  private extraDelimiters: string[];
  constructor(config?: TemplateMinerConfig) {
    const cfg = config ?? new TemplateMinerConfig();
    // The wildcard placeholder uses the configured mask delimiters, e.g. '<*>'.
    const paramStr = cfg.maskPrefix + '*' + cfg.maskSuffix;
    const maskingInstructions = cfg.maskingInstructions.map(
      mi => new MaskingInstruction(mi.pattern, mi.maskWith),
    );
    this.masker = new LogMasker(
      maskingInstructions,
      cfg.maskPrefix,
      cfg.maskSuffix,
    );
    this.drain = new Drain(
      cfg.drainDepth,
      cfg.drainSimTh,
      cfg.drainMaxChildren,
      cfg.drainMaxClusters,
      cfg.drainExtraDelimiters,
      paramStr,
      cfg.parametrizeNumericTokens,
    );
    this.extractionCache = new LruCache(cfg.parameterExtractionCacheCapacity);
    this.extraDelimiters = cfg.drainExtraDelimiters;
  }
  /** Masks the line, feeds it to Drain, and summarizes the outcome. */
  addLogMessage(logMessage: string): AddLogMessageResult {
    const maskedContent = this.masker.mask(logMessage);
    const [cluster, changeType] = this.drain.addLogMessage(maskedContent);
    return {
      changeType,
      clusterId: cluster.clusterId,
      clusterSize: cluster.size,
      templateMined: cluster.getTemplate(),
      clusterCount: this.drain.clusterCount,
    };
  }
  /**
   * Matches a line against existing clusters without learning.
   * fullSearchStrategy: 'never' (tree search only), 'always' (linear scan),
   * or 'fallback' (tree search, then linear scan on miss).
   */
  match(
    logMessage: string,
    fullSearchStrategy: string = 'never',
  ): LogCluster | null {
    const maskedContent = this.masker.mask(logMessage);
    return this.drain.match(maskedContent, fullSearchStrategy);
  }
  /**
   * Extracts the concrete values behind each mask placeholder of
   * `logTemplate` from `logMessage`. With exactMatching, each value must
   * match one of the mask's originating regexes; otherwise a lazy '.+?' run
   * is accepted. Returns null when the message does not fit the template.
   */
  extractParameters(
    logTemplate: string,
    logMessage: string,
    exactMatching: boolean = true,
  ): ExtractedParameter[] | null {
    let message = logMessage;
    for (const delimiter of this.extraDelimiters) {
      message = message.split(delimiter).join(' ');
    }
    const { regex: templateRegex, paramMap } =
      this.getTemplateParameterExtractionRegex(logTemplate, exactMatching);
    const parameterMatch = message.match(new RegExp(templateRegex));
    if (!parameterMatch || !parameterMatch.groups) {
      return null;
    }
    const extracted: ExtractedParameter[] = [];
    for (const [groupName, maskName] of paramMap) {
      const value = parameterMatch.groups[groupName];
      if (value !== undefined) {
        extracted.push({ value, maskName });
      }
    }
    // Sort by position in input string (left-to-right) to match Python Drain3
    // behavior, which depends on CPython's set iteration order for mask name
    // processing. NOTE(review): indexOf finds the first occurrence, so
    // duplicate extracted values tie on position — confirm acceptable.
    extracted.sort((a, b) => {
      const posA = message.indexOf(a.value);
      const posB = message.indexOf(b.value);
      return posA - posB;
    });
    return extracted;
  }
  /**
   * Builds (or retrieves from the LRU cache) the named-group regex that
   * extracts parameter values for a template, along with the map from group
   * name to the mask name it represents.
   */
  private getTemplateParameterExtractionRegex(
    logTemplate: string,
    exactMatching: boolean,
  ): { regex: string; paramMap: Map<string, string> } {
    const cacheKeyStr = `${logTemplate}|${exactMatching}`;
    const cacheKey = simpleHash(cacheKeyStr);
    // get() (not peek()) so cache hits refresh recency — true LRU behavior.
    const cached = this.extractionCache.get(cacheKey);
    // Verify the full key: simpleHash is only 32 bits, collisions possible.
    if (cached && cached.key === cacheKeyStr) return cached;
    const paramMap = new Map<string, string>();
    let paramNameCounter = 0;
    const maskNames = new Set(this.masker.maskNames);
    maskNames.add('*');
    const escapedPrefix = escapeRegex(this.masker.maskPrefix);
    const escapedSuffix = escapeRegex(this.masker.maskSuffix);
    let templateRegex = escapeRegex(logTemplate);
    for (const maskName of maskNames) {
      const searchStr = escapedPrefix + escapeRegex(maskName) + escapedSuffix;
      // Replace each placeholder occurrence with a named capture group.
      // Checking includes() up front means paramMap only records groups that
      // actually exist in the regex (previously one spurious group per mask
      // name was registered before the loop discovered no occurrence).
      while (templateRegex.includes(searchStr)) {
        const allowedPatterns: string[] = [];
        if (exactMatching && maskName !== '*') {
          for (const mi of this.masker.instructionsByMaskName(maskName)) {
            allowedPatterns.push(mi.pattern);
          }
        }
        if (!exactMatching || maskName === '*') {
          allowedPatterns.push('.+?'); // lazy catch-all
        }
        const paramGroupName = `p_${paramNameCounter}`;
        paramNameCounter++;
        paramMap.set(paramGroupName, maskName);
        const captureRegex = `(?<${paramGroupName}>${allowedPatterns.join('|')})`;
        templateRegex = templateRegex.replace(searchStr, captureRegex);
      }
    }
    // NOTE(review): escapeRegex does not escape spaces, so '\ ' sequences are
    // rare here; this mirrors the original port's whitespace loosening step —
    // confirm whether '\s+' loosening for plain spaces is intended.
    templateRegex = templateRegex.replace(/\\ /g, '\\s+');
    templateRegex = `^${templateRegex}$`;
    const result = { key: cacheKeyStr, regex: templateRegex, paramMap };
    this.extractionCache.put(cacheKey, result);
    return result;
  }
}
/**
 * Escapes regex metacharacters (plus '-') in `s` so it matches literally
 * when embedded in a RegExp source string.
 */
function escapeRegex(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\-]/g, match => `\\${match}`);
}
/**
 * djb2-xor string hash over UTF-16 code units, kept in the unsigned 32-bit
 * range. Used as a compact cache key; not collision-free.
 */
function simpleHash(s: string): number {
  let acc = 5381;
  let i = 0;
  while (i < s.length) {
    // The '^' operator truncates the product to 32 bits (ToInt32); '>>> 0'
    // re-interprets the running value as unsigned.
    acc = ((acc * 33) ^ s.charCodeAt(i)) >>> 0;
    i += 1;
  }
  return acc;
}