From 418f70c5162106a4066c11cadc8b87df59ea7425 Mon Sep 17 00:00:00 2001 From: Warren Lee <5959690+wrn14897@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:03:56 -0700 Subject: [PATCH] [HDX-3964] Add event pattern mining to CLI (Shift+P) (#2106) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Adds a pattern mining feature to the CLI, accessible via `Shift+P`. This mirrors the web app's Pattern Table functionality but runs entirely in TypeScript — no Pyodide/Python WASM needed. **Linear:** https://linear.app/hyperdx/issue/HDX-3964 ## What changed ### 1. Drain library in common-utils (`packages/common-utils/src/drain/`) Ported the [browser-drain](https://github.com/DeploySentinel/browser-drain) TypeScript library into `@hyperdx/common-utils`. This is a pure TypeScript implementation of the Drain3 log template mining algorithm, including: - `TemplateMiner` / `TemplateMinerConfig` — main API - `Drain` — core algorithm with prefix tree and LRU cluster cache - `LogMasker` — regex-based token masking (IPs, numbers, etc.) - `LruCache` — custom LRU cache matching Python Drain3's eviction semantics - 11 Jest tests ported from the original `node:test` suite ### 2. CLI pattern view (`packages/cli/src/components/EventViewer/`) **Keybinding:** `Shift+P` toggles pattern view (pauses follow mode, restores on exit) **Data flow (mirrors web app's `useGroupedPatterns`):** - Issues `SELECT ... ORDER BY rand() LIMIT 100000` to randomly sample up to 100K events - Issues parallel `SELECT count()` to get true total event count - Feeds sampled log bodies through the TypeScript `TemplateMiner` - Estimates pattern counts via `sampleMultiplier = totalCount / sampledRowCount` - Computes time-bucketed trend data per pattern **UI:** - Pattern list with columns: Est. Count (with `~` prefix), Pattern - `l`/`Enter` expands a pattern to show its sample events (full table columns) - `h`/`Esc` returns to pattern list - `j/k/G/g/Ctrl+D/Ctrl+U` navigation throughout - Loading spinner while sampling query runs **Alias fix:** Pattern and count queries compute `WITH` clauses from the source's `defaultTableSelectExpression` so Lucene searches using aliases (e.g. `level:error` where `level` is an alias for `SeverityText`) resolve correctly. ### New files - `packages/common-utils/src/drain/` — 7 source files + barrel index - `packages/common-utils/src/__tests__/drain.test.ts` - `packages/cli/src/components/EventViewer/usePatternData.ts` - `packages/cli/src/components/EventViewer/PatternView.tsx` - `packages/cli/src/components/EventViewer/PatternSamplesView.tsx` ### Modified files - `packages/cli/src/api/eventQuery.ts` — added `buildPatternSampleQuery`, `buildTotalCountQuery`, `buildAliasWithClauses` - `packages/cli/src/components/EventViewer/EventViewer.tsx` — wired in pattern state + rendering - `packages/cli/src/components/EventViewer/useKeybindings.ts` — added P, l, h keybindings + pattern/sample navigation - `packages/cli/src/components/EventViewer/SubComponents.tsx` — added P to help screen ### Demo https://github.com/user-attachments/assets/50a2edfc-8891-43ae-ab86-b96fca778c66 --- .changeset/add-cli-pattern-mining.md | 5 + .changeset/add-drain-library.md | 5 + package.json | 1 + packages/cli/src/api/eventQuery.ts | 139 +++++++ .../components/EventViewer/EventViewer.tsx | 92 ++++- .../EventViewer/PatternSamplesView.tsx | 101 +++++ .../components/EventViewer/PatternView.tsx | 101 +++++ .../components/EventViewer/SubComponents.tsx | 1 + .../components/EventViewer/useKeybindings.ts | 185 +++++++++ .../components/EventViewer/usePatternData.ts | 333 ++++++++++++++++ .../TraceWaterfall/TraceWaterfall.tsx | 8 +- .../src/components/TraceWaterfall/utils.ts | 7 +- .../common-utils/src/__tests__/drain.test.ts | 287 ++++++++++++++ packages/common-utils/src/drain/config.ts | 58 +++ packages/common-utils/src/drain/drain.ts | 370 ++++++++++++++++++ packages/common-utils/src/drain/index.ts | 9 + .../common-utils/src/drain/log-cluster.ts | 19 + packages/common-utils/src/drain/lru-cache.ts | 154 ++++++++ packages/common-utils/src/drain/masking.ts | 60 +++ packages/common-utils/src/drain/node.ts | 9 + .../common-utils/src/drain/template-miner.ts | 180 +++++++++ 21 files changed, 2119 insertions(+), 5 deletions(-) create mode 100644 .changeset/add-cli-pattern-mining.md create mode 100644 .changeset/add-drain-library.md create mode 100644 packages/cli/src/components/EventViewer/PatternSamplesView.tsx create mode 100644 packages/cli/src/components/EventViewer/PatternView.tsx create mode 100644 packages/cli/src/components/EventViewer/usePatternData.ts create mode 100644 packages/common-utils/src/__tests__/drain.test.ts create mode 100644 packages/common-utils/src/drain/config.ts create mode 100644 packages/common-utils/src/drain/drain.ts create mode 100644 packages/common-utils/src/drain/index.ts create mode 100644 packages/common-utils/src/drain/log-cluster.ts create mode 100644 packages/common-utils/src/drain/lru-cache.ts create mode 100644 packages/common-utils/src/drain/masking.ts create mode 100644 packages/common-utils/src/drain/node.ts create mode 100644 packages/common-utils/src/drain/template-miner.ts diff --git a/.changeset/add-cli-pattern-mining.md b/.changeset/add-cli-pattern-mining.md new file mode 100644 index 00000000..3c8f3b3e --- /dev/null +++ b/.changeset/add-cli-pattern-mining.md @@ -0,0 +1,5 @@ +--- +"@hyperdx/cli": patch +--- + +Add event pattern mining view (Shift+P) with sampled estimation and drill-down diff --git a/.changeset/add-drain-library.md b/.changeset/add-drain-library.md new file mode 100644 index 00000000..63506a59 --- /dev/null +++ b/.changeset/add-drain-library.md @@ -0,0 +1,5 @@ +--- +"@hyperdx/common-utils": patch +--- + +Add Drain log template mining library (ported from browser-drain) diff --git a/package.json b/package.json index ee1611fc..0d2e0827 100644 --- a/package.json +++ b/package.json @@ -42,6 +42,7 @@ "run:clickhouse": "nx run @hyperdx/app:run:clickhouse", "dev": "sh -c '. ./scripts/dev-env.sh && yarn build:common-utils && dotenvx run --convention=nextjs -- docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml up -d && yarn app:dev; dotenvx run --convention=nextjs -- docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml down'", "dev:local": "IS_LOCAL_APP_MODE='DANGEROUSLY_is_local_app_mode💀' yarn dev", + "cli:dev": "yarn workspace @hyperdx/cli dev", "dev:down": "sh -c '. ./scripts/dev-env.sh && docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml down && sh ./scripts/dev-kill-ports.sh'", "dev:compose": "sh -c '. ./scripts/dev-env.sh && docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml'", "knip": "knip", diff --git a/packages/cli/src/api/eventQuery.ts b/packages/cli/src/api/eventQuery.ts index c7d3fb57..141ed1b2 100644 --- a/packages/cli/src/api/eventQuery.ts +++ b/packages/cli/src/api/eventQuery.ts @@ -10,6 +10,7 @@ import type { import { chSqlToAliasMap } from '@hyperdx/common-utils/dist/clickhouse'; import { renderChartConfig } from '@hyperdx/common-utils/dist/core/renderChartConfig'; import type { Metadata } from '@hyperdx/common-utils/dist/core/metadata'; +import { aliasMapToWithClauses } from '@hyperdx/common-utils/dist/core/utils'; import { DisplayType } from '@hyperdx/common-utils/dist/types'; import type { BuilderChartConfigWithDateRange, @@ -106,6 +107,144 @@ export async function buildEventSearchQuery( return renderChartConfig(config, metadata, source.querySettings); } +// ---- Alias WITH clauses from source select -------------------------- + +/** + * Compute WITH clauses from the source's default select expression. + * When a source defines `SeverityText as level`, searches for `level:error` + * need `WITH SeverityText AS level` so the alias is available in WHERE. + */ +async function buildAliasWithClauses( + source: SourceResponse, + metadata: Metadata, +): Promise { + const selectExpr = source.defaultTableSelectExpression; + if (!selectExpr) return undefined; + + const tsExpr = source.timestampValueExpression ?? 'TimestampTime'; + + // Render a dummy query with the source's select to extract aliases + const dummyConfig: BuilderChartConfigWithDateRange = { + displayType: DisplayType.Search, + select: selectExpr, + from: source.from, + where: '', + connection: source.connection, + timestampValueExpression: tsExpr, + implicitColumnExpression: source.implicitColumnExpression, + limit: { limit: 0 }, + dateRange: [new Date(), new Date()], + }; + + const dummySql = await renderChartConfig( + dummyConfig, + metadata, + source.querySettings, + ); + const aliasMap = chSqlToAliasMap(dummySql); + return aliasMapToWithClauses(aliasMap); +} + +// ---- Pattern sampling query ----------------------------------------- + +export interface PatternSampleQueryOptions { + source: SourceResponse; + searchQuery?: string; + startTime: Date; + endTime: Date; + /** Number of random rows to sample (default 100_000) */ + sampleLimit?: number; +} + +/** + * Build a query that randomly samples events for pattern mining. + * Selects the body column and timestamp, ordered by rand(). + */ +export async function buildPatternSampleQuery( + opts: PatternSampleQueryOptions, + metadata: Metadata, +): Promise { + const { + source, + searchQuery = '', + startTime, + endTime, + sampleLimit = 100_000, + } = opts; + + const tsExpr = source.timestampValueExpression ?? 'TimestampTime'; + + // Use the same select as the main event table so sample rows have all columns + let selectExpr = source.defaultTableSelectExpression ?? ''; + if (!selectExpr && source.kind === 'trace') { + selectExpr = buildTraceSelectExpression(source); + } + + // Compute alias WITH clauses so search aliases (e.g. `level`) resolve in WHERE + const aliasWith = searchQuery + ? await buildAliasWithClauses(source, metadata) + : undefined; + + const config: BuilderChartConfigWithDateRange = { + displayType: DisplayType.Search, + select: selectExpr, + from: source.from, + where: searchQuery, + whereLanguage: searchQuery ? 'lucene' : 'sql', + connection: source.connection, + timestampValueExpression: tsExpr, + implicitColumnExpression: source.implicitColumnExpression, + orderBy: 'rand()', + limit: { limit: sampleLimit }, + dateRange: [startTime, endTime], + ...(aliasWith ? { with: aliasWith } : {}), + }; + + return renderChartConfig(config, metadata, source.querySettings); +} + +// ---- Total count query ---------------------------------------------- + +export interface TotalCountQueryOptions { + source: SourceResponse; + searchQuery?: string; + startTime: Date; + endTime: Date; +} + +/** + * Build a query to get the total count of events matching the search. + */ +export async function buildTotalCountQuery( + opts: TotalCountQueryOptions, + metadata: Metadata, +): Promise { + const { source, searchQuery = '', startTime, endTime } = opts; + + const tsExpr = source.timestampValueExpression ?? 'TimestampTime'; + + // Compute alias WITH clauses so search aliases (e.g. `level`) resolve in WHERE + const aliasWith = searchQuery + ? await buildAliasWithClauses(source, metadata) + : undefined; + + const config: BuilderChartConfigWithDateRange = { + displayType: DisplayType.Table, + select: 'count() as total', + from: source.from, + where: searchQuery, + whereLanguage: searchQuery ? 'lucene' : 'sql', + connection: source.connection, + timestampValueExpression: tsExpr, + implicitColumnExpression: source.implicitColumnExpression, + limit: { limit: 1 }, + dateRange: [startTime, endTime], + ...(aliasWith ? { with: aliasWith } : {}), + }; + + return renderChartConfig(config, metadata, source.querySettings); +} + // ---- Full row fetch (SELECT *) ------------------------------------- // ---- Trace waterfall queries ---------------------------------------- diff --git a/packages/cli/src/components/EventViewer/EventViewer.tsx b/packages/cli/src/components/EventViewer/EventViewer.tsx index 675e8f1f..1ade22ec 100644 --- a/packages/cli/src/components/EventViewer/EventViewer.tsx +++ b/packages/cli/src/components/EventViewer/EventViewer.tsx @@ -15,8 +15,11 @@ import { SqlPreviewScreen, } from './SubComponents'; import { TableView } from './TableView'; +import { PatternView } from './PatternView'; +import { PatternSamplesView } from './PatternSamplesView'; import { DetailPanel } from './DetailPanel'; import { useEventData } from './useEventData'; +import { usePatternData } from './usePatternData'; import { useKeybindings } from './useKeybindings'; export default function EventViewer({ @@ -76,6 +79,12 @@ export default function EventViewer({ const [traceSelectedNode, setTraceSelectedNode] = useState( null, ); + const [showPatterns, setShowPatterns] = useState(false); + const [patternSelectedRow, setPatternSelectedRow] = useState(0); + const [patternScrollOffset, setPatternScrollOffset] = useState(0); + const [expandedPattern, setExpandedPattern] = useState(null); + const [sampleSelectedRow, setSampleSelectedRow] = useState(0); + const [sampleScrollOffset, setSampleScrollOffset] = useState(0); const [timeRange, setTimeRange] = useState(() => { const now = new Date(); return { start: new Date(now.getTime() - 60 * 60 * 1000), end: now }; @@ -111,6 +120,23 @@ export default function EventViewer({ expandedRow, }); + // ---- Pattern mining ----------------------------------------------- + + const { + patterns, + loading: patternsLoading, + error: patternsError, + totalCount: patternsTotalCount, + } = usePatternData({ + clickhouseClient, + metadata, + source, + submittedQuery, + startTime: timeRange.start, + endTime: timeRange.end, + enabled: showPatterns, + }); + // ---- Derived values ---------------------------------------------- const columns = useMemo( @@ -171,11 +197,22 @@ export default function EventViewer({ focusDetailSearch, showHelp, showSql, + showPatterns, expandedRow, detailTab, traceDetailExpanded, selectedRow, scrollOffset, + patternSelectedRow, + patternScrollOffset, + patternCount: patterns.length, + expandedPattern, + sampleSelectedRow, + sampleScrollOffset, + sampleCount: + expandedPattern !== null + ? (patterns[expandedPattern]?.samples.length ?? 0) + : 0, isFollowing, hasMore, events, @@ -198,7 +235,13 @@ export default function EventViewer({ setFocusDetailSearch, setShowHelp, setShowSql, + setShowPatterns, setSqlScrollOffset, + setPatternSelectedRow, + setPatternScrollOffset, + setExpandedPattern, + setSampleSelectedRow, + setSampleScrollOffset, setSelectedRow, setScrollOffset, setExpandedRow, @@ -313,6 +356,27 @@ export default function EventViewer({ onTraceChSqlChange={setTraceChSql} onTraceSelectedNodeChange={setTraceSelectedNode} /> + ) : showPatterns && + expandedPattern !== null && + patterns[expandedPattern] ? ( + + ) : showPatterns ? ( + ) : ( diff --git a/packages/cli/src/components/EventViewer/PatternSamplesView.tsx b/packages/cli/src/components/EventViewer/PatternSamplesView.tsx new file mode 100644 index 00000000..a222a03e --- /dev/null +++ b/packages/cli/src/components/EventViewer/PatternSamplesView.tsx @@ -0,0 +1,101 @@ +import React from 'react'; +import { Box, Text } from 'ink'; + +import type { PatternGroup } from './usePatternData'; +import type { Column, EventRow } from './types'; +import { formatDynamicRow } from './utils'; +import { TableHeader } from './SubComponents'; + +// ---- Types --------------------------------------------------------- + +type PatternSamplesViewProps = { + pattern: PatternGroup; + columns: Column[]; + selectedRow: number; + scrollOffset: number; + maxRows: number; + wrapLines: boolean; +}; + +// ---- Component ----------------------------------------------------- + +export function PatternSamplesView({ + pattern, + columns, + selectedRow, + scrollOffset, + maxRows, + wrapLines, +}: PatternSamplesViewProps) { + // Reserve 3 rows for the pattern header (pattern text + count + blank line) + const tableMaxRows = Math.max(1, maxRows - 3); + const samples = pattern.samples; + const visible = samples.slice(scrollOffset, scrollOffset + tableMaxRows); + const emptyRows = tableMaxRows - visible.length; + + return ( + + {/* Pattern header */} + + + Pattern:{' '} + + {pattern.pattern} + + + + ~{pattern.estimatedCount.toLocaleString()} estimated events ( + {pattern.count.toLocaleString()} sampled) — h to go back + + + + + {/* Sample events table */} + + + {visible.map((row: EventRow, i: number) => { + const isSelected = i === selectedRow; + const formatted = formatDynamicRow(row, columns); + return ( + + + + {isSelected ? '▸' : ' '} + + + {formatted.cells.map((cell: string, ci: number) => ( + + + {cell} + + + ))} + + ); + })} + + {emptyRows > 0 && + Array.from({ length: emptyRows }).map((_, i) => ( + + ))} + + ); +} diff --git a/packages/cli/src/components/EventViewer/PatternView.tsx b/packages/cli/src/components/EventViewer/PatternView.tsx new file mode 100644 index 00000000..028c6788 --- /dev/null +++ b/packages/cli/src/components/EventViewer/PatternView.tsx @@ -0,0 +1,101 @@ +import React from 'react'; +import { Box, Text } from 'ink'; +import Spinner from 'ink-spinner'; + +import type { PatternGroup } from './usePatternData'; + +// ---- Types --------------------------------------------------------- + +type PatternViewProps = { + patterns: PatternGroup[]; + selectedRow: number; + scrollOffset: number; + maxRows: number; + loading: boolean; + error: Error | null; + wrapLines: boolean; +}; + +// ---- Component ----------------------------------------------------- + +export function PatternView({ + patterns, + selectedRow, + scrollOffset, + maxRows, + loading, + error, + wrapLines, +}: PatternViewProps) { + const visible = patterns.slice(scrollOffset, scrollOffset + maxRows); + const emptyRows = maxRows - visible.length; + + return ( + + {/* Header */} + + + + Est. Count + + + + + Pattern + + + + + {loading ? ( + + Sampling events and mining patterns… + + ) : error ? ( + Error: {error.message} + ) : visible.length === 0 ? ( + No patterns found. + ) : null} + + {!loading && + !error && + visible.map((p, i) => { + const isSelected = i === selectedRow; + return ( + + + + {isSelected ? '▸' : ' '} + + + + + ~{p.estimatedCount.toLocaleString()} + + + + + {p.pattern} + + + + ); + })} + + {emptyRows > 0 && + !loading && + Array.from({ length: emptyRows }).map((_, i) => ( + + ))} + + ); +} diff --git a/packages/cli/src/components/EventViewer/SubComponents.tsx b/packages/cli/src/components/EventViewer/SubComponents.tsx index d7e2e22d..cc93508b 100644 --- a/packages/cli/src/components/EventViewer/SubComponents.tsx +++ b/packages/cli/src/components/EventViewer/SubComponents.tsx @@ -184,6 +184,7 @@ export const HelpScreen = React.memo(function HelpScreen() { ['Shift+Tab', 'Previous source / saved search'], ['t', 'Edit time range in $EDITOR'], ['s', 'Edit select clause in $EDITOR'], + ['P (Shift+P)', 'Show event patterns'], ['D', 'Show generated SQL'], ['f', 'Toggle follow mode (live tail)'], ['o', 'Open trace in browser'], diff --git a/packages/cli/src/components/EventViewer/useKeybindings.ts b/packages/cli/src/components/EventViewer/useKeybindings.ts index e16e9702..092d50d7 100644 --- a/packages/cli/src/components/EventViewer/useKeybindings.ts +++ b/packages/cli/src/components/EventViewer/useKeybindings.ts @@ -22,11 +22,19 @@ export interface KeybindingParams { focusDetailSearch: boolean; showHelp: boolean; showSql: boolean; + showPatterns: boolean; expandedRow: number | null; detailTab: 'overview' | 'columns' | 'trace'; traceDetailExpanded: boolean; selectedRow: number; scrollOffset: number; + patternSelectedRow: number; + patternScrollOffset: number; + patternCount: number; + expandedPattern: number | null; + sampleSelectedRow: number; + sampleScrollOffset: number; + sampleCount: number; isFollowing: boolean; hasMore: boolean; events: EventRow[]; @@ -58,7 +66,13 @@ export interface KeybindingParams { setFocusDetailSearch: React.Dispatch>; setShowHelp: React.Dispatch>; setShowSql: React.Dispatch>; + setShowPatterns: React.Dispatch>; setSqlScrollOffset: React.Dispatch>; + setPatternSelectedRow: React.Dispatch>; + setPatternScrollOffset: React.Dispatch>; + setExpandedPattern: React.Dispatch>; + setSampleSelectedRow: React.Dispatch>; + setSampleScrollOffset: React.Dispatch>; setSelectedRow: React.Dispatch>; setScrollOffset: React.Dispatch>; setExpandedRow: React.Dispatch>; @@ -93,11 +107,19 @@ export function useKeybindings(params: KeybindingParams): void { focusDetailSearch, showHelp, showSql, + showPatterns, expandedRow, detailTab, traceDetailExpanded, selectedRow, scrollOffset, + patternSelectedRow, + patternScrollOffset, + patternCount, + expandedPattern, + sampleSelectedRow, + sampleScrollOffset, + sampleCount, isFollowing, hasMore, events, @@ -120,7 +142,13 @@ export function useKeybindings(params: KeybindingParams): void { setFocusDetailSearch, setShowHelp, setShowSql, + setShowPatterns, setSqlScrollOffset, + setPatternSelectedRow, + setPatternScrollOffset, + setExpandedPattern, + setSampleSelectedRow, + setSampleScrollOffset, setSelectedRow, setScrollOffset, setExpandedRow, @@ -213,6 +241,155 @@ export function useKeybindings(params: KeybindingParams): void { } return; } + + // Pattern samples navigation (expanded pattern) + if (showPatterns && expandedPattern !== null && expandedRow === null) { + if (input === 'h' || key.escape) { + setExpandedPattern(null); + return; + } + if (input === 'j' || key.downArrow) { + setSampleSelectedRow(r => { + const next = r + 1; + const visibleCount = Math.min( + sampleCount - sampleScrollOffset, + maxRows, + ); + if (next >= maxRows) { + setSampleScrollOffset(o => + Math.min(o + 1, Math.max(0, sampleCount - maxRows)), + ); + return r; + } + return Math.min(next, visibleCount - 1); + }); + return; + } + if (input === 'k' || key.upArrow) { + setSampleSelectedRow(r => { + const next = r - 1; + if (next < 0) { + setSampleScrollOffset(o => Math.max(0, o - 1)); + return 0; + } + return next; + }); + return; + } + if (input === 'G') { + const maxOffset = Math.max(0, sampleCount - maxRows); + setSampleScrollOffset(maxOffset); + setSampleSelectedRow(Math.min(sampleCount - 1, maxRows - 1)); + return; + } + if (input === 'g') { + setSampleScrollOffset(0); + setSampleSelectedRow(0); + return; + } + if (key.ctrl && input === 'd') { + const half = Math.floor(maxRows / 2); + const maxOffset = Math.max(0, sampleCount - maxRows); + setSampleScrollOffset(o => Math.min(o + half, maxOffset)); + return; + } + if (key.ctrl && input === 'u') { + const half = Math.floor(maxRows / 2); + setSampleScrollOffset(o => Math.max(0, o - half)); + return; + } + if (input === 'w') { + setWrapLines(w => !w); + return; + } + if (input === 'q') process.exit(0); + return; + } + + // Pattern view navigation + if (showPatterns && expandedRow === null) { + if (input === 'P') { + setShowPatterns(false); + if (wasFollowingRef.current) { + setIsFollowing(true); + } + return; + } + if (key.escape) { + setShowPatterns(false); + if (wasFollowingRef.current) { + setIsFollowing(true); + } + return; + } + if (key.return || input === 'l') { + setExpandedPattern(patternScrollOffset + patternSelectedRow); + setSampleSelectedRow(0); + setSampleScrollOffset(0); + return; + } + if (input === 'j' || key.downArrow) { + setPatternSelectedRow(r => { + const next = r + 1; + const visibleCount = Math.min( + patternCount - patternScrollOffset, + maxRows, + ); + if (next >= maxRows) { + setPatternScrollOffset(o => + Math.min(o + 1, Math.max(0, patternCount - maxRows)), + ); + return r; + } + return Math.min(next, visibleCount - 1); + }); + return; + } + if (input === 'k' || key.upArrow) { + setPatternSelectedRow(r => { + const next = r - 1; + if (next < 0) { + setPatternScrollOffset(o => Math.max(0, o - 1)); + return 0; + } + return next; + }); + return; + } + if (input === 'G') { + const maxOffset = Math.max(0, patternCount - maxRows); + setPatternScrollOffset(maxOffset); + setPatternSelectedRow(Math.min(patternCount - 1, maxRows - 1)); + return; + } + if (input === 'g') { + setPatternScrollOffset(0); + setPatternSelectedRow(0); + return; + } + if (key.ctrl && input === 'd') { + const half = Math.floor(maxRows / 2); + const maxOffset = Math.max(0, patternCount - maxRows); + setPatternScrollOffset(o => Math.min(o + half, maxOffset)); + return; + } + if (key.ctrl && input === 'u') { + const half = Math.floor(maxRows / 2); + setPatternScrollOffset(o => Math.max(0, o - half)); + return; + } + if (input === 'w') { + setWrapLines(w => !w); + return; + } + if (input === '/') { + setFocusSearch(true); + return; + } + if (input === 'q') process.exit(0); + return; + } + // ---- Trace tab keybindings ---------------------------------------- if (expandedRow !== null && detailTab === 'trace') { // When detail view is expanded (full-page Event Details): @@ -388,6 +565,14 @@ export function useKeybindings(params: KeybindingParams): void { handleTabSwitch(key.shift ? -1 : 1); return; } + if (input === 'P') { + wasFollowingRef.current = isFollowing; + setIsFollowing(false); + setShowPatterns(true); + setPatternSelectedRow(0); + setPatternScrollOffset(0); + return; + } if (input === 'A' && onOpenAlerts) { onOpenAlerts(); return; diff --git a/packages/cli/src/components/EventViewer/usePatternData.ts b/packages/cli/src/components/EventViewer/usePatternData.ts new file mode 100644 index 00000000..42528ca7 --- /dev/null +++ b/packages/cli/src/components/EventViewer/usePatternData.ts @@ -0,0 +1,333 @@ +/** + * Fetches a random sample of events from ClickHouse, mines patterns + * using the Drain algorithm, and estimates total counts using a + * sampleMultiplier — mirroring the web frontend's useGroupedPatterns. + */ +import { useState, useEffect, useCallback, useRef } from 'react'; + +import { + TemplateMiner, + TemplateMinerConfig, +} from '@hyperdx/common-utils/dist/drain'; +import type { Metadata } from '@hyperdx/common-utils/dist/core/metadata'; +import { convertDateRangeToGranularityString } from '@hyperdx/common-utils/dist/core/utils'; + +import type { SourceResponse, ProxyClickhouseClient } from '@/api/client'; +import { + buildPatternSampleQuery, + buildTotalCountQuery, +} from '@/api/eventQuery'; +import { getEventBody } from '@/shared/source'; + +import type { EventRow } from './types'; +import { flatten } from './utils'; + +// ---- Constants ----------------------------------------------------- + +const SAMPLES = 10_000; + +// ---- Time bucketing utilities -------------------------------------- + +/** Parse a granularity string like "5 minute" into seconds. */ +function granularityToSeconds(granularity: string): number { + const [num, unit] = granularity.split(' '); + const n = parseInt(num, 10); + switch (unit) { + case 'second': + return n; + case 'minute': + return n * 60; + case 'hour': + return n * 3600; + case 'day': + return n * 86400; + default: + return n * 60; + } +} + +/** Round a timestamp down to the start of its granularity bucket. */ +function toStartOfBucket(ts: number, granularityMs: number): number { + return Math.floor(ts / granularityMs) * granularityMs; +} + +/** Generate all bucket start timestamps between start and end. */ +function generateBuckets( + startMs: number, + endMs: number, + granularityMs: number, +): number[] { + const buckets: number[] = []; + let current = toStartOfBucket(startMs, granularityMs); + while (current < endMs) { + buckets.push(current); + current += granularityMs; + } + return buckets; +} + +// ---- Types --------------------------------------------------------- + +export interface TrendBucket { + ts: number; + count: number; +} + +export interface PatternGroup { + id: string; + pattern: string; + /** Raw count within the sample */ + count: number; + /** Estimated total count (count * sampleMultiplier), prefixed with ~ in display */ + estimatedCount: number; + samples: EventRow[]; + /** Time-bucketed trend data for sparkline */ + trend: TrendBucket[]; +} + +export interface UsePatternDataParams { + clickhouseClient: ProxyClickhouseClient; + metadata: Metadata; + source: SourceResponse; + submittedQuery: string; + startTime: Date; + endTime: Date; + /** Only fetch when true (i.e., pattern view is open) */ + enabled: boolean; +} + +export interface UsePatternDataReturn { + patterns: PatternGroup[]; + loading: boolean; + error: Error | null; + totalCount: number | null; + sampledRowCount: number; +} + +// ---- Hook ---------------------------------------------------------- + +export function usePatternData({ + clickhouseClient, + metadata, + source, + submittedQuery, + startTime, + endTime, + enabled, +}: UsePatternDataParams): UsePatternDataReturn { + const [patterns, setPatterns] = useState([]); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + const [totalCount, setTotalCount] = useState(null); + const [sampledRowCount, setSampledRowCount] = useState(0); + + // Track the last query params to avoid redundant fetches + const lastFetchRef = useRef(''); + + const bodyColumn = (() => { + const expr = getEventBody(source); + if (expr) return expr; + return undefined; + })(); + + const fetchPatterns = useCallback(async () => { + const fetchKey = JSON.stringify({ + source: source.id, + submittedQuery, + startTime: startTime.getTime(), + endTime: endTime.getTime(), + }); + + // Skip if we already fetched for these exact params + if (lastFetchRef.current === fetchKey) return; + lastFetchRef.current = fetchKey; + + setLoading(true); + setError(null); + + try { + // Fire both queries in parallel + const [sampleChSql, countChSql] = await Promise.all([ + buildPatternSampleQuery( + { + source, + searchQuery: submittedQuery, + startTime, + endTime, + sampleLimit: SAMPLES, + }, + metadata, + ), + buildTotalCountQuery( + { source, searchQuery: submittedQuery, startTime, endTime }, + metadata, + ), + ]); + + const [sampleResult, countResult] = await Promise.all([ + clickhouseClient.query({ + query: sampleChSql.sql, + query_params: sampleChSql.params, + format: 'JSON', + connectionId: source.connection, + }), + clickhouseClient.query({ + query: countChSql.sql, + query_params: countChSql.params, + format: 'JSON', + connectionId: source.connection, + }), + ]); + + const sampleJson = (await sampleResult.json()) as { data: EventRow[] }; + const countJson = (await countResult.json()) as { + data: Array>; + }; + + const sampleRows = sampleJson.data ?? []; + const total = Number(countJson.data?.[0]?.total ?? 0); + + setTotalCount(total); + setSampledRowCount(sampleRows.length); + + if (sampleRows.length === 0) { + setPatterns([]); + setLoading(false); + return; + } + + // Determine columns from the result keys + const resultKeys = Object.keys(sampleRows[0]); + const effectiveBodyColumn = + bodyColumn ?? resultKeys[resultKeys.length - 1]; + // Use the source's timestamp expression, falling back to the first column + const tsExpr = source.timestampValueExpression ?? 'TimestampTime'; + const tsColumn = resultKeys.find(k => k === tsExpr) ?? resultKeys[0]; + + // Compute granularity for trend buckets + const granularity = convertDateRangeToGranularityString( + [startTime, endTime], + 24, + ); + const granularityMs = granularityToSeconds(granularity) * 1000; + const allBuckets = generateBuckets( + startTime.getTime(), + endTime.getTime(), + granularityMs, + ); + + // Mine patterns + const config = new TemplateMinerConfig(); + const miner = new TemplateMiner(config); + + const clustered: Array<{ + clusterId: number; + row: EventRow; + tsMs: number; + }> = []; + for (const row of sampleRows) { + const body = row[effectiveBodyColumn]; + const text = body != null ? flatten(String(body)) : ''; + const result = miner.addLogMessage(text); + const tsRaw = row[tsColumn]; + const tsMs = + tsRaw != null + ? new Date(String(tsRaw)).getTime() + : startTime.getTime(); + clustered.push({ clusterId: result.clusterId, row, tsMs }); + } + + // Group by cluster ID + const groups = new Map< + number, + { + rows: EventRow[]; + template: string; + bucketCounts: Map; + } + >(); + + for (const { clusterId, row, tsMs } of clustered) { + const bucket = toStartOfBucket(tsMs, granularityMs); + const existing = groups.get(clusterId); + if (existing) { + existing.rows.push(row); + existing.bucketCounts.set( + bucket, + (existing.bucketCounts.get(bucket) ?? 0) + 1, + ); + } else { + const body = row[effectiveBodyColumn]; + const text = body != null ? flatten(String(body)) : ''; + const match = miner.match(text, 'fallback'); + const bucketCounts = new Map(); + bucketCounts.set(bucket, 1); + groups.set(clusterId, { + rows: [row], + template: match?.getTemplate() ?? text, + bucketCounts, + }); + } + } + + // Compute sampleMultiplier + const sampleMultiplier = + total > 0 && sampleRows.length > 0 ? total / sampleRows.length : 1; + + // Convert to sorted array with estimated counts and trend data + const result: PatternGroup[] = []; + for (const [id, { rows, template, bucketCounts }] of groups) { + const trend: TrendBucket[] = allBuckets.map(bucketTs => ({ + ts: bucketTs, + count: Math.round( + (bucketCounts.get(bucketTs) ?? 0) * sampleMultiplier, + ), + })); + + result.push({ + id: String(id), + pattern: template, + count: rows.length, + estimatedCount: Math.max( + Math.round(rows.length * sampleMultiplier), + 1, + ), + samples: rows, + trend, + }); + } + + result.sort((a, b) => b.estimatedCount - a.estimatedCount); + setPatterns(result); + } catch (err: unknown) { + setError(err instanceof Error ? err : new Error(String(err))); + // Clear the fetch key so a retry will re-fetch + lastFetchRef.current = ''; + } finally { + setLoading(false); + } + }, [ + clickhouseClient, + metadata, + source, + submittedQuery, + startTime, + endTime, + bodyColumn, + ]); + + useEffect(() => { + if (enabled) { + fetchPatterns(); + } + }, [enabled, fetchPatterns]); + + // Clear patterns when disabled + useEffect(() => { + if (!enabled) { + lastFetchRef.current = ''; + } + }, [enabled]); + + return { patterns, loading, error, totalCount, sampledRowCount }; +} diff --git a/packages/cli/src/components/TraceWaterfall/TraceWaterfall.tsx b/packages/cli/src/components/TraceWaterfall/TraceWaterfall.tsx index e5618645..f0a9b87e 100644 --- a/packages/cli/src/components/TraceWaterfall/TraceWaterfall.tsx +++ b/packages/cli/src/components/TraceWaterfall/TraceWaterfall.tsx @@ -284,7 +284,13 @@ export default function TraceWaterfall({ diff --git a/packages/cli/src/components/TraceWaterfall/utils.ts b/packages/cli/src/components/TraceWaterfall/utils.ts index 0087ee92..825ed253 100644 --- a/packages/cli/src/components/TraceWaterfall/utils.ts +++ b/packages/cli/src/components/TraceWaterfall/utils.ts @@ -51,7 +51,12 @@ export function getStatusColor(node: SpanNode): 'red' | 'yellow' | undefined { } export function getBarColor(node: SpanNode): string { - if (node.kind === 'log') return 'green'; + if (node.kind === 'log') { + const sev = node.StatusCode?.toLowerCase(); + if (sev === 'error' || sev === 'fatal' || sev === 'critical') return 'red'; + if (sev === 'warn' || sev === 'warning') return 'yellow'; + return 'green'; + } if (node.StatusCode === '2' || node.StatusCode === 'Error') return 'red'; if (node.StatusCode === '1') return 'yellow'; return 'cyan'; diff --git a/packages/common-utils/src/__tests__/drain.test.ts b/packages/common-utils/src/__tests__/drain.test.ts new file mode 100644 index 00000000..110a8a85 --- /dev/null +++ b/packages/common-utils/src/__tests__/drain.test.ts @@ -0,0 +1,287 @@ +import { TemplateMinerConfig } from '../drain/config'; +import { Drain } from '../drain/drain'; +import { TemplateMiner } from '../drain/template-miner'; + +describe('Drain', () => { + it('test_add_shorter_than_depth_message', () => { + const model = new Drain(4); + let [cluster, changeType] = model.addLogMessage('hello'); + expect(changeType).toBe('cluster_created'); + + [cluster, changeType] = model.addLogMessage('hello'); + expect(changeType).toBe('none'); + + [cluster, changeType] = model.addLogMessage('otherword'); + expect(changeType).toBe('cluster_created'); + + expect(model.clusterCount).toBe(2); + }); + + it('test_add_log_message', () => { + const model = new Drain(); + const entries = ` +Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] +Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth] +Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 +Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2 +Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2 +Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth]` + .split('\n') + .filter(l => l.trim().length > 0); + + const expected = ` +Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] +Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth] +Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 +Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 +Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 +Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth]` + .split('\n') + .filter(l => l.trim().length > 0) + .map(l => l.trim()); + + const actual: string[] = []; + for (const entry of entries) { + const [cluster] = model.addLogMessage(entry); + actual.push(cluster.getTemplate()); + } + + expect(actual).toEqual(expected); + // Python test reports 8 because splitlines() includes 2 empty entries from triple-quote; + // we only feed the 6 real log lines, so total is 6. + expect(model.getTotalClusterSize()).toBe(6); + }); + + it('test_add_log_message_sim_75', () => { + const model = new Drain(4, 0.75, 100); + const entries = ` +Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] +Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth] +Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 +Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2 +Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2 +Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth]` + .split('\n') + .filter(l => l.trim().length > 0); + + const expected = ` +Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] +Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth] +Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 +Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 +Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 +Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth]` + .split('\n') + .filter(l => l.trim().length > 0) + .map(l => l.trim()); + + const actual: string[] = []; + for (const entry of entries) { + const [cluster] = model.addLogMessage(entry); + actual.push(cluster.getTemplate()); + } + + expect(actual).toEqual(expected); + expect(model.getTotalClusterSize()).toBe(6); + }); + + it('test_max_clusters', () => { + const model = new Drain(4, 0.4, 100, 1); + const entries = ` +A format 1 +A format 2 +B format 1 +B format 2 +A format 3` + .split('\n') + .filter(l => l.trim().length > 0); + + const expected = [ + 'A format 1', + 'A format <*>', + 'B format 1', + 'B format <*>', + 'A format 3', + ]; + + const actual: string[] = []; + for (const entry of entries) { + const [cluster] = model.addLogMessage(entry); + actual.push(cluster.getTemplate()); + } + + expect(actual).toEqual(expected); + expect(model.getTotalClusterSize()).toBe(1); + }); + + it('test_max_clusters_lru_multiple_leaf_nodes', () => { + const model = new Drain(4, 0.4, 100, 2, [], '*'); + const entries = [ + 'A A A', + 'A A B', + 'B A A', + 'B A B', + 'C A A', + 'C A B', + 'B A A', + 'A A A', + ]; + const expected = [ + 'A A A', + 'A A *', + 'B A A', + 'B A *', + 'C A A', + 'C A *', + 'B A *', + 'A A A', + ]; + + const actual: string[] = []; + for (const entry of entries) { + const [cluster] = model.addLogMessage(entry); + actual.push(cluster.getTemplate()); + } + + expect(actual).toEqual(expected); + expect(model.getTotalClusterSize()).toBe(4); + }); + + it('test_max_clusters_lru_single_leaf_node', () => { + const model = new Drain(4, 0.4, 100, 2, [], '*'); + const entries = [ + 'A A A', + 'A A B', + 'A B A', + 'A B B', + 'A C A', + 'A C B', + 'A B A', + 'A A A', + ]; + const expected = [ + 'A A A', + 'A A *', + 'A B A', + 'A B *', + 'A C A', + 'A C *', + 'A B *', + 'A A A', + ]; + + const actual: string[] = []; + for (const entry of entries) { + const [cluster] = model.addLogMessage(entry); + actual.push(cluster.getTemplate()); + } + + expect(actual).toEqual(expected); + }); + + it('test_match_only', () => { + const model = new Drain(); + model.addLogMessage('aa aa aa'); + model.addLogMessage('aa aa bb'); + model.addLogMessage('aa aa cc'); + model.addLogMessage('xx yy zz'); + + let c = model.match('aa aa tt'); + expect(c).not.toBeNull(); + expect(c!.clusterId).toBe(1); + + c = model.match('xx yy zz'); + expect(c).not.toBeNull(); + expect(c!.clusterId).toBe(2); + + c = model.match('xx yy rr'); + expect(c).toBeNull(); + + c = model.match('nothing'); + expect(c).toBeNull(); + }); + + it('test_create_template', () => { + const model = new Drain(4, 0.4, 100, null, [], '*'); + + const seq1 = ['aa', 'bb', 'dd']; + const seq2 = ['aa', 'bb', 'cc']; + + let template = model.createTemplate(seq1, seq2); + expect(template).toEqual(['aa', 'bb', '*']); + + template = model.createTemplate(seq1, seq1); + expect(template).toEqual(seq1); + + expect(() => model.createTemplate(seq1, ['aa'])).toThrow(); + }); +}); + +describe('TemplateMiner', () => { + it('add_log_message with masking', () => { + const config = new TemplateMinerConfig(); + config.maskingInstructions = [ + { + pattern: + '((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)', + maskWith: 'IP', + }, + { + pattern: '((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)', + maskWith: 'NUM', + }, + ]; + config.maskPrefix = '<:'; + config.maskSuffix = ':>'; + + const miner = new TemplateMiner(config); + + let result = miner.addLogMessage('connected to 10.0.0.1'); + expect(result.changeType).toBe('cluster_created'); + expect(result.clusterId).toBe(1); + expect(result.templateMined).toContain('<:IP:>'); + + result = miner.addLogMessage('connected to 192.168.0.1'); + expect(result.changeType).toBe('none'); + expect(result.clusterId).toBe(1); + }); + + it('match after training', () => { + const config = new TemplateMinerConfig(); + const miner = new TemplateMiner(config); + + miner.addLogMessage('user alice logged in'); + miner.addLogMessage('user bob logged in'); + + const matched = miner.match('user charlie logged in'); + expect(matched).not.toBeNull(); + expect(matched!.clusterId).toBe(1); + + const noMatch = miner.match('something completely different'); + expect(noMatch).toBeNull(); + }); + + it('extract_parameters', () => { + const config = new TemplateMinerConfig(); + config.maskingInstructions = [ + { + pattern: '((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)', + maskWith: 'NUM', + }, + ]; + const miner = new TemplateMiner(config); + + miner.addLogMessage('user johndoe logged in 11 minutes ago'); + miner.addLogMessage('user janedoe logged in 5 minutes ago'); + + const result = miner.addLogMessage('user bob logged in 3 minutes ago'); + const params = miner.extractParameters( + result.templateMined, + 'user bob logged in 3 minutes ago', + false, + ); + + expect(params).not.toBeNull(); + expect(params!.length).toBeGreaterThan(0); + }); +}); diff --git a/packages/common-utils/src/drain/config.ts b/packages/common-utils/src/drain/config.ts new file mode 100644 index 00000000..0624e5fb --- /dev/null +++ b/packages/common-utils/src/drain/config.ts @@ -0,0 +1,58 @@ +export interface MaskingInstructionConfig { + pattern: string; + maskWith: string; +} + +export class TemplateMinerConfig { + drainDepth: number = 4; + drainSimTh: number = 0.4; + drainMaxChildren: number = 100; + drainMaxClusters: number | null = null; + drainExtraDelimiters: string[] = []; + maskPrefix: string = '<'; + maskSuffix: string = '>'; + maskingInstructions: MaskingInstructionConfig[] = []; + parametrizeNumericTokens: boolean = true; + parameterExtractionCacheCapacity: number = 3000; + + static fromJSON(json: Record): TemplateMinerConfig { + const config = new TemplateMinerConfig(); + if (typeof json.drain_depth === 'number') + config.drainDepth = json.drain_depth; + if (typeof json.drain_sim_th === 'number') + config.drainSimTh = json.drain_sim_th; + if (typeof json.drain_max_children === 'number') + config.drainMaxChildren = json.drain_max_children; + if ( + typeof json.drain_max_clusters === 'number' || + json.drain_max_clusters === null + ) + config.drainMaxClusters = json.drain_max_clusters; + if (Array.isArray(json.drain_extra_delimiters)) { + config.drainExtraDelimiters = ( + json.drain_extra_delimiters as unknown[] + ).filter((v): v is string => typeof v === 'string'); + } + if (typeof json.mask_prefix === 'string') + config.maskPrefix = json.mask_prefix; + if (typeof json.mask_suffix === 'string') + config.maskSuffix = json.mask_suffix; + if (typeof json.parametrize_numeric_tokens === 'boolean') + config.parametrizeNumericTokens = json.parametrize_numeric_tokens; + if (typeof json.parameter_extraction_cache_capacity === 'number') + config.parameterExtractionCacheCapacity = + json.parameter_extraction_cache_capacity; + if (Array.isArray(json.masking_instructions)) { + config.maskingInstructions = (json.masking_instructions as unknown[]) + .filter( + (item): item is Record => + typeof item === 'object' && item !== null, + ) + .map(mi => ({ + pattern: mi.regex_pattern ?? mi.pattern, + maskWith: mi.mask_with ?? mi.maskWith, + })); + } + return config; + } +} diff --git a/packages/common-utils/src/drain/drain.ts b/packages/common-utils/src/drain/drain.ts new file mode 100644 index 00000000..ead441df --- /dev/null +++ b/packages/common-utils/src/drain/drain.ts @@ -0,0 +1,370 @@ +import { LogCluster } from './log-cluster'; +import { LruCache } from './lru-cache'; +import { Node } from './node'; + +export class Drain { + logClusterDepth: number; + private maxNodeDepth: number; + simTh: number; + maxChildren: number; + rootNode: Node; + extraDelimiters: string[]; + maxClusters: number | null; + paramStr: string; + parametrizeNumericTokens: boolean; + + private unlimitedStore: Map | null; + private limitedStore: LruCache | null; + clustersCounter: number; + + constructor( + depth: number = 4, + simTh: number = 0.4, + maxChildren: number = 100, + maxClusters: number | null = null, + extraDelimiters: string[] = [], + paramStr: string = '<*>', + parametrizeNumericTokens: boolean = true, + ) { + if (depth < 3) { + throw new Error('depth argument must be at least 3'); + } + + this.logClusterDepth = depth; + this.maxNodeDepth = depth - 2; + this.simTh = simTh; + this.maxChildren = maxChildren; + this.rootNode = new Node(); + this.extraDelimiters = extraDelimiters; + this.maxClusters = maxClusters; + this.paramStr = paramStr; + this.parametrizeNumericTokens = parametrizeNumericTokens; + this.clustersCounter = 0; + + if (maxClusters !== null) { + this.unlimitedStore = null; + this.limitedStore = new LruCache(maxClusters); + } else { + this.unlimitedStore = new Map(); + this.limitedStore = null; + } + } + + get clusterCount(): number { + if (this.unlimitedStore) return this.unlimitedStore.size; + return this.limitedStore!.size; + } + + getTotalClusterSize(): number { + let total = 0; + if (this.unlimitedStore) { + for (const c of this.unlimitedStore.values()) total += c.size; + } else { + for (const c of this.limitedStore!.values()) total += c.size; + } + return total; + } + + private clusterPeek(id: number): LogCluster | undefined { + if (this.unlimitedStore) return this.unlimitedStore.get(id); + return this.limitedStore!.peek(id); + } + + private clusterGet(id: number): LogCluster | undefined { + if (this.unlimitedStore) return this.unlimitedStore.get(id); + return this.limitedStore!.get(id); + } + + private clusterContains(id: number): boolean { + if (this.unlimitedStore) return this.unlimitedStore.has(id); + return this.limitedStore!.has(id); + } + + private clusterInsert(id: number, cluster: LogCluster): void { + if (this.unlimitedStore) { + this.unlimitedStore.set(id, cluster); + } else { + this.limitedStore!.put(id, cluster); + } + } + + private static hasNumbers(s: string): boolean { + for (let i = 0; i < s.length; i++) { + const c = s.charCodeAt(i); + if (c >= 48 && c <= 57) return true; + } + return false; + } + + getContentAsTokens(content: string): string[] { + let c = content.trim(); + for (const delimiter of this.extraDelimiters) { + c = c.split(delimiter).join(' '); + } + if (c.length === 0) return []; + return c.split(/\s+/); + } + + getSeqDistance( + seq1: string[], + seq2: string[], + includeParams: boolean, + ): [number, number] { + if (seq1.length !== seq2.length) { + throw new Error('seq1 and seq2 must have equal length'); + } + if (seq1.length === 0) return [1.0, 0]; + + let simTokens = 0; + let paramCount = 0; + + for (let i = 0; i < seq1.length; i++) { + if (seq1[i] === this.paramStr) { + paramCount++; + continue; + } + if (seq1[i] === seq2[i]) { + simTokens++; + } + } + + if (includeParams) { + simTokens += paramCount; + } + + return [simTokens / seq1.length, paramCount]; + } + + createTemplate(seq1: string[], seq2: string[]): string[] { + if (seq1.length !== seq2.length) { + throw new Error('seq1 and seq2 must have equal length'); + } + return seq1.map((t1, i) => (t1 === seq2[i] ? seq2[i] : this.paramStr)); + } + + private fastMatch( + clusterIds: number[], + tokens: string[], + simTh: number, + includeParams: boolean, + ): number | null { + let maxSim = -1; + let maxParamCount = -1; + let maxClusterId: number | null = null; + + for (const cid of clusterIds) { + const cluster = this.clusterPeek(cid); + if (!cluster) continue; + const [curSim, paramCount] = this.getSeqDistance( + cluster.logTemplateTokens, + tokens, + includeParams, + ); + if ( + curSim > maxSim || + (curSim === maxSim && paramCount > maxParamCount) + ) { + maxSim = curSim; + maxParamCount = paramCount; + maxClusterId = cid; + } + } + + if (maxSim >= simTh) { + return maxClusterId; + } + return null; + } + + private treeSearch( + tokens: string[], + simTh: number, + includeParams: boolean, + ): number | null { + const tokenCount = tokens.length; + const tokenCountStr = String(tokenCount); + + const firstNode = this.rootNode.keyToChildNode.get(tokenCountStr); + if (!firstNode) return null; + + if (tokenCount === 0) { + const firstId = firstNode.clusterIds[0]; + return firstId !== undefined ? firstId : null; + } + + let curNode: Node = firstNode; + let curNodeDepth = 1; + for (const token of tokens) { + if (curNodeDepth >= this.maxNodeDepth) break; + if (curNodeDepth >= tokenCount) break; + + const child: Node | undefined = curNode.keyToChildNode.get(token); + if (child) { + curNode = child; + } else { + const wildcardChild: Node | undefined = curNode.keyToChildNode.get( + this.paramStr, + ); + if (wildcardChild) { + curNode = wildcardChild; + } else { + return null; + } + } + curNodeDepth++; + } + + return this.fastMatch(curNode.clusterIds, tokens, simTh, includeParams); + } + + private addSeqToPrefixTree( + clusterId: number, + templateTokens: string[], + ): void { + const tokenCount = templateTokens.length; + const tokenCountStr = String(tokenCount); + + if (!this.rootNode.keyToChildNode.has(tokenCountStr)) { + this.rootNode.keyToChildNode.set(tokenCountStr, new Node()); + } + + let curNode = this.rootNode.keyToChildNode.get(tokenCountStr)!; + + if (tokenCount === 0) { + curNode.clusterIds = [clusterId]; + return; + } + + let currentDepth = 1; + for (const token of templateTokens) { + if (currentDepth >= this.maxNodeDepth || currentDepth >= tokenCount) { + const newClusterIds = curNode.clusterIds.filter(cid => + this.clusterContains(cid), + ); + newClusterIds.push(clusterId); + curNode.clusterIds = newClusterIds; + break; + } + + if (!curNode.keyToChildNode.has(token)) { + if (this.parametrizeNumericTokens && Drain.hasNumbers(token)) { + if (!curNode.keyToChildNode.has(this.paramStr)) { + curNode.keyToChildNode.set(this.paramStr, new Node()); + } + curNode = curNode.keyToChildNode.get(this.paramStr)!; + } else if (curNode.keyToChildNode.has(this.paramStr)) { + if (curNode.keyToChildNode.size < this.maxChildren) { + curNode.keyToChildNode.set(token, new Node()); + curNode = curNode.keyToChildNode.get(token)!; + } else { + curNode = curNode.keyToChildNode.get(this.paramStr)!; + } + } else { + if (curNode.keyToChildNode.size + 1 < this.maxChildren) { + curNode.keyToChildNode.set(token, new Node()); + curNode = curNode.keyToChildNode.get(token)!; + } else if (curNode.keyToChildNode.size + 1 === this.maxChildren) { + curNode.keyToChildNode.set(this.paramStr, new Node()); + curNode = curNode.keyToChildNode.get(this.paramStr)!; + } else { + curNode = curNode.keyToChildNode.get(this.paramStr)!; + } + } + } else { + curNode = curNode.keyToChildNode.get(token)!; + } + currentDepth++; + } + } + + addLogMessage(content: string): [LogCluster, string] { + const contentTokens = this.getContentAsTokens(content); + const matchClusterId = this.treeSearch(contentTokens, this.simTh, false); + + if (matchClusterId === null) { + this.clustersCounter++; + const clusterId = this.clustersCounter; + const cluster = new LogCluster(contentTokens, clusterId); + this.clusterInsert(clusterId, cluster); + this.addSeqToPrefixTree(clusterId, contentTokens); + return [cluster, 'cluster_created']; + } + + const existingCluster = this.clusterPeek(matchClusterId)!; + const newTemplateTokens = this.createTemplate( + contentTokens, + existingCluster.logTemplateTokens, + ); + + const updateType = + newTemplateTokens.length === existingCluster.logTemplateTokens.length && + newTemplateTokens.every( + (t, i) => t === existingCluster.logTemplateTokens[i], + ) + ? 'none' + : 'cluster_template_changed'; + + existingCluster.logTemplateTokens = newTemplateTokens; + existingCluster.size += 1; + + // Touch to update LRU ordering + this.clusterGet(matchClusterId); + + return [existingCluster, updateType]; + } + + private getClustersIdsForSeqLen(seqLen: number): number[] { + const collectRecursive = (node: Node, ids: number[]): void => { + ids.push(...node.clusterIds); + for (const child of node.keyToChildNode.values()) { + collectRecursive(child, ids); + } + }; + + const key = String(seqLen); + const node = this.rootNode.keyToChildNode.get(key); + if (!node) return []; + const ids: number[] = []; + collectRecursive(node, ids); + return ids; + } + + match( + content: string, + fullSearchStrategy: string = 'never', + ): LogCluster | null { + if (!['always', 'never', 'fallback'].includes(fullSearchStrategy)) { + throw new Error(`Invalid full_search_strategy: ${fullSearchStrategy}`); + } + + const contentTokens = this.getContentAsTokens(content); + const requiredSimTh = 1.0; + + const fullSearch = (): LogCluster | null => { + const allIds = this.getClustersIdsForSeqLen(contentTokens.length); + const matchedId = this.fastMatch( + allIds, + contentTokens, + requiredSimTh, + true, + ); + if (matchedId === null) return null; + return this.clusterPeek(matchedId) ?? null; + }; + + if (fullSearchStrategy === 'always') { + return fullSearch(); + } + + const matchId = this.treeSearch(contentTokens, requiredSimTh, true); + if (matchId !== null) { + return this.clusterPeek(matchId) ?? null; + } + + if (fullSearchStrategy === 'never') { + return null; + } + + return fullSearch(); + } +} diff --git a/packages/common-utils/src/drain/index.ts b/packages/common-utils/src/drain/index.ts new file mode 100644 index 00000000..1a0faf02 --- /dev/null +++ b/packages/common-utils/src/drain/index.ts @@ -0,0 +1,9 @@ +export type { MaskingInstructionConfig } from './config'; +export { TemplateMinerConfig } from './config'; +export { Drain } from './drain'; +export { LogCluster } from './log-cluster'; +export { LruCache } from './lru-cache'; +export { LogMasker, MaskingInstruction } from './masking'; +export { Node } from './node'; +export type { AddLogMessageResult, ExtractedParameter } from './template-miner'; +export { TemplateMiner } from './template-miner'; diff --git a/packages/common-utils/src/drain/log-cluster.ts b/packages/common-utils/src/drain/log-cluster.ts new file mode 100644 index 00000000..5ce01eab --- /dev/null +++ b/packages/common-utils/src/drain/log-cluster.ts @@ -0,0 +1,19 @@ +export class LogCluster { + logTemplateTokens: string[]; + clusterId: number; + size: number; + + constructor(logTemplateTokens: string[], clusterId: number) { + this.logTemplateTokens = [...logTemplateTokens]; + this.clusterId = clusterId; + this.size = 1; + } + + getTemplate(): string { + return this.logTemplateTokens.join(' '); + } + + toString(): string { + return `ID=${String(this.clusterId).padEnd(5)} : size=${String(this.size).padEnd(10)}: ${this.getTemplate()}`; + } +} diff --git a/packages/common-utils/src/drain/lru-cache.ts b/packages/common-utils/src/drain/lru-cache.ts new file mode 100644 index 00000000..9be41abf --- /dev/null +++ b/packages/common-utils/src/drain/lru-cache.ts @@ -0,0 +1,154 @@ +interface LruEntry { + key: number; + value: V; + prev: number | null; + next: number | null; +} + +/** + * LRU cache with separate peek (no eviction update) and get (eviction update) methods. + * This mirrors the behavior of Python's cachetools.LRUCache used in Drain3, + * where LogClusterCache.get() bypasses eviction ordering. + */ +export class LruCache { + private capacity: number; + private map: Map = new Map(); + private entries: (LruEntry | null)[] = []; + private freeSlots: number[] = []; + private head: number | null = null; + private tail: number | null = null; + + constructor(capacity: number) { + this.capacity = capacity; + } + + get size(): number { + return this.map.size; + } + + has(key: number): boolean { + return this.map.has(key); + } + + /** Read value without updating recency (used in fast_match). */ + peek(key: number): V | undefined { + const slot = this.map.get(key); + if (slot === undefined) return undefined; + return this.entries[slot]?.value; + } + + /** Read value and mark as most recently used. */ + get(key: number): V | undefined { + const slot = this.map.get(key); + if (slot === undefined) return undefined; + this.moveToHead(slot); + return this.entries[slot]?.value; + } + + /** Insert or update. Returns the evicted [key, value] if cache was at capacity. */ + put(key: number, value: V): [number, V] | undefined { + const existingSlot = this.map.get(key); + if (existingSlot !== undefined) { + const entry = this.entries[existingSlot]!; + entry.value = value; + this.moveToHead(existingSlot); + return undefined; + } + + let evicted: [number, V] | undefined; + if (this.map.size >= this.capacity) { + evicted = this.evictTail(); + } + + const slot = this.allocSlot({ + key, + value, + prev: null, + next: this.head, + }); + + if (this.head !== null) { + this.entries[this.head]!.prev = slot; + } + this.head = slot; + if (this.tail === null) { + this.tail = slot; + } + + this.map.set(key, slot); + return evicted; + } + + remove(key: number): V | undefined { + const slot = this.map.get(key); + if (slot === undefined) return undefined; + this.map.delete(key); + this.unlink(slot); + const entry = this.entries[slot]!; + this.entries[slot] = null; + this.freeSlots.push(slot); + return entry.value; + } + + values(): V[] { + const result: V[] = []; + for (const entry of this.entries) { + if (entry !== null) { + result.push(entry.value); + } + } + return result; + } + + private allocSlot(entry: LruEntry): number { + if (this.freeSlots.length > 0) { + const slot = this.freeSlots.pop()!; + this.entries[slot] = entry; + return slot; + } + this.entries.push(entry); + return this.entries.length - 1; + } + + private unlink(slot: number): void { + const entry = this.entries[slot]!; + if (entry.prev !== null) { + this.entries[entry.prev]!.next = entry.next; + } else { + this.head = entry.next; + } + if (entry.next !== null) { + this.entries[entry.next]!.prev = entry.prev; + } else { + this.tail = entry.prev; + } + } + + private moveToHead(slot: number): void { + if (this.head === slot) return; + this.unlink(slot); + const entry = this.entries[slot]!; + entry.prev = null; + entry.next = this.head; + if (this.head !== null) { + this.entries[this.head]!.prev = slot; + } + this.head = slot; + if (this.tail === null) { + this.tail = slot; + } + } + + private evictTail(): [number, V] | undefined { + if (this.tail === null) return undefined; + const tailSlot = this.tail; + const entry = this.entries[tailSlot]!; + const key = entry.key; + const value = entry.value; + this.map.delete(key); + this.unlink(tailSlot); + this.entries[tailSlot] = null; + this.freeSlots.push(tailSlot); + return [key, value]; + } +} diff --git a/packages/common-utils/src/drain/masking.ts b/packages/common-utils/src/drain/masking.ts new file mode 100644 index 00000000..dd578322 --- /dev/null +++ b/packages/common-utils/src/drain/masking.ts @@ -0,0 +1,60 @@ +export class MaskingInstruction { + private regex: RegExp; + private _pattern: string; + maskWith: string; + + constructor(pattern: string, maskWith: string) { + this._pattern = pattern; + this.regex = new RegExp(pattern, 'g'); + this.maskWith = maskWith; + } + + get pattern(): string { + return this._pattern; + } + + mask(content: string, maskPrefix: string, maskSuffix: string): string { + const replacement = maskPrefix + this.maskWith + maskSuffix; + this.regex.lastIndex = 0; + return content.replace(this.regex, replacement); + } +} + +export class LogMasker { + maskPrefix: string; + maskSuffix: string; + private _maskingInstructions: MaskingInstruction[]; + private maskNameToInstructions: Map; + + constructor( + maskingInstructions: MaskingInstruction[], + maskPrefix: string, + maskSuffix: string, + ) { + this.maskPrefix = maskPrefix; + this.maskSuffix = maskSuffix; + this._maskingInstructions = maskingInstructions; + this.maskNameToInstructions = new Map(); + for (const mi of maskingInstructions) { + const list = this.maskNameToInstructions.get(mi.maskWith) ?? []; + list.push(mi); + this.maskNameToInstructions.set(mi.maskWith, list); + } + } + + mask(content: string): string { + let result = content; + for (const mi of this._maskingInstructions) { + result = mi.mask(result, this.maskPrefix, this.maskSuffix); + } + return result; + } + + get maskNames(): string[] { + return Array.from(this.maskNameToInstructions.keys()); + } + + instructionsByMaskName(maskName: string): MaskingInstruction[] { + return this.maskNameToInstructions.get(maskName) ?? []; + } +} diff --git a/packages/common-utils/src/drain/node.ts b/packages/common-utils/src/drain/node.ts new file mode 100644 index 00000000..d2dbadd0 --- /dev/null +++ b/packages/common-utils/src/drain/node.ts @@ -0,0 +1,9 @@ +export class Node { + keyToChildNode: Map; + clusterIds: number[]; + + constructor() { + this.keyToChildNode = new Map(); + this.clusterIds = []; + } +} diff --git a/packages/common-utils/src/drain/template-miner.ts b/packages/common-utils/src/drain/template-miner.ts new file mode 100644 index 00000000..b718cba5 --- /dev/null +++ b/packages/common-utils/src/drain/template-miner.ts @@ -0,0 +1,180 @@ +import { TemplateMinerConfig } from './config'; +import { Drain } from './drain'; +import { LogCluster } from './log-cluster'; +import { LruCache } from './lru-cache'; +import { LogMasker, MaskingInstruction } from './masking'; + +export interface ExtractedParameter { + value: string; + maskName: string; +} + +export interface AddLogMessageResult { + changeType: string; + clusterId: number; + clusterSize: number; + templateMined: string; + clusterCount: number; +} + +export class TemplateMiner { + drain: Drain; + private masker: LogMasker; + private extractionCache: LruCache<{ + regex: string; + paramMap: Map; + }>; + private extraDelimiters: string[]; + + constructor(config?: TemplateMinerConfig) { + const cfg = config ?? new TemplateMinerConfig(); + const paramStr = cfg.maskPrefix + '*' + cfg.maskSuffix; + + const maskingInstructions = cfg.maskingInstructions.map( + mi => new MaskingInstruction(mi.pattern, mi.maskWith), + ); + + this.masker = new LogMasker( + maskingInstructions, + cfg.maskPrefix, + cfg.maskSuffix, + ); + + this.drain = new Drain( + cfg.drainDepth, + cfg.drainSimTh, + cfg.drainMaxChildren, + cfg.drainMaxClusters, + cfg.drainExtraDelimiters, + paramStr, + cfg.parametrizeNumericTokens, + ); + + this.extractionCache = new LruCache(cfg.parameterExtractionCacheCapacity); + this.extraDelimiters = cfg.drainExtraDelimiters; + } + + addLogMessage(logMessage: string): AddLogMessageResult { + const maskedContent = this.masker.mask(logMessage); + const [cluster, changeType] = this.drain.addLogMessage(maskedContent); + return { + changeType, + clusterId: cluster.clusterId, + clusterSize: cluster.size, + templateMined: cluster.getTemplate(), + clusterCount: this.drain.clusterCount, + }; + } + + match( + logMessage: string, + fullSearchStrategy: string = 'never', + ): LogCluster | null { + const maskedContent = this.masker.mask(logMessage); + return this.drain.match(maskedContent, fullSearchStrategy); + } + + extractParameters( + logTemplate: string, + logMessage: string, + exactMatching: boolean = true, + ): ExtractedParameter[] | null { + let message = logMessage; + for (const delimiter of this.extraDelimiters) { + message = message.split(delimiter).join(' '); + } + + const { regex: templateRegex, paramMap } = + this.getTemplateParameterExtractionRegex(logTemplate, exactMatching); + + const parameterMatch = message.match(new RegExp(templateRegex)); + if (!parameterMatch || !parameterMatch.groups) { + return null; + } + + const extracted: ExtractedParameter[] = []; + for (const [groupName, maskName] of paramMap) { + const value = parameterMatch.groups[groupName]; + if (value !== undefined) { + extracted.push({ value, maskName }); + } + } + + // Sort by position in input string (left-to-right) to match Python Drain3 behavior, + // which depends on CPython's set iteration order for mask name processing. + extracted.sort((a, b) => { + const posA = message.indexOf(a.value); + const posB = message.indexOf(b.value); + return posA - posB; + }); + + return extracted; + } + + private getTemplateParameterExtractionRegex( + logTemplate: string, + exactMatching: boolean, + ): { regex: string; paramMap: Map } { + const cacheKey = simpleHash(`${logTemplate}|${exactMatching}`); + const cached = this.extractionCache.peek(cacheKey); + if (cached) return cached; + + const paramMap = new Map(); + let paramNameCounter = 0; + + const maskNames = new Set(this.masker.maskNames); + maskNames.add('*'); + + const escapedPrefix = escapeRegex(this.masker.maskPrefix); + const escapedSuffix = escapeRegex(this.masker.maskSuffix); + let templateRegex = escapeRegex(logTemplate); + + for (const maskName of maskNames) { + const searchStr = escapedPrefix + escapeRegex(maskName) + escapedSuffix; + while (true) { + const allowedPatterns: string[] = []; + if (exactMatching && maskName !== '*') { + const instructions = this.masker.instructionsByMaskName(maskName); + for (const mi of instructions) { + allowedPatterns.push(mi.pattern); + } + } + if (!exactMatching || maskName === '*') { + allowedPatterns.push('.+?'); + } + + const paramGroupName = `p_${paramNameCounter}`; + paramNameCounter++; + paramMap.set(paramGroupName, maskName); + + const joined = allowedPatterns.join('|'); + const captureRegex = `(?<${paramGroupName}>${joined})`; + + if (templateRegex.includes(searchStr)) { + templateRegex = templateRegex.replace(searchStr, captureRegex); + } else { + break; + } + } + } + + templateRegex = templateRegex.replace(/\\ /g, '\\s+'); + templateRegex = `^${templateRegex}$`; + + const result = { regex: templateRegex, paramMap }; + this.extractionCache.put(cacheKey, result); + return result; + } +} + +function escapeRegex(s: string): string { + return s.replace(/[.*+?^${}()|[\]\\-]/g, '\\$&'); +} + +function simpleHash(s: string): number { + let hash = 5381; + for (let i = 0; i < s.length; i++) { + hash = ((hash * 33) ^ s.charCodeAt(i)) >>> 0; + } + return hash; +}