mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
[HDX-3964] Add event pattern mining to CLI (Shift+P) (#2106)
## Summary Adds a pattern mining feature to the CLI, accessible via `Shift+P`. This mirrors the web app's Pattern Table functionality but runs entirely in TypeScript — no Pyodide/Python WASM needed. **Linear:** https://linear.app/hyperdx/issue/HDX-3964 ## What changed ### 1. Drain library in common-utils (`packages/common-utils/src/drain/`) Ported the [browser-drain](https://github.com/DeploySentinel/browser-drain) TypeScript library into `@hyperdx/common-utils`. This is a pure TypeScript implementation of the Drain3 log template mining algorithm, including: - `TemplateMiner` / `TemplateMinerConfig` — main API - `Drain` — core algorithm with prefix tree and LRU cluster cache - `LogMasker` — regex-based token masking (IPs, numbers, etc.) - `LruCache` — custom LRU cache matching Python Drain3's eviction semantics - 11 Jest tests ported from the original `node:test` suite ### 2. CLI pattern view (`packages/cli/src/components/EventViewer/`) **Keybinding:** `Shift+P` toggles pattern view (pauses follow mode, restores on exit) **Data flow (mirrors web app's `useGroupedPatterns`):** - Issues `SELECT ... ORDER BY rand() LIMIT 100000` to randomly sample up to 100K events - Issues parallel `SELECT count()` to get true total event count - Feeds sampled log bodies through the TypeScript `TemplateMiner` - Estimates pattern counts via `sampleMultiplier = totalCount / sampledRowCount` - Computes time-bucketed trend data per pattern **UI:** - Pattern list with columns: Est. Count (with `~` prefix), Pattern - `l`/`Enter` expands a pattern to show its sample events (full table columns) - `h`/`Esc` returns to pattern list - `j/k/G/g/Ctrl+D/Ctrl+U` navigation throughout - Loading spinner while sampling query runs **Alias fix:** Pattern and count queries compute `WITH` clauses from the source's `defaultTableSelectExpression` so Lucene searches using aliases (e.g. `level:error` where `level` is an alias for `SeverityText`) resolve correctly. 
### New files - `packages/common-utils/src/drain/` — 7 source files + barrel index - `packages/common-utils/src/__tests__/drain.test.ts` - `packages/cli/src/components/EventViewer/usePatternData.ts` - `packages/cli/src/components/EventViewer/PatternView.tsx` - `packages/cli/src/components/EventViewer/PatternSamplesView.tsx` ### Modified files - `packages/cli/src/api/eventQuery.ts` — added `buildPatternSampleQuery`, `buildTotalCountQuery`, `buildAliasWithClauses` - `packages/cli/src/components/EventViewer/EventViewer.tsx` — wired in pattern state + rendering - `packages/cli/src/components/EventViewer/useKeybindings.ts` — added P, l, h keybindings + pattern/sample navigation - `packages/cli/src/components/EventViewer/SubComponents.tsx` — added P to help screen ### Demo https://github.com/user-attachments/assets/50a2edfc-8891-43ae-ab86-b96fca778c66
This commit is contained in:
parent
7a49c74835
commit
418f70c516
21 changed files with 2119 additions and 5 deletions
5
.changeset/add-cli-pattern-mining.md
Normal file
5
.changeset/add-cli-pattern-mining.md
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
"@hyperdx/cli": patch
|
||||
---
|
||||
|
||||
Add event pattern mining view (Shift+P) with sampled estimation and drill-down
|
||||
5
.changeset/add-drain-library.md
Normal file
5
.changeset/add-drain-library.md
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
"@hyperdx/common-utils": patch
|
||||
---
|
||||
|
||||
Add Drain log template mining library (ported from browser-drain)
|
||||
|
|
@ -42,6 +42,7 @@
|
|||
"run:clickhouse": "nx run @hyperdx/app:run:clickhouse",
|
||||
"dev": "sh -c '. ./scripts/dev-env.sh && yarn build:common-utils && dotenvx run --convention=nextjs -- docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml up -d && yarn app:dev; dotenvx run --convention=nextjs -- docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml down'",
|
||||
"dev:local": "IS_LOCAL_APP_MODE='DANGEROUSLY_is_local_app_mode💀' yarn dev",
|
||||
"cli:dev": "yarn workspace @hyperdx/cli dev",
|
||||
"dev:down": "sh -c '. ./scripts/dev-env.sh && docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml down && sh ./scripts/dev-kill-ports.sh'",
|
||||
"dev:compose": "sh -c '. ./scripts/dev-env.sh && docker compose -p \"$HDX_DEV_PROJECT\" -f docker-compose.dev.yml'",
|
||||
"knip": "knip",
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import type {
|
|||
import { chSqlToAliasMap } from '@hyperdx/common-utils/dist/clickhouse';
|
||||
import { renderChartConfig } from '@hyperdx/common-utils/dist/core/renderChartConfig';
|
||||
import type { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
|
||||
import { aliasMapToWithClauses } from '@hyperdx/common-utils/dist/core/utils';
|
||||
import { DisplayType } from '@hyperdx/common-utils/dist/types';
|
||||
import type {
|
||||
BuilderChartConfigWithDateRange,
|
||||
|
|
@ -106,6 +107,144 @@ export async function buildEventSearchQuery(
|
|||
return renderChartConfig(config, metadata, source.querySettings);
|
||||
}
|
||||
|
||||
// ---- Alias WITH clauses from source select --------------------------
|
||||
|
||||
/**
|
||||
* Compute WITH clauses from the source's default select expression.
|
||||
* When a source defines `SeverityText as level`, searches for `level:error`
|
||||
* need `WITH SeverityText AS level` so the alias is available in WHERE.
|
||||
*/
|
||||
async function buildAliasWithClauses(
|
||||
source: SourceResponse,
|
||||
metadata: Metadata,
|
||||
): Promise<BuilderChartConfigWithDateRange['with']> {
|
||||
const selectExpr = source.defaultTableSelectExpression;
|
||||
if (!selectExpr) return undefined;
|
||||
|
||||
const tsExpr = source.timestampValueExpression ?? 'TimestampTime';
|
||||
|
||||
// Render a dummy query with the source's select to extract aliases
|
||||
const dummyConfig: BuilderChartConfigWithDateRange = {
|
||||
displayType: DisplayType.Search,
|
||||
select: selectExpr,
|
||||
from: source.from,
|
||||
where: '',
|
||||
connection: source.connection,
|
||||
timestampValueExpression: tsExpr,
|
||||
implicitColumnExpression: source.implicitColumnExpression,
|
||||
limit: { limit: 0 },
|
||||
dateRange: [new Date(), new Date()],
|
||||
};
|
||||
|
||||
const dummySql = await renderChartConfig(
|
||||
dummyConfig,
|
||||
metadata,
|
||||
source.querySettings,
|
||||
);
|
||||
const aliasMap = chSqlToAliasMap(dummySql);
|
||||
return aliasMapToWithClauses(aliasMap);
|
||||
}
|
||||
|
||||
// ---- Pattern sampling query -----------------------------------------
|
||||
|
||||
/** Options for {@link buildPatternSampleQuery}. */
export interface PatternSampleQueryOptions {
  /** Event source (table + connection) to sample from. */
  source: SourceResponse;
  /** Optional Lucene search string; empty or omitted means match-all. */
  searchQuery?: string;
  /** Inclusive start of the sampled date range. */
  startTime: Date;
  /** Inclusive end of the sampled date range. */
  endTime: Date;
  /** Number of random rows to sample (default 100_000) */
  sampleLimit?: number;
}
|
||||
|
||||
/**
|
||||
* Build a query that randomly samples events for pattern mining.
|
||||
* Selects the body column and timestamp, ordered by rand().
|
||||
*/
|
||||
export async function buildPatternSampleQuery(
|
||||
opts: PatternSampleQueryOptions,
|
||||
metadata: Metadata,
|
||||
): Promise<ChSql> {
|
||||
const {
|
||||
source,
|
||||
searchQuery = '',
|
||||
startTime,
|
||||
endTime,
|
||||
sampleLimit = 100_000,
|
||||
} = opts;
|
||||
|
||||
const tsExpr = source.timestampValueExpression ?? 'TimestampTime';
|
||||
|
||||
// Use the same select as the main event table so sample rows have all columns
|
||||
let selectExpr = source.defaultTableSelectExpression ?? '';
|
||||
if (!selectExpr && source.kind === 'trace') {
|
||||
selectExpr = buildTraceSelectExpression(source);
|
||||
}
|
||||
|
||||
// Compute alias WITH clauses so search aliases (e.g. `level`) resolve in WHERE
|
||||
const aliasWith = searchQuery
|
||||
? await buildAliasWithClauses(source, metadata)
|
||||
: undefined;
|
||||
|
||||
const config: BuilderChartConfigWithDateRange = {
|
||||
displayType: DisplayType.Search,
|
||||
select: selectExpr,
|
||||
from: source.from,
|
||||
where: searchQuery,
|
||||
whereLanguage: searchQuery ? 'lucene' : 'sql',
|
||||
connection: source.connection,
|
||||
timestampValueExpression: tsExpr,
|
||||
implicitColumnExpression: source.implicitColumnExpression,
|
||||
orderBy: 'rand()',
|
||||
limit: { limit: sampleLimit },
|
||||
dateRange: [startTime, endTime],
|
||||
...(aliasWith ? { with: aliasWith } : {}),
|
||||
};
|
||||
|
||||
return renderChartConfig(config, metadata, source.querySettings);
|
||||
}
|
||||
|
||||
// ---- Total count query ----------------------------------------------
|
||||
|
||||
/** Options for {@link buildTotalCountQuery}. */
export interface TotalCountQueryOptions {
  /** Event source (table + connection) to count over. */
  source: SourceResponse;
  /** Optional Lucene search string; empty or omitted means match-all. */
  searchQuery?: string;
  /** Inclusive start of the counted date range. */
  startTime: Date;
  /** Inclusive end of the counted date range. */
  endTime: Date;
}
|
||||
|
||||
/**
|
||||
* Build a query to get the total count of events matching the search.
|
||||
*/
|
||||
export async function buildTotalCountQuery(
|
||||
opts: TotalCountQueryOptions,
|
||||
metadata: Metadata,
|
||||
): Promise<ChSql> {
|
||||
const { source, searchQuery = '', startTime, endTime } = opts;
|
||||
|
||||
const tsExpr = source.timestampValueExpression ?? 'TimestampTime';
|
||||
|
||||
// Compute alias WITH clauses so search aliases (e.g. `level`) resolve in WHERE
|
||||
const aliasWith = searchQuery
|
||||
? await buildAliasWithClauses(source, metadata)
|
||||
: undefined;
|
||||
|
||||
const config: BuilderChartConfigWithDateRange = {
|
||||
displayType: DisplayType.Table,
|
||||
select: 'count() as total',
|
||||
from: source.from,
|
||||
where: searchQuery,
|
||||
whereLanguage: searchQuery ? 'lucene' : 'sql',
|
||||
connection: source.connection,
|
||||
timestampValueExpression: tsExpr,
|
||||
implicitColumnExpression: source.implicitColumnExpression,
|
||||
limit: { limit: 1 },
|
||||
dateRange: [startTime, endTime],
|
||||
...(aliasWith ? { with: aliasWith } : {}),
|
||||
};
|
||||
|
||||
return renderChartConfig(config, metadata, source.querySettings);
|
||||
}
|
||||
|
||||
// ---- Full row fetch (SELECT *) -------------------------------------
|
||||
|
||||
// ---- Trace waterfall queries ----------------------------------------
|
||||
|
|
|
|||
|
|
@ -15,8 +15,11 @@ import {
|
|||
SqlPreviewScreen,
|
||||
} from './SubComponents';
|
||||
import { TableView } from './TableView';
|
||||
import { PatternView } from './PatternView';
|
||||
import { PatternSamplesView } from './PatternSamplesView';
|
||||
import { DetailPanel } from './DetailPanel';
|
||||
import { useEventData } from './useEventData';
|
||||
import { usePatternData } from './usePatternData';
|
||||
import { useKeybindings } from './useKeybindings';
|
||||
|
||||
export default function EventViewer({
|
||||
|
|
@ -76,6 +79,12 @@ export default function EventViewer({
|
|||
const [traceSelectedNode, setTraceSelectedNode] = useState<SpanNode | null>(
|
||||
null,
|
||||
);
|
||||
const [showPatterns, setShowPatterns] = useState(false);
|
||||
const [patternSelectedRow, setPatternSelectedRow] = useState(0);
|
||||
const [patternScrollOffset, setPatternScrollOffset] = useState(0);
|
||||
const [expandedPattern, setExpandedPattern] = useState<number | null>(null);
|
||||
const [sampleSelectedRow, setSampleSelectedRow] = useState(0);
|
||||
const [sampleScrollOffset, setSampleScrollOffset] = useState(0);
|
||||
const [timeRange, setTimeRange] = useState<TimeRange>(() => {
|
||||
const now = new Date();
|
||||
return { start: new Date(now.getTime() - 60 * 60 * 1000), end: now };
|
||||
|
|
@ -111,6 +120,23 @@ export default function EventViewer({
|
|||
expandedRow,
|
||||
});
|
||||
|
||||
// ---- Pattern mining -----------------------------------------------
|
||||
|
||||
const {
|
||||
patterns,
|
||||
loading: patternsLoading,
|
||||
error: patternsError,
|
||||
totalCount: patternsTotalCount,
|
||||
} = usePatternData({
|
||||
clickhouseClient,
|
||||
metadata,
|
||||
source,
|
||||
submittedQuery,
|
||||
startTime: timeRange.start,
|
||||
endTime: timeRange.end,
|
||||
enabled: showPatterns,
|
||||
});
|
||||
|
||||
// ---- Derived values ----------------------------------------------
|
||||
|
||||
const columns = useMemo(
|
||||
|
|
@ -171,11 +197,22 @@ export default function EventViewer({
|
|||
focusDetailSearch,
|
||||
showHelp,
|
||||
showSql,
|
||||
showPatterns,
|
||||
expandedRow,
|
||||
detailTab,
|
||||
traceDetailExpanded,
|
||||
selectedRow,
|
||||
scrollOffset,
|
||||
patternSelectedRow,
|
||||
patternScrollOffset,
|
||||
patternCount: patterns.length,
|
||||
expandedPattern,
|
||||
sampleSelectedRow,
|
||||
sampleScrollOffset,
|
||||
sampleCount:
|
||||
expandedPattern !== null
|
||||
? (patterns[expandedPattern]?.samples.length ?? 0)
|
||||
: 0,
|
||||
isFollowing,
|
||||
hasMore,
|
||||
events,
|
||||
|
|
@ -198,7 +235,13 @@ export default function EventViewer({
|
|||
setFocusDetailSearch,
|
||||
setShowHelp,
|
||||
setShowSql,
|
||||
setShowPatterns,
|
||||
setSqlScrollOffset,
|
||||
setPatternSelectedRow,
|
||||
setPatternScrollOffset,
|
||||
setExpandedPattern,
|
||||
setSampleSelectedRow,
|
||||
setSampleScrollOffset,
|
||||
setSelectedRow,
|
||||
setScrollOffset,
|
||||
setExpandedRow,
|
||||
|
|
@ -313,6 +356,27 @@ export default function EventViewer({
|
|||
onTraceChSqlChange={setTraceChSql}
|
||||
onTraceSelectedNodeChange={setTraceSelectedNode}
|
||||
/>
|
||||
) : showPatterns &&
|
||||
expandedPattern !== null &&
|
||||
patterns[expandedPattern] ? (
|
||||
<PatternSamplesView
|
||||
pattern={patterns[expandedPattern]}
|
||||
columns={columns}
|
||||
selectedRow={sampleSelectedRow}
|
||||
scrollOffset={sampleScrollOffset}
|
||||
maxRows={maxRows}
|
||||
wrapLines={wrapLines}
|
||||
/>
|
||||
) : showPatterns ? (
|
||||
<PatternView
|
||||
patterns={patterns}
|
||||
selectedRow={patternSelectedRow}
|
||||
scrollOffset={patternScrollOffset}
|
||||
maxRows={maxRows}
|
||||
loading={patternsLoading}
|
||||
error={patternsError}
|
||||
wrapLines={wrapLines}
|
||||
/>
|
||||
) : (
|
||||
<TableView
|
||||
columns={columns}
|
||||
|
|
@ -328,13 +392,35 @@ export default function EventViewer({
|
|||
)}
|
||||
|
||||
<Footer
|
||||
rowCount={events.length}
|
||||
cursorPos={scrollOffset + selectedRow + 1}
|
||||
rowCount={
|
||||
showPatterns && expandedPattern !== null
|
||||
? (patterns[expandedPattern]?.samples.length ?? 0)
|
||||
: showPatterns
|
||||
? patterns.length
|
||||
: events.length
|
||||
}
|
||||
cursorPos={
|
||||
showPatterns && expandedPattern !== null
|
||||
? sampleScrollOffset + sampleSelectedRow + 1
|
||||
: showPatterns
|
||||
? patternScrollOffset + patternSelectedRow + 1
|
||||
: scrollOffset + selectedRow + 1
|
||||
}
|
||||
wrapLines={wrapLines}
|
||||
isFollowing={isFollowing}
|
||||
loadingMore={loadingMore}
|
||||
paginationError={paginationError}
|
||||
scrollInfo={expandedRow !== null ? `Ctrl+D/U to scroll` : undefined}
|
||||
scrollInfo={
|
||||
expandedRow !== null
|
||||
? 'Ctrl+D/U to scroll'
|
||||
: showPatterns && expandedPattern !== null
|
||||
? '[SAMPLES] h to go back'
|
||||
: showPatterns
|
||||
? patternsLoading
|
||||
? '[PATTERNS] Sampling…'
|
||||
: `[PATTERNS] ${patterns.length} patterns${patternsTotalCount != null ? ` from ~${patternsTotalCount.toLocaleString()} events` : ''} — l to expand, P/Esc to close`
|
||||
: undefined
|
||||
}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
|
|
|
|||
101
packages/cli/src/components/EventViewer/PatternSamplesView.tsx
Normal file
101
packages/cli/src/components/EventViewer/PatternSamplesView.tsx
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
import React from 'react';
|
||||
import { Box, Text } from 'ink';
|
||||
|
||||
import type { PatternGroup } from './usePatternData';
|
||||
import type { Column, EventRow } from './types';
|
||||
import { formatDynamicRow } from './utils';
|
||||
import { TableHeader } from './SubComponents';
|
||||
|
||||
// ---- Types ---------------------------------------------------------
|
||||
|
||||
type PatternSamplesViewProps = {
  /** The expanded pattern group whose sample events are being shown. */
  pattern: PatternGroup;
  /** Table columns — the same column set used by the main event table. */
  columns: Column[];
  /** Index of the highlighted row within the visible window. */
  selectedRow: number;
  /** Number of rows scrolled past before the visible window begins. */
  scrollOffset: number;
  /** Total terminal rows available to this view (header + table). */
  maxRows: number;
  /** When true, cell text wraps; otherwise it truncates. */
  wrapLines: boolean;
};
|
||||
|
||||
// ---- Component -----------------------------------------------------
|
||||
|
||||
/**
 * Drill-down view for a single mined pattern: a short header describing the
 * pattern (template text plus estimated/sampled counts) above a table of its
 * sample events, rendered with the same columns as the main event table.
 */
export function PatternSamplesView({
  pattern,
  columns,
  selectedRow,
  scrollOffset,
  maxRows,
  wrapLines,
}: PatternSamplesViewProps) {
  // The header consumes three terminal rows: pattern line, count line,
  // and one blank spacer.
  const headerRows = 3;
  const tableRowBudget = Math.max(1, maxRows - headerRows);
  const visibleSamples = pattern.samples.slice(
    scrollOffset,
    scrollOffset + tableRowBudget,
  );
  const padRows = tableRowBudget - visibleSamples.length;

  // Text color for one cell: selection wins; the timestamp column (index 0)
  // is gray; the severity column (index 1) uses the severity color when set.
  const cellColor = (
    highlighted: boolean,
    columnIndex: number,
    severityColor: string | undefined,
  ) =>
    highlighted
      ? 'cyan'
      : columnIndex === 0
        ? 'gray'
        : severityColor && columnIndex === 1
          ? severityColor
          : undefined;

  return (
    <Box flexDirection="column" marginTop={1} height={maxRows + 1}>
      {/* Pattern header */}
      <Box>
        <Text bold color="green">
          Pattern:{' '}
        </Text>
        <Text wrap="truncate">{pattern.pattern}</Text>
      </Box>
      <Box>
        <Text dimColor>
          ~{pattern.estimatedCount.toLocaleString()} estimated events (
          {pattern.count.toLocaleString()} sampled) — h to go back
        </Text>
      </Box>
      <Text> </Text>

      {/* Sample events table */}
      <TableHeader columns={columns} />

      {visibleSamples.map((row: EventRow, rowIndex: number) => {
        const highlighted = rowIndex === selectedRow;
        const formatted = formatDynamicRow(row, columns);
        return (
          <Box key={rowIndex} overflowX="hidden">
            <Box width={2}>
              <Text color="cyan" bold>
                {highlighted ? '▸' : ' '}
              </Text>
            </Box>
            {formatted.cells.map((cell: string, columnIndex: number) => (
              <Box
                key={columnIndex}
                width={columns[columnIndex]?.width ?? '10%'}
                overflowX={wrapLines ? undefined : 'hidden'}
              >
                <Text
                  wrap={wrapLines ? 'wrap' : 'truncate'}
                  color={cellColor(
                    highlighted,
                    columnIndex,
                    formatted.severityColor,
                  )}
                  bold={highlighted}
                  dimColor={columnIndex === 0 && !highlighted}
                  inverse={highlighted}
                >
                  {cell}
                </Text>
              </Box>
            ))}
          </Box>
        );
      })}

      {padRows > 0 &&
        Array.from({ length: padRows }).map((_, padIndex) => (
          <Text key={`pad-${padIndex}`}> </Text>
        ))}
    </Box>
  );
}
|
||||
101
packages/cli/src/components/EventViewer/PatternView.tsx
Normal file
101
packages/cli/src/components/EventViewer/PatternView.tsx
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
import React from 'react';
|
||||
import { Box, Text } from 'ink';
|
||||
import Spinner from 'ink-spinner';
|
||||
|
||||
import type { PatternGroup } from './usePatternData';
|
||||
|
||||
// ---- Types ---------------------------------------------------------
|
||||
|
||||
type PatternViewProps = {
  /** Mined pattern groups, in display order. */
  patterns: PatternGroup[];
  /** Index of the highlighted row within the visible window. */
  selectedRow: number;
  /** Number of rows scrolled past before the visible window begins. */
  scrollOffset: number;
  /** Total terminal rows available to this view. */
  maxRows: number;
  /** True while the sampling query / mining is still running. */
  loading: boolean;
  /** Fetch or mining error to display, or null when none. */
  error: Error | null;
  /** When true, cell text wraps; otherwise it truncates. */
  wrapLines: boolean;
};
|
||||
|
||||
// ---- Component -----------------------------------------------------
|
||||
|
||||
/**
 * Pattern list view: one row per mined pattern showing the estimated count
 * (with a `~` prefix) and the pattern template, plus loading / error / empty
 * states while data is unavailable.
 */
export function PatternView({
  patterns,
  selectedRow,
  scrollOffset,
  maxRows,
  loading,
  error,
  wrapLines,
}: PatternViewProps) {
  const pageItems = patterns.slice(scrollOffset, scrollOffset + maxRows);
  const padRows = maxRows - pageItems.length;
  const textWrap = wrapLines ? 'wrap' : 'truncate';
  const cellOverflow = wrapLines ? undefined : 'hidden';

  // Single status line rendered in place of data rows when there is
  // nothing to show yet (loading, failed, or no patterns).
  let status: React.ReactNode = null;
  if (loading) {
    status = (
      <Text>
        <Spinner type="dots" /> Sampling events and mining patterns…
      </Text>
    );
  } else if (error) {
    status = <Text color="red">Error: {error.message}</Text>;
  } else if (pageItems.length === 0) {
    status = <Text dimColor>No patterns found.</Text>;
  }

  return (
    <Box flexDirection="column" marginTop={1} height={maxRows + 1}>
      {/* Header */}
      <Box overflowX="hidden">
        <Box width="12%">
          <Text bold dimColor wrap="truncate">
            Est. Count
          </Text>
        </Box>
        <Box flexGrow={1}>
          <Text bold dimColor wrap="truncate">
            Pattern
          </Text>
        </Box>
      </Box>

      {status}

      {!loading &&
        !error &&
        pageItems.map((group, rowIndex) => {
          const highlighted = rowIndex === selectedRow;
          return (
            <Box key={group.id} overflowX="hidden">
              <Box width={2}>
                <Text color="cyan" bold>
                  {highlighted ? '▸' : ' '}
                </Text>
              </Box>
              <Box width="12%" overflowX={cellOverflow}>
                <Text
                  color={highlighted ? 'cyan' : 'yellow'}
                  bold={highlighted}
                  inverse={highlighted}
                  wrap={textWrap}
                >
                  ~{group.estimatedCount.toLocaleString()}
                </Text>
              </Box>
              <Box flexGrow={1} overflowX={cellOverflow}>
                <Text
                  color={highlighted ? 'cyan' : undefined}
                  bold={highlighted}
                  inverse={highlighted}
                  wrap={textWrap}
                >
                  {group.pattern}
                </Text>
              </Box>
            </Box>
          );
        })}

      {padRows > 0 &&
        !loading &&
        Array.from({ length: padRows }).map((_, padIndex) => (
          <Text key={`pad-${padIndex}`}> </Text>
        ))}
    </Box>
  );
}
|
||||
|
|
@ -184,6 +184,7 @@ export const HelpScreen = React.memo(function HelpScreen() {
|
|||
['Shift+Tab', 'Previous source / saved search'],
|
||||
['t', 'Edit time range in $EDITOR'],
|
||||
['s', 'Edit select clause in $EDITOR'],
|
||||
['P (Shift+P)', 'Show event patterns'],
|
||||
['D', 'Show generated SQL'],
|
||||
['f', 'Toggle follow mode (live tail)'],
|
||||
['o', 'Open trace in browser'],
|
||||
|
|
|
|||
|
|
@ -22,11 +22,19 @@ export interface KeybindingParams {
|
|||
focusDetailSearch: boolean;
|
||||
showHelp: boolean;
|
||||
showSql: boolean;
|
||||
showPatterns: boolean;
|
||||
expandedRow: number | null;
|
||||
detailTab: 'overview' | 'columns' | 'trace';
|
||||
traceDetailExpanded: boolean;
|
||||
selectedRow: number;
|
||||
scrollOffset: number;
|
||||
patternSelectedRow: number;
|
||||
patternScrollOffset: number;
|
||||
patternCount: number;
|
||||
expandedPattern: number | null;
|
||||
sampleSelectedRow: number;
|
||||
sampleScrollOffset: number;
|
||||
sampleCount: number;
|
||||
isFollowing: boolean;
|
||||
hasMore: boolean;
|
||||
events: EventRow[];
|
||||
|
|
@ -58,7 +66,13 @@ export interface KeybindingParams {
|
|||
setFocusDetailSearch: React.Dispatch<React.SetStateAction<boolean>>;
|
||||
setShowHelp: React.Dispatch<React.SetStateAction<boolean>>;
|
||||
setShowSql: React.Dispatch<React.SetStateAction<boolean>>;
|
||||
setShowPatterns: React.Dispatch<React.SetStateAction<boolean>>;
|
||||
setSqlScrollOffset: React.Dispatch<React.SetStateAction<number>>;
|
||||
setPatternSelectedRow: React.Dispatch<React.SetStateAction<number>>;
|
||||
setPatternScrollOffset: React.Dispatch<React.SetStateAction<number>>;
|
||||
setExpandedPattern: React.Dispatch<React.SetStateAction<number | null>>;
|
||||
setSampleSelectedRow: React.Dispatch<React.SetStateAction<number>>;
|
||||
setSampleScrollOffset: React.Dispatch<React.SetStateAction<number>>;
|
||||
setSelectedRow: React.Dispatch<React.SetStateAction<number>>;
|
||||
setScrollOffset: React.Dispatch<React.SetStateAction<number>>;
|
||||
setExpandedRow: React.Dispatch<React.SetStateAction<number | null>>;
|
||||
|
|
@ -93,11 +107,19 @@ export function useKeybindings(params: KeybindingParams): void {
|
|||
focusDetailSearch,
|
||||
showHelp,
|
||||
showSql,
|
||||
showPatterns,
|
||||
expandedRow,
|
||||
detailTab,
|
||||
traceDetailExpanded,
|
||||
selectedRow,
|
||||
scrollOffset,
|
||||
patternSelectedRow,
|
||||
patternScrollOffset,
|
||||
patternCount,
|
||||
expandedPattern,
|
||||
sampleSelectedRow,
|
||||
sampleScrollOffset,
|
||||
sampleCount,
|
||||
isFollowing,
|
||||
hasMore,
|
||||
events,
|
||||
|
|
@ -120,7 +142,13 @@ export function useKeybindings(params: KeybindingParams): void {
|
|||
setFocusDetailSearch,
|
||||
setShowHelp,
|
||||
setShowSql,
|
||||
setShowPatterns,
|
||||
setSqlScrollOffset,
|
||||
setPatternSelectedRow,
|
||||
setPatternScrollOffset,
|
||||
setExpandedPattern,
|
||||
setSampleSelectedRow,
|
||||
setSampleScrollOffset,
|
||||
setSelectedRow,
|
||||
setScrollOffset,
|
||||
setExpandedRow,
|
||||
|
|
@ -213,6 +241,155 @@ export function useKeybindings(params: KeybindingParams): void {
|
|||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Pattern samples navigation (expanded pattern)
|
||||
if (showPatterns && expandedPattern !== null && expandedRow === null) {
|
||||
if (input === 'h' || key.escape) {
|
||||
setExpandedPattern(null);
|
||||
return;
|
||||
}
|
||||
if (input === 'j' || key.downArrow) {
|
||||
setSampleSelectedRow(r => {
|
||||
const next = r + 1;
|
||||
const visibleCount = Math.min(
|
||||
sampleCount - sampleScrollOffset,
|
||||
maxRows,
|
||||
);
|
||||
if (next >= maxRows) {
|
||||
setSampleScrollOffset(o =>
|
||||
Math.min(o + 1, Math.max(0, sampleCount - maxRows)),
|
||||
);
|
||||
return r;
|
||||
}
|
||||
return Math.min(next, visibleCount - 1);
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (input === 'k' || key.upArrow) {
|
||||
setSampleSelectedRow(r => {
|
||||
const next = r - 1;
|
||||
if (next < 0) {
|
||||
setSampleScrollOffset(o => Math.max(0, o - 1));
|
||||
return 0;
|
||||
}
|
||||
return next;
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (input === 'G') {
|
||||
const maxOffset = Math.max(0, sampleCount - maxRows);
|
||||
setSampleScrollOffset(maxOffset);
|
||||
setSampleSelectedRow(Math.min(sampleCount - 1, maxRows - 1));
|
||||
return;
|
||||
}
|
||||
if (input === 'g') {
|
||||
setSampleScrollOffset(0);
|
||||
setSampleSelectedRow(0);
|
||||
return;
|
||||
}
|
||||
if (key.ctrl && input === 'd') {
|
||||
const half = Math.floor(maxRows / 2);
|
||||
const maxOffset = Math.max(0, sampleCount - maxRows);
|
||||
setSampleScrollOffset(o => Math.min(o + half, maxOffset));
|
||||
return;
|
||||
}
|
||||
if (key.ctrl && input === 'u') {
|
||||
const half = Math.floor(maxRows / 2);
|
||||
setSampleScrollOffset(o => Math.max(0, o - half));
|
||||
return;
|
||||
}
|
||||
if (input === 'w') {
|
||||
setWrapLines(w => !w);
|
||||
return;
|
||||
}
|
||||
if (input === 'q') process.exit(0);
|
||||
return;
|
||||
}
|
||||
|
||||
// Pattern view navigation
|
||||
if (showPatterns && expandedRow === null) {
|
||||
if (input === 'P') {
|
||||
setShowPatterns(false);
|
||||
if (wasFollowingRef.current) {
|
||||
setIsFollowing(true);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (key.escape) {
|
||||
setShowPatterns(false);
|
||||
if (wasFollowingRef.current) {
|
||||
setIsFollowing(true);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (key.return || input === 'l') {
|
||||
setExpandedPattern(patternScrollOffset + patternSelectedRow);
|
||||
setSampleSelectedRow(0);
|
||||
setSampleScrollOffset(0);
|
||||
return;
|
||||
}
|
||||
if (input === 'j' || key.downArrow) {
|
||||
setPatternSelectedRow(r => {
|
||||
const next = r + 1;
|
||||
const visibleCount = Math.min(
|
||||
patternCount - patternScrollOffset,
|
||||
maxRows,
|
||||
);
|
||||
if (next >= maxRows) {
|
||||
setPatternScrollOffset(o =>
|
||||
Math.min(o + 1, Math.max(0, patternCount - maxRows)),
|
||||
);
|
||||
return r;
|
||||
}
|
||||
return Math.min(next, visibleCount - 1);
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (input === 'k' || key.upArrow) {
|
||||
setPatternSelectedRow(r => {
|
||||
const next = r - 1;
|
||||
if (next < 0) {
|
||||
setPatternScrollOffset(o => Math.max(0, o - 1));
|
||||
return 0;
|
||||
}
|
||||
return next;
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (input === 'G') {
|
||||
const maxOffset = Math.max(0, patternCount - maxRows);
|
||||
setPatternScrollOffset(maxOffset);
|
||||
setPatternSelectedRow(Math.min(patternCount - 1, maxRows - 1));
|
||||
return;
|
||||
}
|
||||
if (input === 'g') {
|
||||
setPatternScrollOffset(0);
|
||||
setPatternSelectedRow(0);
|
||||
return;
|
||||
}
|
||||
if (key.ctrl && input === 'd') {
|
||||
const half = Math.floor(maxRows / 2);
|
||||
const maxOffset = Math.max(0, patternCount - maxRows);
|
||||
setPatternScrollOffset(o => Math.min(o + half, maxOffset));
|
||||
return;
|
||||
}
|
||||
if (key.ctrl && input === 'u') {
|
||||
const half = Math.floor(maxRows / 2);
|
||||
setPatternScrollOffset(o => Math.max(0, o - half));
|
||||
return;
|
||||
}
|
||||
if (input === 'w') {
|
||||
setWrapLines(w => !w);
|
||||
return;
|
||||
}
|
||||
if (input === '/') {
|
||||
setFocusSearch(true);
|
||||
return;
|
||||
}
|
||||
if (input === 'q') process.exit(0);
|
||||
return;
|
||||
}
|
||||
|
||||
// ---- Trace tab keybindings ----------------------------------------
|
||||
if (expandedRow !== null && detailTab === 'trace') {
|
||||
// When detail view is expanded (full-page Event Details):
|
||||
|
|
@ -388,6 +565,14 @@ export function useKeybindings(params: KeybindingParams): void {
|
|||
handleTabSwitch(key.shift ? -1 : 1);
|
||||
return;
|
||||
}
|
||||
if (input === 'P') {
|
||||
wasFollowingRef.current = isFollowing;
|
||||
setIsFollowing(false);
|
||||
setShowPatterns(true);
|
||||
setPatternSelectedRow(0);
|
||||
setPatternScrollOffset(0);
|
||||
return;
|
||||
}
|
||||
if (input === 'A' && onOpenAlerts) {
|
||||
onOpenAlerts();
|
||||
return;
|
||||
|
|
|
|||
333
packages/cli/src/components/EventViewer/usePatternData.ts
Normal file
333
packages/cli/src/components/EventViewer/usePatternData.ts
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
/**
|
||||
* Fetches a random sample of events from ClickHouse, mines patterns
|
||||
* using the Drain algorithm, and estimates total counts using a
|
||||
* sampleMultiplier — mirroring the web frontend's useGroupedPatterns.
|
||||
*/
|
||||
import { useState, useEffect, useCallback, useRef } from 'react';
|
||||
|
||||
import {
|
||||
TemplateMiner,
|
||||
TemplateMinerConfig,
|
||||
} from '@hyperdx/common-utils/dist/drain';
|
||||
import type { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
|
||||
import { convertDateRangeToGranularityString } from '@hyperdx/common-utils/dist/core/utils';
|
||||
|
||||
import type { SourceResponse, ProxyClickhouseClient } from '@/api/client';
|
||||
import {
|
||||
buildPatternSampleQuery,
|
||||
buildTotalCountQuery,
|
||||
} from '@/api/eventQuery';
|
||||
import { getEventBody } from '@/shared/source';
|
||||
|
||||
import type { EventRow } from './types';
|
||||
import { flatten } from './utils';
|
||||
|
||||
// ---- Constants -----------------------------------------------------

// Cap on rows fed through the miner per refresh.
// NOTE(review): the PR description and buildPatternSampleQuery's default both
// say 100_000, but this is 10_000 — confirm whether the smaller miner cap is
// intentional or a leftover.
const SAMPLES = 10_000;
|
||||
|
||||
// ---- Time bucketing utilities --------------------------------------
|
||||
|
||||
/** Parse a granularity string like "5 minute" into seconds. */
|
||||
function granularityToSeconds(granularity: string): number {
|
||||
const [num, unit] = granularity.split(' ');
|
||||
const n = parseInt(num, 10);
|
||||
switch (unit) {
|
||||
case 'second':
|
||||
return n;
|
||||
case 'minute':
|
||||
return n * 60;
|
||||
case 'hour':
|
||||
return n * 3600;
|
||||
case 'day':
|
||||
return n * 86400;
|
||||
default:
|
||||
return n * 60;
|
||||
}
|
||||
}
|
||||
|
||||
/** Round a timestamp down to the start of its granularity bucket. */
|
||||
function toStartOfBucket(ts: number, granularityMs: number): number {
|
||||
return Math.floor(ts / granularityMs) * granularityMs;
|
||||
}
|
||||
|
||||
/** Generate all bucket start timestamps between start and end. */
|
||||
function generateBuckets(
|
||||
startMs: number,
|
||||
endMs: number,
|
||||
granularityMs: number,
|
||||
): number[] {
|
||||
const buckets: number[] = [];
|
||||
let current = toStartOfBucket(startMs, granularityMs);
|
||||
while (current < endMs) {
|
||||
buckets.push(current);
|
||||
current += granularityMs;
|
||||
}
|
||||
return buckets;
|
||||
}
|
||||
|
||||
// ---- Types ---------------------------------------------------------
|
||||
|
||||
/** A single point of time-bucketed trend data for a pattern sparkline. */
export interface TrendBucket {
  /** Bucket start timestamp, in milliseconds since the Unix epoch. */
  ts: number;
  /** Estimated event count in this bucket (raw sample count scaled by the sample multiplier, rounded). */
  count: number;
}

/** One mined log pattern plus the sampled events and trend data behind it. */
export interface PatternGroup {
  /** Drain cluster ID, stringified. */
  id: string;
  /** The mined log template for this cluster (wildcards mark variable tokens). */
  pattern: string;
  /** Raw count within the sample */
  count: number;
  /** Estimated total count (count * sampleMultiplier), prefixed with ~ in display */
  estimatedCount: number;
  /** The sampled event rows assigned to this cluster. */
  samples: EventRow[];
  /** Time-bucketed trend data for sparkline */
  trend: TrendBucket[];
}
|
||||
|
||||
/** Inputs for {@link usePatternData}. */
export interface UsePatternDataParams {
  /** ClickHouse client used for both the sample and total-count queries. */
  clickhouseClient: ProxyClickhouseClient;
  /** Metadata service passed through to the query builders. */
  metadata: Metadata;
  /** The event source (table, connection, column expressions) to mine. */
  source: SourceResponse;
  /** The submitted search query string used to filter events. */
  submittedQuery: string;
  /** Start of the time range to sample from. */
  startTime: Date;
  /** End of the time range to sample from. */
  endTime: Date;
  /** Only fetch when true (i.e., pattern view is open) */
  enabled: boolean;
}

/** Result of {@link usePatternData}. */
export interface UsePatternDataReturn {
  /** Mined pattern groups, sorted by estimated count descending. */
  patterns: PatternGroup[];
  /** True while the sample/count queries and mining are in flight. */
  loading: boolean;
  /** Last fetch error, or null when the last fetch succeeded. */
  error: Error | null;
  /** True total event count from the count query; null before the first fetch. */
  totalCount: number | null;
  /** Number of rows actually returned by the sampling query. */
  sampledRowCount: number;
}
|
||||
|
||||
// ---- Hook ----------------------------------------------------------
|
||||
|
||||
/**
 * Samples up to {@link SAMPLES} events from ClickHouse, mines log templates
 * with the TypeScript Drain implementation, and returns pattern groups with
 * estimated total counts and time-bucketed trend data (mirrors the web app's
 * `useGroupedPatterns`).
 *
 * Fetches only while `enabled` is true. Results are NOT cleared when the
 * view is disabled; only the fetch key is reset, so reopening the view
 * re-fetches while the previous patterns remain in state.
 */
export function usePatternData({
  clickhouseClient,
  metadata,
  source,
  submittedQuery,
  startTime,
  endTime,
  enabled,
}: UsePatternDataParams): UsePatternDataReturn {
  const [patterns, setPatterns] = useState<PatternGroup[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<Error | null>(null);
  const [totalCount, setTotalCount] = useState<number | null>(null);
  const [sampledRowCount, setSampledRowCount] = useState(0);

  // Track the last query params to avoid redundant fetches
  const lastFetchRef = useRef<string>('');

  // Body column expression from the source config, or undefined when the
  // source does not define one (we then fall back to the last result column).
  const bodyColumn = (() => {
    const expr = getEventBody(source);
    if (expr) return expr;
    return undefined;
  })();

  const fetchPatterns = useCallback(async () => {
    // Serialized fetch params; used as a dedupe key below.
    const fetchKey = JSON.stringify({
      source: source.id,
      submittedQuery,
      startTime: startTime.getTime(),
      endTime: endTime.getTime(),
    });

    // Skip if we already fetched for these exact params
    if (lastFetchRef.current === fetchKey) return;
    lastFetchRef.current = fetchKey;

    setLoading(true);
    setError(null);

    try {
      // Build both SQL statements in parallel (each may hit metadata).
      const [sampleChSql, countChSql] = await Promise.all([
        buildPatternSampleQuery(
          {
            source,
            searchQuery: submittedQuery,
            startTime,
            endTime,
            sampleLimit: SAMPLES,
          },
          metadata,
        ),
        buildTotalCountQuery(
          { source, searchQuery: submittedQuery, startTime, endTime },
          metadata,
        ),
      ]);

      // Fire both queries in parallel: a random sample of rows, and the
      // true total count used to scale sample counts into estimates.
      const [sampleResult, countResult] = await Promise.all([
        clickhouseClient.query({
          query: sampleChSql.sql,
          query_params: sampleChSql.params,
          format: 'JSON',
          connectionId: source.connection,
        }),
        clickhouseClient.query({
          query: countChSql.sql,
          query_params: countChSql.params,
          format: 'JSON',
          connectionId: source.connection,
        }),
      ]);

      const sampleJson = (await sampleResult.json()) as { data: EventRow[] };
      const countJson = (await countResult.json()) as {
        data: Array<Record<string, string | number>>;
      };

      const sampleRows = sampleJson.data ?? [];
      // NOTE(review): assumes the count query aliases count() as `total` —
      // confirm against buildTotalCountQuery.
      const total = Number(countJson.data?.[0]?.total ?? 0);

      setTotalCount(total);
      setSampledRowCount(sampleRows.length);

      if (sampleRows.length === 0) {
        setPatterns([]);
        setLoading(false);
        return;
      }

      // Determine columns from the result keys
      const resultKeys = Object.keys(sampleRows[0]);
      const effectiveBodyColumn =
        bodyColumn ?? resultKeys[resultKeys.length - 1];
      // Use the source's timestamp expression, falling back to the first column
      const tsExpr = source.timestampValueExpression ?? 'TimestampTime';
      const tsColumn = resultKeys.find(k => k === tsExpr) ?? resultKeys[0];

      // Compute granularity for trend buckets
      const granularity = convertDateRangeToGranularityString(
        [startTime, endTime],
        24,
      );
      const granularityMs = granularityToSeconds(granularity) * 1000;
      const allBuckets = generateBuckets(
        startTime.getTime(),
        endTime.getTime(),
        granularityMs,
      );

      // Mine patterns
      const config = new TemplateMinerConfig();
      const miner = new TemplateMiner(config);

      // First pass: assign every sampled row to a Drain cluster and record
      // its timestamp for trend bucketing.
      const clustered: Array<{
        clusterId: number;
        row: EventRow;
        tsMs: number;
      }> = [];
      for (const row of sampleRows) {
        const body = row[effectiveBodyColumn];
        const text = body != null ? flatten(String(body)) : '';
        const result = miner.addLogMessage(text);
        const tsRaw = row[tsColumn];
        // Rows with an unparseable/missing timestamp fall back to startTime.
        const tsMs =
          tsRaw != null
            ? new Date(String(tsRaw)).getTime()
            : startTime.getTime();
        clustered.push({ clusterId: result.clusterId, row, tsMs });
      }

      // Group by cluster ID
      const groups = new Map<
        number,
        {
          rows: EventRow[];
          template: string;
          bucketCounts: Map<number, number>;
        }
      >();

      for (const { clusterId, row, tsMs } of clustered) {
        const bucket = toStartOfBucket(tsMs, granularityMs);
        const existing = groups.get(clusterId);
        if (existing) {
          existing.rows.push(row);
          existing.bucketCounts.set(
            bucket,
            (existing.bucketCounts.get(bucket) ?? 0) + 1,
          );
        } else {
          // Re-match the first row's text after mining completes so the
          // stored template reflects the final (fully merged) cluster state.
          const body = row[effectiveBodyColumn];
          const text = body != null ? flatten(String(body)) : '';
          const match = miner.match(text, 'fallback');
          const bucketCounts = new Map<number, number>();
          bucketCounts.set(bucket, 1);
          groups.set(clusterId, {
            rows: [row],
            // Fall back to the raw text if matching fails (e.g. evicted cluster).
            template: match?.getTemplate() ?? text,
            bucketCounts,
          });
        }
      }

      // Compute sampleMultiplier: scales raw sample counts up to estimates
      // of the true totals.
      const sampleMultiplier =
        total > 0 && sampleRows.length > 0 ? total / sampleRows.length : 1;

      // Convert to sorted array with estimated counts and trend data
      const result: PatternGroup[] = [];
      for (const [id, { rows, template, bucketCounts }] of groups) {
        const trend: TrendBucket[] = allBuckets.map(bucketTs => ({
          ts: bucketTs,
          count: Math.round(
            (bucketCounts.get(bucketTs) ?? 0) * sampleMultiplier,
          ),
        }));

        result.push({
          id: String(id),
          pattern: template,
          count: rows.length,
          // Clamp to >= 1 so a pattern that appeared in the sample never
          // displays as ~0.
          estimatedCount: Math.max(
            Math.round(rows.length * sampleMultiplier),
            1,
          ),
          samples: rows,
          trend,
        });
      }

      result.sort((a, b) => b.estimatedCount - a.estimatedCount);
      setPatterns(result);
    } catch (err: unknown) {
      setError(err instanceof Error ? err : new Error(String(err)));
      // Clear the fetch key so a retry will re-fetch
      lastFetchRef.current = '';
    } finally {
      setLoading(false);
    }
  }, [
    clickhouseClient,
    metadata,
    source,
    submittedQuery,
    startTime,
    endTime,
    bodyColumn,
  ]);

  useEffect(() => {
    if (enabled) {
      fetchPatterns();
    }
  }, [enabled, fetchPatterns]);

  // Reset the fetch key when the view closes so reopening re-fetches.
  // Note: patterns themselves are deliberately left in state here.
  useEffect(() => {
    if (!enabled) {
      lastFetchRef.current = '';
    }
  }, [enabled]);

  return { patterns, loading, error, totalCount, sampledRowCount };
}
|
||||
|
|
@ -284,7 +284,13 @@ export default function TraceWaterfall({
|
|||
<Box width={labelWidth} overflowX="hidden">
|
||||
<Text
|
||||
wrap="truncate"
|
||||
color={isHighlighted ? 'white' : isLog ? 'green' : statusColor}
|
||||
color={
|
||||
isHighlighted
|
||||
? 'white'
|
||||
: isLog
|
||||
? (statusColor ?? 'green')
|
||||
: statusColor
|
||||
}
|
||||
bold={!!statusColor}
|
||||
inverse={isHighlighted}
|
||||
>
|
||||
|
|
|
|||
|
|
@ -51,7 +51,12 @@ export function getStatusColor(node: SpanNode): 'red' | 'yellow' | undefined {
|
|||
}
|
||||
|
||||
export function getBarColor(node: SpanNode): string {
|
||||
if (node.kind === 'log') return 'green';
|
||||
if (node.kind === 'log') {
|
||||
const sev = node.StatusCode?.toLowerCase();
|
||||
if (sev === 'error' || sev === 'fatal' || sev === 'critical') return 'red';
|
||||
if (sev === 'warn' || sev === 'warning') return 'yellow';
|
||||
return 'green';
|
||||
}
|
||||
if (node.StatusCode === '2' || node.StatusCode === 'Error') return 'red';
|
||||
if (node.StatusCode === '1') return 'yellow';
|
||||
return 'cyan';
|
||||
|
|
|
|||
287
packages/common-utils/src/__tests__/drain.test.ts
Normal file
287
packages/common-utils/src/__tests__/drain.test.ts
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
import { TemplateMinerConfig } from '../drain/config';
|
||||
import { Drain } from '../drain/drain';
|
||||
import { TemplateMiner } from '../drain/template-miner';
|
||||
|
||||
// Tests ported from Python Drain3's drain_test.py (originally node:test, now Jest).
describe('Drain', () => {
  // Messages shorter than the tree depth still create/match clusters.
  it('test_add_shorter_than_depth_message', () => {
    const model = new Drain(4);
    let [cluster, changeType] = model.addLogMessage('hello');
    expect(changeType).toBe('cluster_created');

    [cluster, changeType] = model.addLogMessage('hello');
    expect(changeType).toBe('none');

    [cluster, changeType] = model.addLogMessage('otherword');
    expect(changeType).toBe('cluster_created');

    expect(model.clusterCount).toBe(2);
  });

  // Default similarity threshold: templates generalize variable tokens to <*>.
  it('test_add_log_message', () => {
    const model = new Drain();
    const entries = `
Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth]
Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth]
Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2
Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2
Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2
Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth]`
      .split('\n')
      .filter(l => l.trim().length > 0);

    const expected = `
Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth]
Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth]
Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2
Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2
Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2
Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth]`
      .split('\n')
      .filter(l => l.trim().length > 0)
      .map(l => l.trim());

    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }

    expect(actual).toEqual(expected);
    // Python test reports 8 because splitlines() includes 2 empty entries from triple-quote;
    // we only feed the 6 real log lines, so total is 6.
    expect(model.getTotalClusterSize()).toBe(6);
  });

  // Higher threshold (0.75): fewer merges, more exact templates survive.
  it('test_add_log_message_sim_75', () => {
    const model = new Drain(4, 0.75, 100);
    const entries = `
Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth]
Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth]
Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2
Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2
Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2
Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth]`
      .split('\n')
      .filter(l => l.trim().length > 0);

    const expected = `
Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth]
Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth]
Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2
Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2
Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2
Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth]`
      .split('\n')
      .filter(l => l.trim().length > 0)
      .map(l => l.trim());

    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }

    expect(actual).toEqual(expected);
    expect(model.getTotalClusterSize()).toBe(6);
  });

  // maxClusters=1: each new format evicts the previous cluster (LRU).
  it('test_max_clusters', () => {
    const model = new Drain(4, 0.4, 100, 1);
    const entries = `
A format 1
A format 2
B format 1
B format 2
A format 3`
      .split('\n')
      .filter(l => l.trim().length > 0);

    const expected = [
      'A format 1',
      'A format <*>',
      'B format 1',
      'B format <*>',
      'A format 3',
    ];

    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }

    expect(actual).toEqual(expected);
    // Only the surviving cluster's size counts after evictions.
    expect(model.getTotalClusterSize()).toBe(1);
  });

  // LRU eviction when clusters live under different leaf nodes of the tree.
  it('test_max_clusters_lru_multiple_leaf_nodes', () => {
    const model = new Drain(4, 0.4, 100, 2, [], '*');
    const entries = [
      'A A A',
      'A A B',
      'B A A',
      'B A B',
      'C A A',
      'C A B',
      'B A A',
      'A A A',
    ];
    const expected = [
      'A A A',
      'A A *',
      'B A A',
      'B A *',
      'C A A',
      'C A *',
      'B A *',
      'A A A',
    ];

    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }

    expect(actual).toEqual(expected);
    expect(model.getTotalClusterSize()).toBe(4);
  });

  // LRU eviction when all clusters share a single leaf node.
  it('test_max_clusters_lru_single_leaf_node', () => {
    const model = new Drain(4, 0.4, 100, 2, [], '*');
    const entries = [
      'A A A',
      'A A B',
      'A B A',
      'A B B',
      'A C A',
      'A C B',
      'A B A',
      'A A A',
    ];
    const expected = [
      'A A A',
      'A A *',
      'A B A',
      'A B *',
      'A C A',
      'A C *',
      'A B *',
      'A A A',
    ];

    const actual: string[] = [];
    for (const entry of entries) {
      const [cluster] = model.addLogMessage(entry);
      actual.push(cluster.getTemplate());
    }

    expect(actual).toEqual(expected);
  });

  // match() uses similarity 1.0: only exact template matches (wildcards aside).
  it('test_match_only', () => {
    const model = new Drain();
    model.addLogMessage('aa aa aa');
    model.addLogMessage('aa aa bb');
    model.addLogMessage('aa aa cc');
    model.addLogMessage('xx yy zz');

    let c = model.match('aa aa tt');
    expect(c).not.toBeNull();
    expect(c!.clusterId).toBe(1);

    c = model.match('xx yy zz');
    expect(c).not.toBeNull();
    expect(c!.clusterId).toBe(2);

    c = model.match('xx yy rr');
    expect(c).toBeNull();

    c = model.match('nothing');
    expect(c).toBeNull();
  });

  // createTemplate: differing tokens become the param string; lengths must match.
  it('test_create_template', () => {
    const model = new Drain(4, 0.4, 100, null, [], '*');

    const seq1 = ['aa', 'bb', 'dd'];
    const seq2 = ['aa', 'bb', 'cc'];

    let template = model.createTemplate(seq1, seq2);
    expect(template).toEqual(['aa', 'bb', '*']);

    template = model.createTemplate(seq1, seq1);
    expect(template).toEqual(seq1);

    // Mismatched lengths are a programming error and must throw.
    expect(() => model.createTemplate(seq1, ['aa'])).toThrow();
  });
});
|
||||
|
||||
// High-level TemplateMiner API: masking, matching, and parameter extraction.
describe('TemplateMiner', () => {
  // IP/number masking with custom mask delimiters is applied before mining,
  // so two messages differing only in IP land in the same cluster.
  it('add_log_message with masking', () => {
    const config = new TemplateMinerConfig();
    config.maskingInstructions = [
      {
        pattern:
          '((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)',
        maskWith: 'IP',
      },
      {
        pattern: '((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)',
        maskWith: 'NUM',
      },
    ];
    config.maskPrefix = '<:';
    config.maskSuffix = ':>';

    const miner = new TemplateMiner(config);

    let result = miner.addLogMessage('connected to 10.0.0.1');
    expect(result.changeType).toBe('cluster_created');
    expect(result.clusterId).toBe(1);
    expect(result.templateMined).toContain('<:IP:>');

    result = miner.addLogMessage('connected to 192.168.0.1');
    expect(result.changeType).toBe('none');
    expect(result.clusterId).toBe(1);
  });

  // match() finds the trained cluster for a structurally identical message
  // and returns null for an unseen structure.
  it('match after training', () => {
    const config = new TemplateMinerConfig();
    const miner = new TemplateMiner(config);

    miner.addLogMessage('user alice logged in');
    miner.addLogMessage('user bob logged in');

    const matched = miner.match('user charlie logged in');
    expect(matched).not.toBeNull();
    expect(matched!.clusterId).toBe(1);

    const noMatch = miner.match('something completely different');
    expect(noMatch).toBeNull();
  });

  // extractParameters recovers the variable tokens from a concrete message
  // given its mined template.
  it('extract_parameters', () => {
    const config = new TemplateMinerConfig();
    config.maskingInstructions = [
      {
        pattern: '((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)',
        maskWith: 'NUM',
      },
    ];
    const miner = new TemplateMiner(config);

    miner.addLogMessage('user johndoe logged in 11 minutes ago');
    miner.addLogMessage('user janedoe logged in 5 minutes ago');

    const result = miner.addLogMessage('user bob logged in 3 minutes ago');
    const params = miner.extractParameters(
      result.templateMined,
      'user bob logged in 3 minutes ago',
      false,
    );

    expect(params).not.toBeNull();
    expect(params!.length).toBeGreaterThan(0);
  });
});
|
||||
58
packages/common-utils/src/drain/config.ts
Normal file
58
packages/common-utils/src/drain/config.ts
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
export interface MaskingInstructionConfig {
|
||||
pattern: string;
|
||||
maskWith: string;
|
||||
}
|
||||
|
||||
export class TemplateMinerConfig {
|
||||
drainDepth: number = 4;
|
||||
drainSimTh: number = 0.4;
|
||||
drainMaxChildren: number = 100;
|
||||
drainMaxClusters: number | null = null;
|
||||
drainExtraDelimiters: string[] = [];
|
||||
maskPrefix: string = '<';
|
||||
maskSuffix: string = '>';
|
||||
maskingInstructions: MaskingInstructionConfig[] = [];
|
||||
parametrizeNumericTokens: boolean = true;
|
||||
parameterExtractionCacheCapacity: number = 3000;
|
||||
|
||||
static fromJSON(json: Record<string, unknown>): TemplateMinerConfig {
|
||||
const config = new TemplateMinerConfig();
|
||||
if (typeof json.drain_depth === 'number')
|
||||
config.drainDepth = json.drain_depth;
|
||||
if (typeof json.drain_sim_th === 'number')
|
||||
config.drainSimTh = json.drain_sim_th;
|
||||
if (typeof json.drain_max_children === 'number')
|
||||
config.drainMaxChildren = json.drain_max_children;
|
||||
if (
|
||||
typeof json.drain_max_clusters === 'number' ||
|
||||
json.drain_max_clusters === null
|
||||
)
|
||||
config.drainMaxClusters = json.drain_max_clusters;
|
||||
if (Array.isArray(json.drain_extra_delimiters)) {
|
||||
config.drainExtraDelimiters = (
|
||||
json.drain_extra_delimiters as unknown[]
|
||||
).filter((v): v is string => typeof v === 'string');
|
||||
}
|
||||
if (typeof json.mask_prefix === 'string')
|
||||
config.maskPrefix = json.mask_prefix;
|
||||
if (typeof json.mask_suffix === 'string')
|
||||
config.maskSuffix = json.mask_suffix;
|
||||
if (typeof json.parametrize_numeric_tokens === 'boolean')
|
||||
config.parametrizeNumericTokens = json.parametrize_numeric_tokens;
|
||||
if (typeof json.parameter_extraction_cache_capacity === 'number')
|
||||
config.parameterExtractionCacheCapacity =
|
||||
json.parameter_extraction_cache_capacity;
|
||||
if (Array.isArray(json.masking_instructions)) {
|
||||
config.maskingInstructions = (json.masking_instructions as unknown[])
|
||||
.filter(
|
||||
(item): item is Record<string, string> =>
|
||||
typeof item === 'object' && item !== null,
|
||||
)
|
||||
.map(mi => ({
|
||||
pattern: mi.regex_pattern ?? mi.pattern,
|
||||
maskWith: mi.mask_with ?? mi.maskWith,
|
||||
}));
|
||||
}
|
||||
return config;
|
||||
}
|
||||
}
|
||||
370
packages/common-utils/src/drain/drain.ts
Normal file
370
packages/common-utils/src/drain/drain.ts
Normal file
|
|
@ -0,0 +1,370 @@
|
|||
import { LogCluster } from './log-cluster';
|
||||
import { LruCache } from './lru-cache';
|
||||
import { Node } from './node';
|
||||
|
||||
/**
 * Core Drain log-template-mining algorithm (TypeScript port of Python
 * Drain3's Drain class). Maintains a fixed-depth prefix tree keyed first by
 * token count, then by leading tokens; leaf nodes hold candidate cluster IDs
 * that are compared by token similarity.
 */
export class Drain {
  /** Total configured tree depth (>= 3). */
  logClusterDepth: number;
  /** Token levels in the tree: total depth minus the root and leaf levels. */
  private maxNodeDepth: number;
  /** Similarity threshold for assigning a message to an existing cluster. */
  simTh: number;
  /** Max children per node before new tokens collapse into the wildcard. */
  maxChildren: number;
  rootNode: Node;
  /** Extra delimiters split into whitespace before tokenization. */
  extraDelimiters: string[];
  /** When non-null, clusters live in an LRU cache of this capacity. */
  maxClusters: number | null;
  /** Wildcard token used in templates and tree keys (default '<*>'). */
  paramStr: string;
  /** Whether digit-containing tokens are routed to the wildcard child. */
  parametrizeNumericTokens: boolean;

  // Exactly one of these stores is active, chosen by maxClusters:
  // unbounded Map, or bounded LRU with Python-Drain3-style eviction.
  private unlimitedStore: Map<number, LogCluster> | null;
  private limitedStore: LruCache<LogCluster> | null;
  /** Monotonic cluster-ID counter; IDs are never reused, even after eviction. */
  clustersCounter: number;

  constructor(
    depth: number = 4,
    simTh: number = 0.4,
    maxChildren: number = 100,
    maxClusters: number | null = null,
    extraDelimiters: string[] = [],
    paramStr: string = '<*>',
    parametrizeNumericTokens: boolean = true,
  ) {
    if (depth < 3) {
      throw new Error('depth argument must be at least 3');
    }

    this.logClusterDepth = depth;
    this.maxNodeDepth = depth - 2;
    this.simTh = simTh;
    this.maxChildren = maxChildren;
    this.rootNode = new Node();
    this.extraDelimiters = extraDelimiters;
    this.maxClusters = maxClusters;
    this.paramStr = paramStr;
    this.parametrizeNumericTokens = parametrizeNumericTokens;
    this.clustersCounter = 0;

    if (maxClusters !== null) {
      this.unlimitedStore = null;
      this.limitedStore = new LruCache<LogCluster>(maxClusters);
    } else {
      this.unlimitedStore = new Map<number, LogCluster>();
      this.limitedStore = null;
    }
  }

  /** Number of clusters currently stored (post-eviction for LRU mode). */
  get clusterCount(): number {
    if (this.unlimitedStore) return this.unlimitedStore.size;
    return this.limitedStore!.size;
  }

  /** Sum of the `size` (message count) of every currently stored cluster. */
  getTotalClusterSize(): number {
    let total = 0;
    if (this.unlimitedStore) {
      for (const c of this.unlimitedStore.values()) total += c.size;
    } else {
      for (const c of this.limitedStore!.values()) total += c.size;
    }
    return total;
  }

  // Lookup WITHOUT touching LRU recency (used during matching).
  private clusterPeek(id: number): LogCluster | undefined {
    if (this.unlimitedStore) return this.unlimitedStore.get(id);
    return this.limitedStore!.peek(id);
  }

  // Lookup that DOES update LRU recency (used to mark a cluster as used).
  private clusterGet(id: number): LogCluster | undefined {
    if (this.unlimitedStore) return this.unlimitedStore.get(id);
    return this.limitedStore!.get(id);
  }

  private clusterContains(id: number): boolean {
    if (this.unlimitedStore) return this.unlimitedStore.has(id);
    return this.limitedStore!.has(id);
  }

  private clusterInsert(id: number, cluster: LogCluster): void {
    if (this.unlimitedStore) {
      this.unlimitedStore.set(id, cluster);
    } else {
      this.limitedStore!.put(id, cluster);
    }
  }

  // True if the string contains any ASCII digit (charCode 48..57).
  private static hasNumbers(s: string): boolean {
    for (let i = 0; i < s.length; i++) {
      const c = s.charCodeAt(i);
      if (c >= 48 && c <= 57) return true;
    }
    return false;
  }

  /** Split a message into tokens: extra delimiters become spaces, then split on whitespace. */
  getContentAsTokens(content: string): string[] {
    let c = content.trim();
    for (const delimiter of this.extraDelimiters) {
      c = c.split(delimiter).join(' ');
    }
    if (c.length === 0) return [];
    return c.split(/\s+/);
  }

  /**
   * Similarity between a template (seq1) and a token sequence (seq2) of
   * equal length. Returns [similarity in 0..1, wildcard count in seq1].
   * Wildcards count toward similarity only when includeParams is true.
   */
  getSeqDistance(
    seq1: string[],
    seq2: string[],
    includeParams: boolean,
  ): [number, number] {
    if (seq1.length !== seq2.length) {
      throw new Error('seq1 and seq2 must have equal length');
    }
    // Empty sequences are defined as a perfect match.
    if (seq1.length === 0) return [1.0, 0];

    let simTokens = 0;
    let paramCount = 0;

    for (let i = 0; i < seq1.length; i++) {
      if (seq1[i] === this.paramStr) {
        paramCount++;
        continue;
      }
      if (seq1[i] === seq2[i]) {
        simTokens++;
      }
    }

    if (includeParams) {
      simTokens += paramCount;
    }

    return [simTokens / seq1.length, paramCount];
  }

  /** Merge two equal-length sequences: differing positions become the wildcard. */
  createTemplate(seq1: string[], seq2: string[]): string[] {
    if (seq1.length !== seq2.length) {
      throw new Error('seq1 and seq2 must have equal length');
    }
    return seq1.map((t1, i) => (t1 === seq2[i] ? seq2[i] : this.paramStr));
  }

  /**
   * Among candidate clusters, find the one most similar to `tokens` (ties
   * broken by higher wildcard count). Returns its ID, or null if no
   * candidate reaches `simTh`. Evicted clusters are skipped via peek.
   */
  private fastMatch(
    clusterIds: number[],
    tokens: string[],
    simTh: number,
    includeParams: boolean,
  ): number | null {
    let maxSim = -1;
    let maxParamCount = -1;
    let maxClusterId: number | null = null;

    for (const cid of clusterIds) {
      const cluster = this.clusterPeek(cid);
      if (!cluster) continue;
      const [curSim, paramCount] = this.getSeqDistance(
        cluster.logTemplateTokens,
        tokens,
        includeParams,
      );
      if (
        curSim > maxSim ||
        (curSim === maxSim && paramCount > maxParamCount)
      ) {
        maxSim = curSim;
        maxParamCount = paramCount;
        maxClusterId = cid;
      }
    }

    if (maxSim >= simTh) {
      return maxClusterId;
    }
    return null;
  }

  /**
   * Walk the prefix tree — token-count level first, then leading tokens
   * (falling back to the wildcard child) — and run fastMatch on the leaf's
   * candidates. Returns the matching cluster ID or null.
   */
  private treeSearch(
    tokens: string[],
    simTh: number,
    includeParams: boolean,
  ): number | null {
    const tokenCount = tokens.length;
    const tokenCountStr = String(tokenCount);

    const firstNode = this.rootNode.keyToChildNode.get(tokenCountStr);
    if (!firstNode) return null;

    // Empty messages map to at most one cluster under the "0" node.
    if (tokenCount === 0) {
      const firstId = firstNode.clusterIds[0];
      return firstId !== undefined ? firstId : null;
    }

    let curNode: Node = firstNode;
    let curNodeDepth = 1;
    for (const token of tokens) {
      // Stop at the leaf level, or when the message is shorter than the tree.
      if (curNodeDepth >= this.maxNodeDepth) break;
      if (curNodeDepth >= tokenCount) break;

      const child: Node | undefined = curNode.keyToChildNode.get(token);
      if (child) {
        curNode = child;
      } else {
        const wildcardChild: Node | undefined = curNode.keyToChildNode.get(
          this.paramStr,
        );
        if (wildcardChild) {
          curNode = wildcardChild;
        } else {
          return null;
        }
      }
      curNodeDepth++;
    }

    return this.fastMatch(curNode.clusterIds, tokens, simTh, includeParams);
  }

  /**
   * Insert a new cluster ID into the prefix tree along the path of its
   * template tokens, creating nodes as needed and honoring maxChildren
   * (excess tokens route through the wildcard child).
   */
  private addSeqToPrefixTree(
    clusterId: number,
    templateTokens: string[],
  ): void {
    const tokenCount = templateTokens.length;
    const tokenCountStr = String(tokenCount);

    if (!this.rootNode.keyToChildNode.has(tokenCountStr)) {
      this.rootNode.keyToChildNode.set(tokenCountStr, new Node());
    }

    let curNode = this.rootNode.keyToChildNode.get(tokenCountStr)!;

    if (tokenCount === 0) {
      curNode.clusterIds = [clusterId];
      return;
    }

    let currentDepth = 1;
    for (const token of templateTokens) {
      if (currentDepth >= this.maxNodeDepth || currentDepth >= tokenCount) {
        // Leaf reached: prune IDs of evicted clusters, then append ours.
        const newClusterIds = curNode.clusterIds.filter(cid =>
          this.clusterContains(cid),
        );
        newClusterIds.push(clusterId);
        curNode.clusterIds = newClusterIds;
        break;
      }

      if (!curNode.keyToChildNode.has(token)) {
        // Numeric tokens go to the wildcard child when parametrization is on.
        if (this.parametrizeNumericTokens && Drain.hasNumbers(token)) {
          if (!curNode.keyToChildNode.has(this.paramStr)) {
            curNode.keyToChildNode.set(this.paramStr, new Node());
          }
          curNode = curNode.keyToChildNode.get(this.paramStr)!;
        } else if (curNode.keyToChildNode.has(this.paramStr)) {
          // Wildcard already exists: add the token only if capacity allows.
          if (curNode.keyToChildNode.size < this.maxChildren) {
            curNode.keyToChildNode.set(token, new Node());
            curNode = curNode.keyToChildNode.get(token)!;
          } else {
            curNode = curNode.keyToChildNode.get(this.paramStr)!;
          }
        } else {
          // No wildcard yet: reserve the last slot for it when nearly full.
          if (curNode.keyToChildNode.size + 1 < this.maxChildren) {
            curNode.keyToChildNode.set(token, new Node());
            curNode = curNode.keyToChildNode.get(token)!;
          } else if (curNode.keyToChildNode.size + 1 === this.maxChildren) {
            curNode.keyToChildNode.set(this.paramStr, new Node());
            curNode = curNode.keyToChildNode.get(this.paramStr)!;
          } else {
            curNode = curNode.keyToChildNode.get(this.paramStr)!;
          }
        }
      } else {
        curNode = curNode.keyToChildNode.get(token)!;
      }
      currentDepth++;
    }
  }

  /**
   * Add a log message: match it to an existing cluster (merging templates)
   * or create a new one. Returns the cluster and a change type of
   * 'cluster_created', 'cluster_template_changed', or 'none'.
   */
  addLogMessage(content: string): [LogCluster, string] {
    const contentTokens = this.getContentAsTokens(content);
    const matchClusterId = this.treeSearch(contentTokens, this.simTh, false);

    if (matchClusterId === null) {
      this.clustersCounter++;
      const clusterId = this.clustersCounter;
      const cluster = new LogCluster(contentTokens, clusterId);
      this.clusterInsert(clusterId, cluster);
      this.addSeqToPrefixTree(clusterId, contentTokens);
      return [cluster, 'cluster_created'];
    }

    const existingCluster = this.clusterPeek(matchClusterId)!;
    const newTemplateTokens = this.createTemplate(
      contentTokens,
      existingCluster.logTemplateTokens,
    );

    // 'none' when the merged template is token-for-token unchanged.
    const updateType =
      newTemplateTokens.length === existingCluster.logTemplateTokens.length &&
      newTemplateTokens.every(
        (t, i) => t === existingCluster.logTemplateTokens[i],
      )
        ? 'none'
        : 'cluster_template_changed';

    existingCluster.logTemplateTokens = newTemplateTokens;
    existingCluster.size += 1;

    // Touch to update LRU ordering
    this.clusterGet(matchClusterId);

    return [existingCluster, updateType];
  }

  // All cluster IDs reachable under the subtree for a given token count.
  private getClustersIdsForSeqLen(seqLen: number): number[] {
    const collectRecursive = (node: Node, ids: number[]): void => {
      ids.push(...node.clusterIds);
      for (const child of node.keyToChildNode.values()) {
        collectRecursive(child, ids);
      }
    };

    const key = String(seqLen);
    const node = this.rootNode.keyToChildNode.get(key);
    if (!node) return [];
    const ids: number[] = [];
    collectRecursive(node, ids);
    return ids;
  }

  /**
   * Match a message to an existing cluster WITHOUT modifying state, using
   * required similarity 1.0 (wildcards count as matches). Strategies:
   * 'never' — tree search only; 'always' — exhaustive scan of all clusters
   * with the same token count; 'fallback' — tree search, then exhaustive.
   * Returns the matched cluster or null.
   */
  match(
    content: string,
    fullSearchStrategy: string = 'never',
  ): LogCluster | null {
    if (!['always', 'never', 'fallback'].includes(fullSearchStrategy)) {
      throw new Error(`Invalid full_search_strategy: ${fullSearchStrategy}`);
    }

    const contentTokens = this.getContentAsTokens(content);
    const requiredSimTh = 1.0;

    const fullSearch = (): LogCluster | null => {
      const allIds = this.getClustersIdsForSeqLen(contentTokens.length);
      const matchedId = this.fastMatch(
        allIds,
        contentTokens,
        requiredSimTh,
        true,
      );
      if (matchedId === null) return null;
      return this.clusterPeek(matchedId) ?? null;
    };

    if (fullSearchStrategy === 'always') {
      return fullSearch();
    }

    const matchId = this.treeSearch(contentTokens, requiredSimTh, true);
    if (matchId !== null) {
      return this.clusterPeek(matchId) ?? null;
    }

    if (fullSearchStrategy === 'never') {
      return null;
    }

    return fullSearch();
  }
}
|
||||
9
packages/common-utils/src/drain/index.ts
Normal file
9
packages/common-utils/src/drain/index.ts
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
export type { MaskingInstructionConfig } from './config';
|
||||
export { TemplateMinerConfig } from './config';
|
||||
export { Drain } from './drain';
|
||||
export { LogCluster } from './log-cluster';
|
||||
export { LruCache } from './lru-cache';
|
||||
export { LogMasker, MaskingInstruction } from './masking';
|
||||
export { Node } from './node';
|
||||
export type { AddLogMessageResult, ExtractedParameter } from './template-miner';
|
||||
export { TemplateMiner } from './template-miner';
|
||||
19
packages/common-utils/src/drain/log-cluster.ts
Normal file
19
packages/common-utils/src/drain/log-cluster.ts
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
export class LogCluster {
|
||||
logTemplateTokens: string[];
|
||||
clusterId: number;
|
||||
size: number;
|
||||
|
||||
constructor(logTemplateTokens: string[], clusterId: number) {
|
||||
this.logTemplateTokens = [...logTemplateTokens];
|
||||
this.clusterId = clusterId;
|
||||
this.size = 1;
|
||||
}
|
||||
|
||||
getTemplate(): string {
|
||||
return this.logTemplateTokens.join(' ');
|
||||
}
|
||||
|
||||
toString(): string {
|
||||
return `ID=${String(this.clusterId).padEnd(5)} : size=${String(this.size).padEnd(10)}: ${this.getTemplate()}`;
|
||||
}
|
||||
}
|
||||
154
packages/common-utils/src/drain/lru-cache.ts
Normal file
154
packages/common-utils/src/drain/lru-cache.ts
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
interface LruEntry<V> {
|
||||
key: number;
|
||||
value: V;
|
||||
prev: number | null;
|
||||
next: number | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* LRU cache with separate peek (no eviction update) and get (eviction update) methods.
|
||||
* This mirrors the behavior of Python's cachetools.LRUCache used in Drain3,
|
||||
* where LogClusterCache.get() bypasses eviction ordering.
|
||||
*/
|
||||
export class LruCache<V> {
|
||||
private capacity: number;
|
||||
private map: Map<number, number> = new Map();
|
||||
private entries: (LruEntry<V> | null)[] = [];
|
||||
private freeSlots: number[] = [];
|
||||
private head: number | null = null;
|
||||
private tail: number | null = null;
|
||||
|
||||
constructor(capacity: number) {
|
||||
this.capacity = capacity;
|
||||
}
|
||||
|
||||
get size(): number {
|
||||
return this.map.size;
|
||||
}
|
||||
|
||||
has(key: number): boolean {
|
||||
return this.map.has(key);
|
||||
}
|
||||
|
||||
/** Read value without updating recency (used in fast_match). */
|
||||
peek(key: number): V | undefined {
|
||||
const slot = this.map.get(key);
|
||||
if (slot === undefined) return undefined;
|
||||
return this.entries[slot]?.value;
|
||||
}
|
||||
|
||||
/** Read value and mark as most recently used. */
|
||||
get(key: number): V | undefined {
|
||||
const slot = this.map.get(key);
|
||||
if (slot === undefined) return undefined;
|
||||
this.moveToHead(slot);
|
||||
return this.entries[slot]?.value;
|
||||
}
|
||||
|
||||
/** Insert or update. Returns the evicted [key, value] if cache was at capacity. */
|
||||
put(key: number, value: V): [number, V] | undefined {
|
||||
const existingSlot = this.map.get(key);
|
||||
if (existingSlot !== undefined) {
|
||||
const entry = this.entries[existingSlot]!;
|
||||
entry.value = value;
|
||||
this.moveToHead(existingSlot);
|
||||
return undefined;
|
||||
}
|
||||
|
||||
let evicted: [number, V] | undefined;
|
||||
if (this.map.size >= this.capacity) {
|
||||
evicted = this.evictTail();
|
||||
}
|
||||
|
||||
const slot = this.allocSlot({
|
||||
key,
|
||||
value,
|
||||
prev: null,
|
||||
next: this.head,
|
||||
});
|
||||
|
||||
if (this.head !== null) {
|
||||
this.entries[this.head]!.prev = slot;
|
||||
}
|
||||
this.head = slot;
|
||||
if (this.tail === null) {
|
||||
this.tail = slot;
|
||||
}
|
||||
|
||||
this.map.set(key, slot);
|
||||
return evicted;
|
||||
}
|
||||
|
||||
remove(key: number): V | undefined {
|
||||
const slot = this.map.get(key);
|
||||
if (slot === undefined) return undefined;
|
||||
this.map.delete(key);
|
||||
this.unlink(slot);
|
||||
const entry = this.entries[slot]!;
|
||||
this.entries[slot] = null;
|
||||
this.freeSlots.push(slot);
|
||||
return entry.value;
|
||||
}
|
||||
|
||||
values(): V[] {
|
||||
const result: V[] = [];
|
||||
for (const entry of this.entries) {
|
||||
if (entry !== null) {
|
||||
result.push(entry.value);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private allocSlot(entry: LruEntry<V>): number {
|
||||
if (this.freeSlots.length > 0) {
|
||||
const slot = this.freeSlots.pop()!;
|
||||
this.entries[slot] = entry;
|
||||
return slot;
|
||||
}
|
||||
this.entries.push(entry);
|
||||
return this.entries.length - 1;
|
||||
}
|
||||
|
||||
private unlink(slot: number): void {
|
||||
const entry = this.entries[slot]!;
|
||||
if (entry.prev !== null) {
|
||||
this.entries[entry.prev]!.next = entry.next;
|
||||
} else {
|
||||
this.head = entry.next;
|
||||
}
|
||||
if (entry.next !== null) {
|
||||
this.entries[entry.next]!.prev = entry.prev;
|
||||
} else {
|
||||
this.tail = entry.prev;
|
||||
}
|
||||
}
|
||||
|
||||
private moveToHead(slot: number): void {
|
||||
if (this.head === slot) return;
|
||||
this.unlink(slot);
|
||||
const entry = this.entries[slot]!;
|
||||
entry.prev = null;
|
||||
entry.next = this.head;
|
||||
if (this.head !== null) {
|
||||
this.entries[this.head]!.prev = slot;
|
||||
}
|
||||
this.head = slot;
|
||||
if (this.tail === null) {
|
||||
this.tail = slot;
|
||||
}
|
||||
}
|
||||
|
||||
private evictTail(): [number, V] | undefined {
|
||||
if (this.tail === null) return undefined;
|
||||
const tailSlot = this.tail;
|
||||
const entry = this.entries[tailSlot]!;
|
||||
const key = entry.key;
|
||||
const value = entry.value;
|
||||
this.map.delete(key);
|
||||
this.unlink(tailSlot);
|
||||
this.entries[tailSlot] = null;
|
||||
this.freeSlots.push(tailSlot);
|
||||
return [key, value];
|
||||
}
|
||||
}
|
||||
60
packages/common-utils/src/drain/masking.ts
Normal file
60
packages/common-utils/src/drain/masking.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
export class MaskingInstruction {
|
||||
private regex: RegExp;
|
||||
private _pattern: string;
|
||||
maskWith: string;
|
||||
|
||||
constructor(pattern: string, maskWith: string) {
|
||||
this._pattern = pattern;
|
||||
this.regex = new RegExp(pattern, 'g');
|
||||
this.maskWith = maskWith;
|
||||
}
|
||||
|
||||
get pattern(): string {
|
||||
return this._pattern;
|
||||
}
|
||||
|
||||
mask(content: string, maskPrefix: string, maskSuffix: string): string {
|
||||
const replacement = maskPrefix + this.maskWith + maskSuffix;
|
||||
this.regex.lastIndex = 0;
|
||||
return content.replace(this.regex, replacement);
|
||||
}
|
||||
}
|
||||
|
||||
export class LogMasker {
|
||||
maskPrefix: string;
|
||||
maskSuffix: string;
|
||||
private _maskingInstructions: MaskingInstruction[];
|
||||
private maskNameToInstructions: Map<string, MaskingInstruction[]>;
|
||||
|
||||
constructor(
|
||||
maskingInstructions: MaskingInstruction[],
|
||||
maskPrefix: string,
|
||||
maskSuffix: string,
|
||||
) {
|
||||
this.maskPrefix = maskPrefix;
|
||||
this.maskSuffix = maskSuffix;
|
||||
this._maskingInstructions = maskingInstructions;
|
||||
this.maskNameToInstructions = new Map();
|
||||
for (const mi of maskingInstructions) {
|
||||
const list = this.maskNameToInstructions.get(mi.maskWith) ?? [];
|
||||
list.push(mi);
|
||||
this.maskNameToInstructions.set(mi.maskWith, list);
|
||||
}
|
||||
}
|
||||
|
||||
mask(content: string): string {
|
||||
let result = content;
|
||||
for (const mi of this._maskingInstructions) {
|
||||
result = mi.mask(result, this.maskPrefix, this.maskSuffix);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
get maskNames(): string[] {
|
||||
return Array.from(this.maskNameToInstructions.keys());
|
||||
}
|
||||
|
||||
instructionsByMaskName(maskName: string): MaskingInstruction[] {
|
||||
return this.maskNameToInstructions.get(maskName) ?? [];
|
||||
}
|
||||
}
|
||||
9
packages/common-utils/src/drain/node.ts
Normal file
9
packages/common-utils/src/drain/node.ts
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
export class Node {
|
||||
keyToChildNode: Map<string, Node>;
|
||||
clusterIds: number[];
|
||||
|
||||
constructor() {
|
||||
this.keyToChildNode = new Map();
|
||||
this.clusterIds = [];
|
||||
}
|
||||
}
|
||||
180
packages/common-utils/src/drain/template-miner.ts
Normal file
180
packages/common-utils/src/drain/template-miner.ts
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
import { TemplateMinerConfig } from './config';
|
||||
import { Drain } from './drain';
|
||||
import { LogCluster } from './log-cluster';
|
||||
import { LruCache } from './lru-cache';
|
||||
import { LogMasker, MaskingInstruction } from './masking';
|
||||
|
||||
export interface ExtractedParameter {
|
||||
value: string;
|
||||
maskName: string;
|
||||
}
|
||||
|
||||
export interface AddLogMessageResult {
|
||||
changeType: string;
|
||||
clusterId: number;
|
||||
clusterSize: number;
|
||||
templateMined: string;
|
||||
clusterCount: number;
|
||||
}
|
||||
|
||||
export class TemplateMiner {
|
||||
drain: Drain;
|
||||
private masker: LogMasker;
|
||||
private extractionCache: LruCache<{
|
||||
regex: string;
|
||||
paramMap: Map<string, string>;
|
||||
}>;
|
||||
private extraDelimiters: string[];
|
||||
|
||||
constructor(config?: TemplateMinerConfig) {
|
||||
const cfg = config ?? new TemplateMinerConfig();
|
||||
const paramStr = cfg.maskPrefix + '*' + cfg.maskSuffix;
|
||||
|
||||
const maskingInstructions = cfg.maskingInstructions.map(
|
||||
mi => new MaskingInstruction(mi.pattern, mi.maskWith),
|
||||
);
|
||||
|
||||
this.masker = new LogMasker(
|
||||
maskingInstructions,
|
||||
cfg.maskPrefix,
|
||||
cfg.maskSuffix,
|
||||
);
|
||||
|
||||
this.drain = new Drain(
|
||||
cfg.drainDepth,
|
||||
cfg.drainSimTh,
|
||||
cfg.drainMaxChildren,
|
||||
cfg.drainMaxClusters,
|
||||
cfg.drainExtraDelimiters,
|
||||
paramStr,
|
||||
cfg.parametrizeNumericTokens,
|
||||
);
|
||||
|
||||
this.extractionCache = new LruCache(cfg.parameterExtractionCacheCapacity);
|
||||
this.extraDelimiters = cfg.drainExtraDelimiters;
|
||||
}
|
||||
|
||||
addLogMessage(logMessage: string): AddLogMessageResult {
|
||||
const maskedContent = this.masker.mask(logMessage);
|
||||
const [cluster, changeType] = this.drain.addLogMessage(maskedContent);
|
||||
return {
|
||||
changeType,
|
||||
clusterId: cluster.clusterId,
|
||||
clusterSize: cluster.size,
|
||||
templateMined: cluster.getTemplate(),
|
||||
clusterCount: this.drain.clusterCount,
|
||||
};
|
||||
}
|
||||
|
||||
match(
|
||||
logMessage: string,
|
||||
fullSearchStrategy: string = 'never',
|
||||
): LogCluster | null {
|
||||
const maskedContent = this.masker.mask(logMessage);
|
||||
return this.drain.match(maskedContent, fullSearchStrategy);
|
||||
}
|
||||
|
||||
extractParameters(
|
||||
logTemplate: string,
|
||||
logMessage: string,
|
||||
exactMatching: boolean = true,
|
||||
): ExtractedParameter[] | null {
|
||||
let message = logMessage;
|
||||
for (const delimiter of this.extraDelimiters) {
|
||||
message = message.split(delimiter).join(' ');
|
||||
}
|
||||
|
||||
const { regex: templateRegex, paramMap } =
|
||||
this.getTemplateParameterExtractionRegex(logTemplate, exactMatching);
|
||||
|
||||
const parameterMatch = message.match(new RegExp(templateRegex));
|
||||
if (!parameterMatch || !parameterMatch.groups) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const extracted: ExtractedParameter[] = [];
|
||||
for (const [groupName, maskName] of paramMap) {
|
||||
const value = parameterMatch.groups[groupName];
|
||||
if (value !== undefined) {
|
||||
extracted.push({ value, maskName });
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by position in input string (left-to-right) to match Python Drain3 behavior,
|
||||
// which depends on CPython's set iteration order for mask name processing.
|
||||
extracted.sort((a, b) => {
|
||||
const posA = message.indexOf(a.value);
|
||||
const posB = message.indexOf(b.value);
|
||||
return posA - posB;
|
||||
});
|
||||
|
||||
return extracted;
|
||||
}
|
||||
|
||||
private getTemplateParameterExtractionRegex(
|
||||
logTemplate: string,
|
||||
exactMatching: boolean,
|
||||
): { regex: string; paramMap: Map<string, string> } {
|
||||
const cacheKey = simpleHash(`${logTemplate}|${exactMatching}`);
|
||||
const cached = this.extractionCache.peek(cacheKey);
|
||||
if (cached) return cached;
|
||||
|
||||
const paramMap = new Map<string, string>();
|
||||
let paramNameCounter = 0;
|
||||
|
||||
const maskNames = new Set(this.masker.maskNames);
|
||||
maskNames.add('*');
|
||||
|
||||
const escapedPrefix = escapeRegex(this.masker.maskPrefix);
|
||||
const escapedSuffix = escapeRegex(this.masker.maskSuffix);
|
||||
let templateRegex = escapeRegex(logTemplate);
|
||||
|
||||
for (const maskName of maskNames) {
|
||||
const searchStr = escapedPrefix + escapeRegex(maskName) + escapedSuffix;
|
||||
while (true) {
|
||||
const allowedPatterns: string[] = [];
|
||||
if (exactMatching && maskName !== '*') {
|
||||
const instructions = this.masker.instructionsByMaskName(maskName);
|
||||
for (const mi of instructions) {
|
||||
allowedPatterns.push(mi.pattern);
|
||||
}
|
||||
}
|
||||
if (!exactMatching || maskName === '*') {
|
||||
allowedPatterns.push('.+?');
|
||||
}
|
||||
|
||||
const paramGroupName = `p_${paramNameCounter}`;
|
||||
paramNameCounter++;
|
||||
paramMap.set(paramGroupName, maskName);
|
||||
|
||||
const joined = allowedPatterns.join('|');
|
||||
const captureRegex = `(?<${paramGroupName}>${joined})`;
|
||||
|
||||
if (templateRegex.includes(searchStr)) {
|
||||
templateRegex = templateRegex.replace(searchStr, captureRegex);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
templateRegex = templateRegex.replace(/\\ /g, '\\s+');
|
||||
templateRegex = `^${templateRegex}$`;
|
||||
|
||||
const result = { regex: templateRegex, paramMap };
|
||||
this.extractionCache.put(cacheKey, result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
function escapeRegex(s: string): string {
|
||||
return s.replace(/[.*+?^${}()|[\]\\-]/g, '\\$&');
|
||||
}
|
||||
|
||||
function simpleHash(s: string): number {
|
||||
let hash = 5381;
|
||||
for (let i = 0; i < s.length; i++) {
|
||||
hash = ((hash * 33) ^ s.charCodeAt(i)) >>> 0;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
Loading…
Reference in a new issue