mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
updates for PR
This commit is contained in:
parent
cface4610c
commit
41fe0b8369
7 changed files with 690 additions and 46 deletions
|
|
@ -4,13 +4,14 @@ import { renderHook } from '@testing-library/react';
|
|||
|
||||
import { LuceneLanguageFormatter } from '../../components/SearchInput/SearchInputV2';
|
||||
import { useAutoCompleteOptions } from '../useAutoCompleteOptions';
|
||||
import { useCompleteKeyValues, useMultipleAllFields } from '../useMetadata';
|
||||
import { tokenizeAtCursor } from '../useAutoCompleteOptions';
|
||||
import { useAllKeyValues, useMultipleAllFields } from '../useMetadata';
|
||||
|
||||
// Mock dependencies
|
||||
jest.mock('../useMetadata', () => ({
|
||||
...jest.requireActual('../useMetadata.tsx'),
|
||||
useMultipleAllFields: jest.fn(),
|
||||
useCompleteKeyValues: jest.fn(),
|
||||
useAllKeyValues: jest.fn(),
|
||||
}));
|
||||
|
||||
const luceneFormatter = new LuceneLanguageFormatter();
|
||||
|
|
@ -49,7 +50,7 @@ describe('useAutoCompleteOptions', () => {
|
|||
data: mockFields,
|
||||
});
|
||||
|
||||
(useCompleteKeyValues as jest.Mock).mockReturnValue({
|
||||
(useAllKeyValues as jest.Mock).mockReturnValue({
|
||||
data: null,
|
||||
isFetching: false,
|
||||
});
|
||||
|
|
@ -79,7 +80,7 @@ describe('useAutoCompleteOptions', () => {
|
|||
});
|
||||
|
||||
it('should return key value options with correct lucene formatting', () => {
|
||||
(useCompleteKeyValues as jest.Mock).mockReturnValue({
|
||||
(useAllKeyValues as jest.Mock).mockReturnValue({
|
||||
data: ['frontend', 'backend'],
|
||||
isFetching: false,
|
||||
});
|
||||
|
|
@ -146,3 +147,165 @@ describe('useAutoCompleteOptions', () => {
|
|||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('tokenizeAtCursor', () => {
  // Each case is tokenized with the cursor at the end of the input, so
  // `expectedToken` is the token the user is currently typing into.
  const cases: {
    name: string;
    input: string;
    expectedToken: string;
    expectedTokens: string[];
  }[] = [
    // Basic whitespace splitting
    {
      name: 'splits a bare whitespace-separated string into tokens',
      input: 'foo bar baz',
      expectedToken: 'baz',
      expectedTokens: ['foo', 'bar', 'baz'],
    },
    {
      name: 'returns a single token for input with no whitespace',
      input: 'FieldName',
      expectedToken: 'FieldName',
      expectedTokens: ['FieldName'],
    },
    {
      name: 'returns a single empty token for empty input',
      input: '',
      expectedToken: '',
      expectedTokens: [''],
    },
    {
      name: 'produces empty tokens for consecutive spaces (split-like semantics)',
      // The input must contain TWO spaces between the words. It is built with
      // join() so the double space cannot be silently collapsed by an editor
      // or formatter, which would make the expectation below unsatisfiable.
      input: ['foo', '', 'bar'].join(' '),
      expectedToken: 'bar',
      expectedTokens: ['foo', '', 'bar'],
    },
    {
      name: 'produces a trailing empty token when input ends in a space',
      input: 'foo ',
      expectedToken: '',
      expectedTokens: ['foo', ''],
    },

    // Balanced quoted regions
    {
      name: 'keeps whitespace inside paired quotes as part of one token',
      input: 'Service:"hello world"',
      expectedToken: 'Service:"hello world"',
      expectedTokens: ['Service:"hello world"'],
    },
    {
      name: 'splits two paired-quote fields on the space between them',
      input: 'ServiceName:"clickhouse" SeverityText:"debug"',
      expectedToken: 'SeverityText:"debug"',
      expectedTokens: ['ServiceName:"clickhouse"', 'SeverityText:"debug"'],
    },
    {
      name: 'preserves escaped quotes inside a quoted region',
      input: 'Service:"he said \\"hi\\"" other',
      expectedToken: 'other',
      expectedTokens: ['Service:"he said \\"hi\\""', 'other'],
    },
    {
      name: 'treats a colon inside a quoted value as literal text',
      input: 'URL:"http://example.com/x" x',
      expectedToken: 'x',
      expectedTokens: ['URL:"http://example.com/x"', 'x'],
    },

    // Unclosed quotes — reproduces the bug where `Field:" Other:"v"` (three
    // quotes with the first unclosed) previously collapsed into one token.
    {
      name: 'breaks at whitespace when a quote is followed by a new field pattern',
      input: 'ServiceName:" SeverityText:"debug"',
      expectedToken: 'SeverityText:"debug"',
      expectedTokens: ['ServiceName:"', 'SeverityText:"debug"'],
    },
    {
      name: 'treats a single unclosed quote at end of input as a literal',
      input: 'Service:"hel',
      expectedToken: 'Service:"hel',
      expectedTokens: ['Service:"hel'],
    },
    {
      // No `ident:` after the space, so the quote could still legitimately
      // extend — but there's no closing quote anywhere, so it's unclosed.
      name: 'handles an unclosed quote followed by whitespace then bare text',
      input: 'Service:"hello world',
      expectedToken: 'world',
      expectedTokens: ['Service:"hello', 'world'],
    },
    {
      name: 'handles multiple unclosed quotes across fields',
      input: 'A:" B:" C:"done"',
      expectedToken: 'C:"done"',
      expectedTokens: ['A:"', 'B:"', 'C:"done"'],
    },

    // Identifier-like characters after whitespace
    {
      // The space inside the quoted value is followed by `!`, not `ident:`,
      // so the quote should still be able to close.
      name: 'does not bail out on whitespace followed by a non-identifier',
      input: 'Service:"hello !world"',
      expectedToken: 'Service:"hello !world"',
      expectedTokens: ['Service:"hello !world"'],
    },
    {
      name: 'does not treat whitespace + ident without colon as a new field',
      input: 'Service:"hello world done"',
      expectedToken: 'Service:"hello world done"',
      expectedTokens: ['Service:"hello world done"'],
    },
    {
      name: 'handles dotted identifiers in the new-field pattern',
      input: 'Foo:" ResourceAttributes.service.name:"x"',
      expectedToken: 'ResourceAttributes.service.name:"x"',
      expectedTokens: ['Foo:"', 'ResourceAttributes.service.name:"x"'],
    },
  ];

  it.each(cases)('$name', ({ input, expectedToken, expectedTokens }) => {
    const { token, tokens } = tokenizeAtCursor(input, input.length);
    expect(tokens).toEqual(expectedTokens);
    expect(token).toBe(expectedToken);
  });

  // Cursor-positioning is orthogonal to tokenization — keep these separate
  // because each case exercises a different cursor offset for the same input.
  describe('cursor positioning', () => {
    it('returns the first token when the cursor is at position 0', () => {
      const { token, index } = tokenizeAtCursor('foo bar baz', 0);
      expect(token).toBe('foo');
      expect(index).toBe(0);
    });

    it('returns the middle token when the cursor is inside it', () => {
      // In 'foo bar baz' the token 'bar' occupies offsets 4..7 (cursor
      // positions sit between characters), so cursor 5 falls inside it.
      const { token, index } = tokenizeAtCursor('foo bar baz', 5);
      expect(token).toBe('bar');
      expect(index).toBe(1);
    });

    it('returns the token whose range contains the cursor in a quoted field', () => {
      const input = 'Service:"hello world" other';
      // Cursor 15 is just after the space inside the quoted value, i.e. still
      // inside the first token.
      const { token, index } = tokenizeAtCursor(input, 15);
      expect(token).toBe('Service:"hello world"');
      expect(index).toBe(0);
    });

    it('returns the unclosed-quote token when the cursor is inside it', () => {
      // User is typing a value — the first quote is unclosed because the
      // next field pattern `SeverityText:` appears after whitespace. Cursor
      // sits just after `l`, at the end of the in-progress `ServiceName:"cl`
      // token.
      const input = 'ServiceName:"cl SeverityText:"info"';
      const { token, index } = tokenizeAtCursor(input, 15);
      expect(token).toBe('ServiceName:"cl');
      expect(index).toBe(0);
    });
  });
});
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import {
|
|||
import { NOW } from '@/config';
|
||||
import {
|
||||
deduplicate2dArray,
|
||||
useCompleteKeyValues,
|
||||
useAllKeyValues,
|
||||
useMultipleAllFields,
|
||||
} from '@/hooks/useMetadata';
|
||||
import { toArray, useDebounce } from '@/utils';
|
||||
|
|
@ -21,20 +21,112 @@ export type TokenInfo = {
|
|||
tokens: string[];
|
||||
};
|
||||
|
||||
/** Characters allowed in a field identifier; dots allow nested keys such as `a.b.c`. */
const IDENT_RE = /[A-Za-z0-9_.]/;

/**
 * Finds the double quote that closes the quoted region opened at `startIdx`.
 *
 * Backslash-escaped characters are skipped. The search gives up as soon as it
 * meets whitespace that is followed by `identifier:`, because that looks like
 * the start of the next field — the opening quote is then considered unclosed.
 *
 * @param value    Full input string.
 * @param startIdx Index of the opening `"` in `value`.
 * @returns Index of the closing quote, or -1 when the quote is unclosed.
 */
function findMatchingQuote(value: string, startIdx: number): number {
  const isBlank = (c: string) => c === ' ' || c === '\t' || c === '\n';

  let i = startIdx + 1;
  while (i < value.length) {
    const ch = value[i];
    if (ch === '\\' && i + 1 < value.length) {
      // Skip the backslash and the character it escapes.
      i += 2;
      continue;
    }
    if (ch === '"') return i;
    if (isBlank(ch)) {
      // Look ahead past the whitespace run for an `identifier:` pattern.
      let k = i;
      while (k < value.length && isBlank(value[k])) k++;
      const identStart = k;
      while (k < value.length && IDENT_RE.test(value[k])) k++;
      if (k > identStart && k < value.length && value[k] === ':') {
        return -1;
      }
    }
    i++;
  }
  return -1;
}

/**
 * Splits `value` into space-separated tokens and reports which token the
 * cursor is in.
 *
 * Spaces inside a properly closed double-quoted region do not split; a quote
 * with no matching close (see `findMatchingQuote`) is kept as a literal
 * character. Consecutive spaces yield empty tokens, mirroring `split(' ')`.
 *
 * @param value     Raw input text.
 * @param cursorPos Cursor offset into `value` (a position between characters).
 * @returns The token under the cursor, its index, and the full token list.
 */
export function tokenizeAtCursor(value: string, cursorPos: number): TokenInfo {
  const tokens: string[] = [];
  // Start offset of each token in the original string.
  const starts: number[] = [];

  let current = '';
  let currentStart = -1; // -1 means the current token has no characters yet
  let inQuotes = false;
  let escaped = false;

  for (let i = 0; i < value.length; i++) {
    const ch = value[i];

    if (escaped) {
      // Keep the escaped character verbatim (its backslash was already added).
      current += ch;
      escaped = false;
      continue;
    }

    if (ch === '\\' && inQuotes) {
      current += ch;
      escaped = true;
      continue;
    }

    if (ch === '"') {
      if (inQuotes) {
        // Closing an already-opened quoted region.
        inQuotes = false;
      } else if (findMatchingQuote(value, i) !== -1) {
        // Only enter a quoted region if there's a matching close ahead.
        inQuotes = true;
      }
      // Otherwise: stray/unclosed quote — treated as a literal character.
    } else if (!inQuotes && ch === ' ') {
      // Boundary: flush the current token even if empty, so consecutive
      // spaces produce empty tokens just like `split(' ')` did.
      tokens.push(current);
      starts.push(currentStart === -1 ? i : currentStart);
      current = '';
      currentStart = -1;
      continue;
    }

    if (currentStart === -1) currentStart = i;
    current += ch;
  }

  // Flush trailing token
  tokens.push(current);
  starts.push(currentStart === -1 ? value.length : currentStart);

  // Locate the token containing the cursor. The cursor sits *between*
  // characters, so a token covers the closed range [start, start + length].
  // Pick the first token whose end is at or after the cursor; if none
  // qualifies (cursor past the end), fall back to the last token.
  let idx = tokens.length - 1;
  for (let i = 0; i < tokens.length; i++) {
    const start = starts[i];
    const end = start + tokens[i].length;
    if (cursorPos <= end) {
      idx = i;
      break;
    }
  }

  return { token: tokens[idx] ?? '', index: idx, tokens };
}
|
||||
|
||||
|
|
@ -118,13 +210,11 @@ export function useAutoCompleteOptions(
|
|||
|
||||
// Debounced fetch of values for the selected key from rollup tables
|
||||
const firstTc = tcs.length > 0 ? tcs[0] : undefined;
|
||||
const { data: keyValues, isFetching: isLoadingValues } = useCompleteKeyValues(
|
||||
{
|
||||
tableConnection: firstTc,
|
||||
searchField,
|
||||
dateRange: effectiveDateRange,
|
||||
},
|
||||
);
|
||||
const { data: keyValues, isFetching: isLoadingValues } = useAllKeyValues({
|
||||
tableConnection: firstTc,
|
||||
searchField,
|
||||
dateRange: effectiveDateRange,
|
||||
});
|
||||
|
||||
// Build key-value pair suggestions
|
||||
const keyValCompleteOptions = useMemo<
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ function fieldToRollupParams(
|
|||
* Works for both map keys (e.g. "ResourceAttributes.http.method") and
|
||||
* native columns (e.g. "ServiceName").
|
||||
*/
|
||||
export function useCompleteKeyValues({
|
||||
export function useAllKeyValues({
|
||||
tableConnection,
|
||||
searchField,
|
||||
dateRange,
|
||||
|
|
@ -212,18 +212,11 @@ export function useCompleteKeyValues({
|
|||
}) {
|
||||
const metadata = useMetadataWithSettings();
|
||||
|
||||
// Debounce: only query after the field stabilizes for 300ms
|
||||
const [debouncedField, setDebouncedField] = useState<Field | null>(null);
|
||||
useEffect(() => {
|
||||
const timer = setTimeout(() => setDebouncedField(searchField), 300);
|
||||
return () => clearTimeout(timer);
|
||||
}, [searchField]);
|
||||
|
||||
const rollupParams = fieldToRollupParams(debouncedField, tableConnection);
|
||||
const rollupParams = fieldToRollupParams(searchField, tableConnection);
|
||||
|
||||
return useQuery<string[]>({
|
||||
queryKey: [
|
||||
'useCompleteKeyValues',
|
||||
'useAllKeyValues',
|
||||
tableConnection?.databaseName,
|
||||
tableConnection?.tableName,
|
||||
tableConnection?.connectionId,
|
||||
|
|
@ -233,10 +226,10 @@ export function useCompleteKeyValues({
|
|||
dateRange[1].getTime(),
|
||||
],
|
||||
queryFn: async ({ signal }) => {
|
||||
if (!tableConnection || !rollupParams || !debouncedField) return [];
|
||||
if (!tableConnection || !rollupParams || !searchField) return [];
|
||||
|
||||
// Try rollup first
|
||||
const rollupValues = await metadata.getCompleteKeyValues({
|
||||
const rollupValues = await metadata.getAllKeyValues({
|
||||
databaseName: tableConnection.databaseName,
|
||||
tableName: tableConnection.tableName,
|
||||
column: rollupParams.columnIdentifier,
|
||||
|
|
@ -264,7 +257,7 @@ export function useCompleteKeyValues({
|
|||
return metadata.getMapValues({
|
||||
databaseName: tableConnection.databaseName,
|
||||
tableName: tableConnection.tableName,
|
||||
column: debouncedField.path[0],
|
||||
column: searchField.path[0],
|
||||
connectionId: tableConnection.connectionId,
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import {
|
|||
filterColumnMetaByType,
|
||||
JSDataType,
|
||||
} from '@hyperdx/common-utils/dist/clickhouse';
|
||||
import { inferGranularityFromMVSelect } from '@hyperdx/common-utils/dist/core/materializedViews';
|
||||
import { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
|
||||
import { splitAndTrimWithBracket } from '@hyperdx/common-utils/dist/core/utils';
|
||||
import { isBuilderChartConfig } from '@hyperdx/common-utils/dist/guards';
|
||||
|
|
@ -348,8 +349,9 @@ export async function inferTableSourceConfig({
|
|||
// Check if SpanEvents column is available
|
||||
const hasSpanEvents = columns.some(col => col.name === 'Events.Timestamp');
|
||||
|
||||
// Check if metadata rollup tables exist
|
||||
const hasMetadataMVs =
|
||||
// Check if metadata rollup tables exist and, if so, infer the bucketing
|
||||
// granularity from the key-rollup view's `as_select`
|
||||
const rollupMeta =
|
||||
isOtelLogSchema || isOtelSpanSchema
|
||||
? await (async () => {
|
||||
const [keyMeta, kvMeta] = await Promise.all([
|
||||
|
|
@ -364,16 +366,22 @@ export async function inferTableSourceConfig({
|
|||
connectionId,
|
||||
}),
|
||||
]);
|
||||
return keyMeta != null && kvMeta != null;
|
||||
return keyMeta != null && kvMeta != null
|
||||
? { keyMeta, kvMeta }
|
||||
: undefined;
|
||||
})()
|
||||
: false;
|
||||
: undefined;
|
||||
|
||||
const metadataMVsConfig = hasMetadataMVs
|
||||
const metadataMVsConfig = rollupMeta
|
||||
? {
|
||||
metadataMaterializedViews: {
|
||||
keyRollupTable: `${tableName}_key_rollup_15m`,
|
||||
kvRollupTable: `${tableName}_kv_rollup_15m`,
|
||||
granularity: '15 minute',
|
||||
// Fall back to '15 minute' to preserve the prior default when the
|
||||
// MV's `as_select` doesn't contain a recognized bucketing function.
|
||||
granularity:
|
||||
inferGranularityFromMVSelect(rollupMeta.keyMeta.as_select) ??
|
||||
'15 minute',
|
||||
},
|
||||
}
|
||||
: {};
|
||||
|
|
|
|||
192
packages/common-utils/src/__tests__/materializedViews.test.ts
Normal file
192
packages/common-utils/src/__tests__/materializedViews.test.ts
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
import { inferGranularityFromMVSelect } from '@/core/materializedViews';
|
||||
|
||||
describe('inferGranularityFromMVSelect', () => {
  // Builds the SELECT of a kv-rollup materialized view. The logs and traces
  // variants differ only in the source table and the second attribute column.
  const kvRollupSelect = (
    table: string,
    attrColumn: string,
  ): string => `WITH elements AS (
SELECT
'ResourceAttributes' AS ColumnIdentifier,
toStartOfFifteenMinutes(Timestamp) AS Timestamp,
replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key,
CAST(entry.2 AS String) AS Value
FROM default.${table}
ARRAY JOIN ResourceAttributes AS entry
UNION ALL
SELECT
'${attrColumn}' AS ColumnIdentifier,
toStartOfFifteenMinutes(Timestamp) AS Timestamp,
replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key,
CAST(entry.2 AS String) AS Value
FROM default.${table}
ARRAY JOIN ${attrColumn} AS entry
)
SELECT Timestamp, ColumnIdentifier, Key, Value, count() AS count FROM elements
GROUP BY Timestamp, ColumnIdentifier, Key, Value`;

  describe('real MV schemas', () => {
    // Shape matches the `otel_logs_attr_kv_rollup_15m_mv` view in
    // docker/otel-collector/schema/seed/00006_otel_logs_rollups.sql.
    it('detects 15 minute from the otel_logs kv rollup MV select', () => {
      const asSelect = kvRollupSelect('otel_logs', 'LogAttributes');
      expect(inferGranularityFromMVSelect(asSelect)).toBe('15 minute');
    });

    // Shape matches the `otel_traces_kv_rollup_15m_mv` view in
    // docker/otel-collector/schema/seed/00007_otel_traces_rollups.sql.
    it('detects 15 minute from the otel_traces kv rollup MV select', () => {
      const asSelect = kvRollupSelect('otel_traces', 'SpanAttributes');
      expect(inferGranularityFromMVSelect(asSelect)).toBe('15 minute');
    });

    // The key-rollup MV rolls up the kv rollup, so it doesn't bucket the
    // timestamp itself — it just selects the already-bucketed Timestamp.
    it('returns undefined when the select contains no bucketing function', () => {
      const asSelect = `SELECT
Timestamp,
ColumnIdentifier,
Key,
sum(count) as count
FROM default.otel_logs_kv_rollup_15m
GROUP BY ColumnIdentifier, Key, Timestamp`;

      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });

  describe('toStartOfInterval form', () => {
    it.each([
      ['INTERVAL 5 MINUTE', '5 minute'],
      ['INTERVAL 1 HOUR', '1 hour'],
      ['INTERVAL 2 hour', '2 hour'],
      ['INTERVAL 30 SECOND', '30 second'],
      ['INTERVAL 1 DAY', '1 day'],
    ])('parses %s', (interval, expected) => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, ${interval}) AS ts, count() FROM t GROUP BY ts`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe(expected);
    });

    it('accepts plural units', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 10 MINUTES) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('10 minute');
    });

    it('handles extra arguments (origin, timezone)', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 1 DAY, toDateTime('2025-01-01'), 'America/Los_Angeles') AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('1 day');
    });

    it('handles extra whitespace and newlines inside the call', () => {
      const asSelect = `SELECT
toStartOfInterval (
Timestamp,
INTERVAL 15 MINUTE
) AS ts
FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('15 minute');
    });

    it('ignores unknown units', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 1 WEEK) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });

  describe('named bucket functions', () => {
    it.each([
      ['toStartOfSecond', '1 second'],
      ['toStartOfMinute', '1 minute'],
      ['toStartOfFiveMinutes', '5 minute'],
      ['toStartOfTenMinutes', '10 minute'],
      ['toStartOfFifteenMinutes', '15 minute'],
      ['toStartOfHour', '1 hour'],
      ['toStartOfDay', '1 day'],
    ])('maps %s to %s', (fn, expected) => {
      const asSelect = `SELECT ${fn}(Timestamp) AS ts, count() FROM t GROUP BY ts`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe(expected);
    });

    it('returns undefined for unrecognized toStartOf* functions', () => {
      // toStartOfMonth is a real CH function but not in NAMED_BUCKET_FUNCTIONS.
      const asSelect = `SELECT toStartOfMonth(Timestamp) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });

  describe('first match wins', () => {
    it('returns the granularity of the first toStartOf call encountered', () => {
      const asSelect = `SELECT toStartOfHour(Timestamp) AS h, toStartOfMinute(Timestamp) AS m FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('1 hour');
    });

    it('returns first call even when a later call would also match', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 5 MINUTE) AS a, toStartOfHour(Timestamp) AS b FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('5 minute');
    });
  });

  describe('robustness against quoting and noise', () => {
    it('ignores toStartOf* tokens inside single-quoted string literals', () => {
      const asSelect = `SELECT 'toStartOfHour(Timestamp)' AS label, toStartOfMinute(Timestamp) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('1 minute');
    });

    it('ignores toStartOf* tokens inside backtick-quoted identifiers', () => {
      const asSelect = 'SELECT `toStartOfHour` AS col FROM t';
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });

    it('ignores toStartOf* substrings embedded in longer identifiers', () => {
      // my_toStartOfHour_col is a single identifier, not a function call.
      const asSelect = `SELECT my_toStartOfHour_col AS x FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });

    it('is not confused by commas inside nested calls', () => {
      const asSelect = `SELECT toStartOfInterval(coalesce(Timestamp, now()), INTERVAL 5 MINUTE) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('5 minute');
    });

    it('skips a toStartOf call without matching parens and stops scanning', () => {
      // Unterminated call — scanning stops instead of running past the input.
      const asSelect = `SELECT toStartOfHour(Timestamp FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });

  describe('degenerate inputs', () => {
    it('returns undefined for empty string', () => {
      expect(inferGranularityFromMVSelect('')).toBeUndefined();
    });

    it('returns undefined when no toStartOf call is present', () => {
      const asSelect = `SELECT Timestamp, count() FROM t GROUP BY Timestamp`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });

    it('returns undefined when toStartOfInterval has no interval arg', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });

    it('returns undefined when toStartOfInterval has a malformed interval arg', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL abc MINUTE) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });
});
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
import { differenceInSeconds } from 'date-fns';
|
||||
|
||||
import { BaseClickhouseClient } from '@/clickhouse';
|
||||
import { BaseClickhouseClient, ChSql, chSql } from '@/clickhouse';
|
||||
import {
|
||||
BuilderChartConfigWithOptDateRange,
|
||||
CteChartConfig,
|
||||
|
|
@ -9,6 +9,7 @@ import {
|
|||
isLogSource,
|
||||
isTraceSource,
|
||||
MaterializedViewConfiguration,
|
||||
type SQLInterval,
|
||||
TLogSource,
|
||||
TSource,
|
||||
TTraceSource,
|
||||
|
|
@ -25,6 +26,178 @@ import {
|
|||
splitAndTrimWithBracket,
|
||||
} from './utils';
|
||||
|
||||
/**
 * ClickHouse time-bucketing functions that have a dedicated name, mapped to
 * the granularity each one rounds down to.
 */
const NAMED_BUCKET_FUNCTIONS: Record<string, SQLInterval> = {
  toStartOfSecond: '1 second',
  toStartOfMinute: '1 minute',
  toStartOfFiveMinutes: '5 minute',
  toStartOfTenMinutes: '10 minute',
  toStartOfFifteenMinutes: '15 minute',
  toStartOfHour: '1 hour',
  toStartOfDay: '1 day',
};

/** Interval units (singular, lower-case) that can be turned into a granularity. */
const VALID_INTERVAL_UNITS = new Set(['second', 'minute', 'hour', 'day']);
|
||||
|
||||
/** True when `ch` is an ASCII letter, an ASCII digit, or an underscore. */
const isIdentChar = (ch: string) => {
  if (ch === '_') return true;
  if (ch >= '0' && ch <= '9') return true;
  if (ch >= 'A' && ch <= 'Z') return true;
  return ch >= 'a' && ch <= 'z';
};
|
||||
|
||||
/** True when `ch` is exactly a space, tab, line feed, or carriage return. */
const isWhitespace = (ch: string) => {
  switch (ch) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
};
|
||||
|
||||
/**
 * Scans `input` for function calls whose name starts with `toStartOf` and
 * returns them in order of appearance.
 *
 * Text inside single-quoted, double-quoted, or backtick-quoted regions is
 * skipped, and a name only counts when it begins at a word boundary and is
 * followed (optionally after whitespace) by `(`. Scanning stops at the first
 * call whose parentheses never close.
 *
 * @returns One entry per call: the function name and the raw text between its
 *          outermost parentheses.
 */
function findToStartOfCalls(
  input: string,
): { fn: string; argsInner: string }[] {
  const calls: { fn: string; argsInner: string }[] = [];
  const len = input.length;

  const isQuote = (c: string) => c === "'" || c === '"' || c === '`';

  // Given the index of an opening quote, returns the index just past its
  // closing quote (or `len` when the quote never closes). A backslash escapes
  // the character that follows it.
  const pastQuoted = (openAt: number): number => {
    const quote = input[openAt];
    for (let q = openAt + 1; q < len; q++) {
      if (input[q] === '\\' && q + 1 < len) {
        q++; // together with the loop increment this skips both characters
        continue;
      }
      if (input[q] === quote) return q + 1;
    }
    return len;
  };

  let pos = 0;
  while (pos < len) {
    const ch = input[pos];

    if (isQuote(ch)) {
      pos = pastQuoted(pos);
      continue;
    }

    // Only consider identifiers that start here; a preceding identifier
    // character would mean we are in the middle of a longer name.
    const startsWord =
      isIdentChar(ch) && (pos === 0 || !isIdentChar(input[pos - 1]));
    if (!startsWord) {
      pos++;
      continue;
    }

    let wordEnd = pos + 1;
    while (wordEnd < len && isIdentChar(input[wordEnd])) wordEnd++;
    const word = input.slice(pos, wordEnd);

    // A call needs '(' right after the name, whitespace permitting.
    let parenAt = wordEnd;
    while (parenAt < len && isWhitespace(input[parenAt])) parenAt++;
    if (!word.startsWith('toStartOf') || input[parenAt] !== '(') {
      pos = wordEnd;
      continue;
    }

    // Find the matching ')', tracking nesting and skipping quoted regions.
    const argsFrom = parenAt + 1;
    let open = 1;
    let cursor = argsFrom;
    while (cursor < len) {
      const c = input[cursor];
      if (isQuote(c)) {
        cursor = pastQuoted(cursor);
        continue;
      }
      if (c === '(') {
        open++;
      } else if (c === ')') {
        open--;
        if (open === 0) break;
      }
      cursor++;
    }
    if (open !== 0) break; // unterminated call — stop scanning

    calls.push({ fn: word, argsInner: input.slice(argsFrom, cursor) });
    pos = cursor + 1;
  }

  return calls;
}
|
||||
|
||||
/**
 * Parses a ClickHouse interval expression into a `SQLInterval`.
 *
 * Two spellings are recognized:
 *  - the literal form `INTERVAL <n> <UNIT>` (unit case-insensitive, singular
 *    or plural), and
 *  - the function form `toInterval<Unit>(<n>)`, which is how ClickHouse
 *    renders an interval literal when it formats a stored query.
 *
 * @param expr Text of a single argument, e.g. `INTERVAL 5 MINUTE`.
 * @returns `"<n> <unit>"` with a lower-case singular unit, or `undefined`
 *          when the expression is not a positive whole number of seconds,
 *          minutes, hours, or days.
 */
function parseIntervalLiteral(expr: string): SQLInterval | undefined {
  let rawCount: string;
  let rawUnit: string;

  const fnForm = /^toInterval([A-Za-z]+)\s*\(\s*(\d+)\s*\)$/.exec(expr.trim());
  if (fnForm) {
    rawUnit = fnForm[1];
    rawCount = fnForm[2];
  } else {
    // Split on whitespace, dropping empty pieces.
    const tokens: string[] = [];
    let cur = '';
    for (const ch of expr) {
      if (isWhitespace(ch)) {
        if (cur) tokens.push(cur);
        cur = '';
      } else {
        cur += ch;
      }
    }
    if (cur) tokens.push(cur);

    if (tokens.length < 3) return undefined;
    if (tokens[0].toUpperCase() !== 'INTERVAL') return undefined;
    rawCount = tokens[1];
    rawUnit = tokens[2];
  }

  // Require a plain positive decimal integer; the round-trip comparison
  // rejects inputs such as "5x", "05", or "1.5".
  const num = Number.parseInt(rawCount, 10);
  if (!Number.isFinite(num) || num <= 0 || String(num) !== rawCount) {
    return undefined;
  }

  // Accept both singular and plural forms (MINUTE / MINUTES).
  let unit = rawUnit.toLowerCase();
  if (unit.endsWith('s')) unit = unit.slice(0, -1);
  if (!VALID_INTERVAL_UNITS.has(unit)) return undefined;

  return `${num} ${unit}` as SQLInterval;
}
|
||||
|
||||
/**
 * Infers the time-bucket granularity of a materialized view from its SELECT
 * text.
 *
 * The `toStartOf*` calls found in `asSelect` are examined in order. The first
 * one that is either a known named bucket function or a `toStartOfInterval`
 * call with a parseable interval as its second argument decides the result;
 * calls that yield nothing are skipped.
 *
 * @param asSelect SELECT statement of the materialized view.
 * @returns The inferred granularity, or `undefined` when no call yields one.
 */
export function inferGranularityFromMVSelect(
  asSelect: string,
): SQLInterval | undefined {
  for (const call of findToStartOfCalls(asSelect)) {
    if (call.fn in NAMED_BUCKET_FUNCTIONS) {
      return NAMED_BUCKET_FUNCTIONS[call.fn];
    }
    if (call.fn !== 'toStartOfInterval') continue;

    // The interval is the second argument; anything after it is ignored.
    const callArgs = splitAndTrimWithBracket(call.argsInner);
    const interval =
      callArgs.length >= 2 ? parseIntervalLiteral(callArgs[1]) : undefined;
    if (interval) return interval;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Reverse lookup into `NAMED_BUCKET_FUNCTIONS`.
 *
 * @param granularity Granularity to look up, compared by exact string match.
 * @returns Name of the first function mapped to `granularity`, or `undefined`
 *          when no named function has that granularity.
 */
export function getNamedBucketFunction(
  granularity: SQLInterval,
): string | undefined {
  return Object.keys(NAMED_BUCKET_FUNCTIONS).find(
    name => NAMED_BUCKET_FUNCTIONS[name] === granularity,
  );
}
|
||||
|
||||
/**
 * Wraps `inner` in the expression that rounds it down to the start of its
 * `granularity` bucket.
 *
 * When a named bucket function exists for the granularity it is used;
 * otherwise the granularity is converted to seconds and passed to
 * `toStartOfInterval`.
 *
 * @param granularity Bucket size.
 * @param inner       SQL fragment for the value to bucket.
 * @returns SQL fragment for the bucketed value.
 */
export function renderStartOfBucketExpr(
  granularity: SQLInterval,
  inner: ChSql,
): ChSql {
  const bucketFn = getNamedBucketFunction(granularity);
  if (!bucketFn) {
    const seconds = convertGranularityToSeconds(granularity);
    return chSql`toStartOfInterval(${inner}, INTERVAL ${{ Int64: seconds }} SECOND)`;
  }
  // bucketFn can only be a key of NAMED_BUCKET_FUNCTIONS (a fixed
  // allow-list), so splicing it in as raw SQL is safe.
  return chSql`${{ UNSAFE_RAW_SQL: bucketFn }}(${inner})`;
}
|
||||
|
||||
type SelectItem = Exclude<
|
||||
BuilderChartConfigWithOptDateRange['select'],
|
||||
string
|
||||
|
|
|
|||
|
|
@ -21,7 +21,10 @@ import type {
|
|||
} from '@/types';
|
||||
import { isLogSource, isTraceSource, SourceKind } from '@/types';
|
||||
|
||||
import { optimizeGetKeyValuesCalls } from './materializedViews';
|
||||
import {
|
||||
optimizeGetKeyValuesCalls,
|
||||
renderStartOfBucketExpr,
|
||||
} from './materializedViews';
|
||||
import {
|
||||
getAlignedDateRange,
|
||||
getDistributedTableArgs,
|
||||
|
|
@ -376,7 +379,15 @@ export class Metadata {
|
|||
// Rollup path: query the key rollup table filtered by ColumnIdentifier and date range
|
||||
if (metadataMVs && alignedDateRange) {
|
||||
return this.cache.getOrFetch<string[]>(cacheKey, async () => {
|
||||
const timeFilter = chSql`AND Timestamp >= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[0].getTime() }})) AND Timestamp <= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[1].getTime() }}))`;
|
||||
const startExpr = renderStartOfBucketExpr(
|
||||
metadataMVs.granularity,
|
||||
chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[0].getTime() }})`,
|
||||
);
|
||||
const endExpr = renderStartOfBucketExpr(
|
||||
metadataMVs.granularity,
|
||||
chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[1].getTime() }})`,
|
||||
);
|
||||
const timeFilter = chSql`AND Timestamp >= ${startExpr} AND Timestamp <= ${endExpr}`;
|
||||
const sql = chSql`
|
||||
SELECT Key
|
||||
FROM ${tableExpr({ database: databaseName, table: metadataMVs.keyRollupTable })}
|
||||
|
|
@ -1191,7 +1202,7 @@ export class Metadata {
|
|||
* Autocomplete: fetches top values for a specific map key from the KV rollup table.
|
||||
* Only filters by date range — no WHERE conditions. Values ordered by frequency.
|
||||
*/
|
||||
async getCompleteKeyValues({
|
||||
async getAllKeyValues({
|
||||
databaseName,
|
||||
tableName,
|
||||
column,
|
||||
|
|
@ -1214,8 +1225,22 @@ export class Metadata {
|
|||
}): Promise<string[]> {
|
||||
if (!metadataMVs) return [];
|
||||
|
||||
const timeFilter = chSql`AND Timestamp >= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: dateRange[0].getTime() }})) AND Timestamp <= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: dateRange[1].getTime() }}))`;
|
||||
const cacheKey = `${connectionId}.${databaseName}.${tableName}.${column}.${key}.${dateRange[0].getTime()}.${dateRange[1].getTime()}.completeKeyValues`;
|
||||
// Align date range to rollup granularity for consistent cache keys
|
||||
const alignedDateRange = getAlignedDateRange(
|
||||
dateRange,
|
||||
metadataMVs.granularity,
|
||||
);
|
||||
|
||||
const startExpr = renderStartOfBucketExpr(
|
||||
metadataMVs.granularity,
|
||||
chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[0].getTime() }})`,
|
||||
);
|
||||
const endExpr = renderStartOfBucketExpr(
|
||||
metadataMVs.granularity,
|
||||
chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[1].getTime() }})`,
|
||||
);
|
||||
const timeFilter = chSql`AND Timestamp >= ${startExpr} AND Timestamp <= ${endExpr}`;
|
||||
const cacheKey = `${connectionId}.${databaseName}.${tableName}.${column}.${key}.${alignedDateRange[0].getTime()}.${alignedDateRange[1].getTime()}.allKeyValues`;
|
||||
|
||||
return this.cache.getOrFetch(cacheKey, async () => {
|
||||
try {
|
||||
|
|
|
|||
Loading…
Reference in a new issue