diff --git a/packages/app/src/hooks/__tests__/useAutoCompleteOptions.test.tsx b/packages/app/src/hooks/__tests__/useAutoCompleteOptions.test.tsx index c5c6060d..0850e9b4 100644 --- a/packages/app/src/hooks/__tests__/useAutoCompleteOptions.test.tsx +++ b/packages/app/src/hooks/__tests__/useAutoCompleteOptions.test.tsx @@ -4,13 +4,14 @@ import { renderHook } from '@testing-library/react'; import { LuceneLanguageFormatter } from '../../components/SearchInput/SearchInputV2'; import { useAutoCompleteOptions } from '../useAutoCompleteOptions'; -import { useCompleteKeyValues, useMultipleAllFields } from '../useMetadata'; +import { tokenizeAtCursor } from '../useAutoCompleteOptions'; +import { useAllKeyValues, useMultipleAllFields } from '../useMetadata'; // Mock dependencies jest.mock('../useMetadata', () => ({ ...jest.requireActual('../useMetadata.tsx'), useMultipleAllFields: jest.fn(), - useCompleteKeyValues: jest.fn(), + useAllKeyValues: jest.fn(), })); const luceneFormatter = new LuceneLanguageFormatter(); @@ -49,7 +50,7 @@ describe('useAutoCompleteOptions', () => { data: mockFields, }); - (useCompleteKeyValues as jest.Mock).mockReturnValue({ + (useAllKeyValues as jest.Mock).mockReturnValue({ data: null, isFetching: false, }); @@ -79,7 +80,7 @@ describe('useAutoCompleteOptions', () => { }); it('should return key value options with correct lucene formatting', () => { - (useCompleteKeyValues as jest.Mock).mockReturnValue({ + (useAllKeyValues as jest.Mock).mockReturnValue({ data: ['frontend', 'backend'], isFetching: false, }); @@ -146,3 +147,165 @@ describe('useAutoCompleteOptions', () => { ]); }); }); + +describe('tokenizeAtCursor', () => { + // Each case is tokenized with the cursor at the end of the input, so + // `expectedToken` is the token the user is currently typing into. 
+ const cases: { + name: string; + input: string; + expectedToken: string; + expectedTokens: string[]; + }[] = [ + // Basic whitespace splitting + { + name: 'splits a bare whitespace-separated string into tokens', + input: 'foo bar baz', + expectedToken: 'baz', + expectedTokens: ['foo', 'bar', 'baz'], + }, + { + name: 'returns a single token for input with no whitespace', + input: 'FieldName', + expectedToken: 'FieldName', + expectedTokens: ['FieldName'], + }, + { + name: 'returns a single empty token for empty input', + input: '', + expectedToken: '', + expectedTokens: [''], + }, + { + name: 'produces empty tokens for consecutive spaces (split-like semantics)', + input: 'foo bar', + expectedToken: 'bar', + expectedTokens: ['foo', '', 'bar'], + }, + { + name: 'produces a trailing empty token when input ends in a space', + input: 'foo ', + expectedToken: '', + expectedTokens: ['foo', ''], + }, + + // Balanced quoted regions + { + name: 'keeps whitespace inside paired quotes as part of one token', + input: 'Service:"hello world"', + expectedToken: 'Service:"hello world"', + expectedTokens: ['Service:"hello world"'], + }, + { + name: 'splits two paired-quote fields on the space between them', + input: 'ServiceName:"clickhouse" SeverityText:"debug"', + expectedToken: 'SeverityText:"debug"', + expectedTokens: ['ServiceName:"clickhouse"', 'SeverityText:"debug"'], + }, + { + name: 'preserves escaped quotes inside a quoted region', + input: 'Service:"he said \\"hi\\"" other', + expectedToken: 'other', + expectedTokens: ['Service:"he said \\"hi\\""', 'other'], + }, + { + name: 'treats a colon inside a quoted value as literal text', + input: 'URL:"http://example.com/x" x', + expectedToken: 'x', + expectedTokens: ['URL:"http://example.com/x"', 'x'], + }, + + // Unclosed quotes — reproduces the bug where `Field:" Other:"v"` (three + // quotes with the first unclosed) previously collapsed into one token. 
+ { + name: 'breaks at whitespace when a quote is followed by a new field pattern', + input: 'ServiceName:" SeverityText:"debug"', + expectedToken: 'SeverityText:"debug"', + expectedTokens: ['ServiceName:"', 'SeverityText:"debug"'], + }, + { + name: 'treats a single unclosed quote at end of input as a literal', + input: 'Service:"hel', + expectedToken: 'Service:"hel', + expectedTokens: ['Service:"hel'], + }, + { + // No `ident:` after the space, so the quote can still legitimately + // extend — but there's no closing quote anywhere, so it's unclosed. + name: 'handles an unclosed quote followed by whitespace then bare text', + input: 'Service:"hello world', + expectedToken: 'world', + expectedTokens: ['Service:"hello', 'world'], + }, + { + name: 'handles multiple unclosed quotes across fields', + input: 'A:" B:" C:"done"', + expectedToken: 'C:"done"', + expectedTokens: ['A:"', 'B:"', 'C:"done"'], + }, + + // Identifier-like characters after whitespace + { + // The space inside the quoted value is followed by `!`, not `ident:`, + // so the quote should still be able to close. 
+ name: 'does not bail out on whitespace followed by a non-identifier', + input: 'Service:"hello !world"', + expectedToken: 'Service:"hello !world"', + expectedTokens: ['Service:"hello !world"'], + }, + { + name: 'does not treat whitespace + ident without colon as a new field', + input: 'Service:"hello world done"', + expectedToken: 'Service:"hello world done"', + expectedTokens: ['Service:"hello world done"'], + }, + { + name: 'handles dotted identifiers in the new-field pattern', + input: 'Foo:" ResourceAttributes.service.name:"x"', + expectedToken: 'ResourceAttributes.service.name:"x"', + expectedTokens: ['Foo:"', 'ResourceAttributes.service.name:"x"'], + }, + ]; + + it.each(cases)('$name', ({ input, expectedToken, expectedTokens }) => { + const { token, tokens } = tokenizeAtCursor(input, input.length); + expect(tokens).toEqual(expectedTokens); + expect(token).toBe(expectedToken); + }); + + // Cursor-positioning is orthogonal to tokenization — keep these separate + // because each case exercises a different cursor offset for the same input. 
+ describe('cursor positioning', () => { + it('returns the first token when the cursor is at position 0', () => { + const { token, index } = tokenizeAtCursor('foo bar baz', 0); + expect(token).toBe('foo'); + expect(index).toBe(0); + }); + + it('returns the middle token when the cursor is inside it', () => { + // 0123456789012 + // 'foo bar baz' — cursor at 5 is inside 'bar' + const { token, index } = tokenizeAtCursor('foo bar baz', 5); + expect(token).toBe('bar'); + expect(index).toBe(1); + }); + + it('returns the token whose range contains the cursor in a quoted field', () => { + const input = 'Service:"hello world" other'; + // cursor inside the quoted token + const { token, index } = tokenizeAtCursor(input, 15); + expect(token).toBe('Service:"hello world"'); + expect(index).toBe(0); + }); + + it('returns the unclosed-quote token when the cursor is inside it', () => { + // User is typing a value — the first quote is unclosed because the + // next field pattern `SeverityText:` appears after whitespace. Cursor + // sits just after `l`, inside the in-progress `ServiceName:"cl` token. 
/** Characters allowed in a field identifier, e.g. `ResourceAttributes.service.name`. */
const IDENT_RE = /[A-Za-z0-9_.]/;

/**
 * Lucene operators / grouping that may sit directly in front of a field name
 * (`-Field:`, `+Field:`, `!Field:`, `(Field:`).
 */
const FIELD_PREFIX_RE = /[-+!(]/;

/** Whitespace recognised by the new-field lookahead in `findMatchingQuote`. */
const isLookaheadSpace = (ch: string): boolean =>
  ch === ' ' || ch === '\t' || ch === '\n';

/**
 * Finds the quote that closes the quoted region opened at `startIdx`.
 *
 * Backslash-escaped characters are skipped. The search gives up as soon as it
 * meets whitespace followed by what looks like the start of another field
 * (optional `-`/`+`/`!`/`(` prefixes, an identifier, then `:`), because in
 * that case the opening quote is most likely an unfinished value and the next
 * quote belongs to the following field.
 *
 * @param value full input string
 * @param startIdx index of the opening `"`
 * @returns index of the closing `"`, or -1 when the quote is considered unclosed
 */
function findMatchingQuote(value: string, startIdx: number): number {
  let i = startIdx + 1;
  while (i < value.length) {
    const ch = value.charAt(i);
    if (ch === '\\' && i + 1 < value.length) {
      // Skip the backslash together with the character it escapes.
      i += 2;
      continue;
    }
    if (ch === '"') return i;
    if (isLookaheadSpace(ch)) {
      let wsEnd = i;
      while (wsEnd < value.length && isLookaheadSpace(value.charAt(wsEnd))) {
        wsEnd++;
      }
      // Allow Lucene prefix operators / opening parens before the field name
      // so `A:" -B:"v"` is handled the same way as `A:" B:"v"`.
      let k = wsEnd;
      while (k < value.length && FIELD_PREFIX_RE.test(value.charAt(k))) k++;
      const identStart = k;
      while (k < value.length && IDENT_RE.test(value.charAt(k))) k++;
      if (k > identStart && value.charAt(k) === ':') {
        return -1;
      }
      // Every character of this whitespace run yields the same lookahead, so
      // resume at the first non-whitespace character instead of re-scanning.
      i = wsEnd;
      continue;
    }
    i++;
  }
  return -1;
}

/**
 * Splits `value` into tokens and reports which token the cursor is in.
 *
 * Tokens are separated by single spaces, mirroring `value.split(' ')`:
 * consecutive spaces produce empty tokens and a trailing space produces a
 * trailing empty token. Spaces inside a balanced double-quoted region do not
 * split. A quote without a usable closing quote (see `findMatchingQuote`) is
 * kept as a literal character.
 *
 * @param value the full search input
 * @param cursorPos caret offset into `value` (the caret sits between characters)
 * @returns the token under the cursor, its index, and all tokens
 */
export function tokenizeAtCursor(value: string, cursorPos: number): TokenInfo {
  const tokens: string[] = [];
  // Start offset of each token in the original string (parallel to `tokens`).
  const starts: number[] = [];

  let current = '';
  let currentStart = -1;
  let inQuotes = false;
  let escaped = false;

  for (let i = 0; i < value.length; i++) {
    const ch = value.charAt(i);

    if (escaped) {
      // Keep the escaped character verbatim (its backslash was already added).
      current += ch;
      escaped = false;
      continue;
    }

    if (ch === '\\' && inQuotes) {
      current += ch;
      escaped = true;
      continue;
    }

    if (!inQuotes && ch === ' ') {
      // Boundary: flush the current token even when it is empty.
      tokens.push(current);
      starts.push(currentStart === -1 ? i : currentStart);
      current = '';
      currentStart = -1;
      continue;
    }

    if (ch === '"') {
      if (inQuotes) {
        inQuotes = false;
      } else if (findMatchingQuote(value, i) !== -1) {
        // Only enter a quoted region when a matching close exists ahead.
        inQuotes = true;
      }
      // Otherwise the quote is stray/unclosed and is treated as a literal.
    }

    if (currentStart === -1) currentStart = i;
    current += ch;
  }

  // Flush the trailing token.
  tokens.push(current);
  starts.push(currentStart === -1 ? value.length : currentStart);

  // A token covers the caret positions [start, start + length]. Pick the
  // FIRST token whose range contains the cursor; fall back to the last token
  // when the cursor lies beyond every range.
  const found = tokens.findIndex(
    (tok, i) => cursorPos <= (starts[i] ?? 0) + tok.length,
  );
  const idx = found === -1 ? tokens.length - 1 : found;

  return { token: tokens[idx] ?? '', index: idx, tokens };
}
tcs[0] : undefined; - const { data: keyValues, isFetching: isLoadingValues } = useCompleteKeyValues( - { - tableConnection: firstTc, - searchField, - dateRange: effectiveDateRange, - }, - ); + const { data: keyValues, isFetching: isLoadingValues } = useAllKeyValues({ + tableConnection: firstTc, + searchField, + dateRange: effectiveDateRange, + }); // Build key-value pair suggestions const keyValCompleteOptions = useMemo< diff --git a/packages/app/src/hooks/useMetadata.tsx b/packages/app/src/hooks/useMetadata.tsx index 0ea7ccf5..c81683cb 100644 --- a/packages/app/src/hooks/useMetadata.tsx +++ b/packages/app/src/hooks/useMetadata.tsx @@ -201,7 +201,7 @@ function fieldToRollupParams( * Works for both map keys (e.g. "ResourceAttributes.http.method") and * native columns (e.g. "ServiceName"). */ -export function useCompleteKeyValues({ +export function useAllKeyValues({ tableConnection, searchField, dateRange, @@ -212,18 +212,11 @@ export function useCompleteKeyValues({ }) { const metadata = useMetadataWithSettings(); - // Debounce: only query after the field stabilizes for 300ms - const [debouncedField, setDebouncedField] = useState(null); - useEffect(() => { - const timer = setTimeout(() => setDebouncedField(searchField), 300); - return () => clearTimeout(timer); - }, [searchField]); - - const rollupParams = fieldToRollupParams(debouncedField, tableConnection); + const rollupParams = fieldToRollupParams(searchField, tableConnection); return useQuery({ queryKey: [ - 'useCompleteKeyValues', + 'useAllKeyValues', tableConnection?.databaseName, tableConnection?.tableName, tableConnection?.connectionId, @@ -233,10 +226,10 @@ export function useCompleteKeyValues({ dateRange[1].getTime(), ], queryFn: async ({ signal }) => { - if (!tableConnection || !rollupParams || !debouncedField) return []; + if (!tableConnection || !rollupParams || !searchField) return []; // Try rollup first - const rollupValues = await metadata.getCompleteKeyValues({ + const rollupValues = await 
metadata.getAllKeyValues({ databaseName: tableConnection.databaseName, tableName: tableConnection.tableName, column: rollupParams.columnIdentifier, @@ -264,7 +257,7 @@ export function useCompleteKeyValues({ return metadata.getMapValues({ databaseName: tableConnection.databaseName, tableName: tableConnection.tableName, - column: debouncedField.path[0], + column: searchField.path[0], connectionId: tableConnection.connectionId, }); } diff --git a/packages/app/src/source.ts b/packages/app/src/source.ts index e361f57a..4713a221 100644 --- a/packages/app/src/source.ts +++ b/packages/app/src/source.ts @@ -7,6 +7,7 @@ import { filterColumnMetaByType, JSDataType, } from '@hyperdx/common-utils/dist/clickhouse'; +import { inferGranularityFromMVSelect } from '@hyperdx/common-utils/dist/core/materializedViews'; import { Metadata } from '@hyperdx/common-utils/dist/core/metadata'; import { splitAndTrimWithBracket } from '@hyperdx/common-utils/dist/core/utils'; import { isBuilderChartConfig } from '@hyperdx/common-utils/dist/guards'; @@ -348,8 +349,9 @@ export async function inferTableSourceConfig({ // Check if SpanEvents column is available const hasSpanEvents = columns.some(col => col.name === 'Events.Timestamp'); - // Check if metadata rollup tables exist - const hasMetadataMVs = + // Check if metadata rollup tables exist and, if so, infer the bucketing + // granularity from the key-rollup view's `as_select` + const rollupMeta = isOtelLogSchema || isOtelSpanSchema ? await (async () => { const [keyMeta, kvMeta] = await Promise.all([ @@ -364,16 +366,22 @@ export async function inferTableSourceConfig({ connectionId, }), ]); - return keyMeta != null && kvMeta != null; + return keyMeta != null && kvMeta != null + ? { keyMeta, kvMeta } + : undefined; })() - : false; + : undefined; - const metadataMVsConfig = hasMetadataMVs + const metadataMVsConfig = rollupMeta ? 
{ metadataMaterializedViews: { keyRollupTable: `${tableName}_key_rollup_15m`, kvRollupTable: `${tableName}_kv_rollup_15m`, - granularity: '15 minute', + // Fall back to '15 minute' to preserve the prior default when the + // MV's `as_select` doesn't contain a recognized bucketing function. + granularity: + inferGranularityFromMVSelect(rollupMeta.keyMeta.as_select) ?? + '15 minute', }, } : {}; diff --git a/packages/common-utils/src/__tests__/materializedViews.test.ts b/packages/common-utils/src/__tests__/materializedViews.test.ts new file mode 100644 index 00000000..c1c2bd6c --- /dev/null +++ b/packages/common-utils/src/__tests__/materializedViews.test.ts @@ -0,0 +1,192 @@ +import { inferGranularityFromMVSelect } from '@/core/materializedViews'; + +describe('inferGranularityFromMVSelect', () => { + describe('real MV schemas', () => { + // Shape matches the `otel_logs_attr_kv_rollup_15m_mv` view in + // docker/otel-collector/schema/seed/00006_otel_logs_rollups.sql. + it('detects 15 minute from the otel_logs kv rollup MV select', () => { + const asSelect = `WITH elements AS ( + SELECT + 'ResourceAttributes' AS ColumnIdentifier, + toStartOfFifteenMinutes(Timestamp) AS Timestamp, + replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key, + CAST(entry.2 AS String) AS Value + FROM default.otel_logs + ARRAY JOIN ResourceAttributes AS entry + UNION ALL + SELECT + 'LogAttributes' AS ColumnIdentifier, + toStartOfFifteenMinutes(Timestamp) AS Timestamp, + replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key, + CAST(entry.2 AS String) AS Value + FROM default.otel_logs + ARRAY JOIN LogAttributes AS entry +) +SELECT Timestamp, ColumnIdentifier, Key, Value, count() AS count FROM elements +GROUP BY Timestamp, ColumnIdentifier, Key, Value`; + + expect(inferGranularityFromMVSelect(asSelect)).toBe('15 minute'); + }); + + // Shape matches the `otel_traces_kv_rollup_15m_mv` view in + // docker/otel-collector/schema/seed/00007_otel_traces_rollups.sql. 
+ it('detects 15 minute from the otel_traces kv rollup MV select', () => { + const asSelect = `WITH elements AS ( + SELECT + 'ResourceAttributes' AS ColumnIdentifier, + toStartOfFifteenMinutes(Timestamp) AS Timestamp, + replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key, + CAST(entry.2 AS String) AS Value + FROM default.otel_traces + ARRAY JOIN ResourceAttributes AS entry + UNION ALL + SELECT + 'SpanAttributes' AS ColumnIdentifier, + toStartOfFifteenMinutes(Timestamp) AS Timestamp, + replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key, + CAST(entry.2 AS String) AS Value + FROM default.otel_traces + ARRAY JOIN SpanAttributes AS entry +) +SELECT Timestamp, ColumnIdentifier, Key, Value, count() AS count FROM elements +GROUP BY Timestamp, ColumnIdentifier, Key, Value`; + + expect(inferGranularityFromMVSelect(asSelect)).toBe('15 minute'); + }); + + // The key-rollup MV rolls up the kv rollup, so it doesn't bucket the + // timestamp itself — it just selects the already-bucketed Timestamp. 
+ it('returns undefined when the select contains no bucketing function', () => { + const asSelect = `SELECT + Timestamp, + ColumnIdentifier, + Key, + sum(count) as count +FROM default.otel_logs_kv_rollup_15m +GROUP BY ColumnIdentifier, Key, Timestamp`; + + expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined(); + }); + }); + + describe('toStartOfInterval form', () => { + it.each([ + ['INTERVAL 5 MINUTE', '5 minute'], + ['INTERVAL 1 HOUR', '1 hour'], + ['INTERVAL 2 hour', '2 hour'], + ['INTERVAL 30 SECOND', '30 second'], + ['INTERVAL 1 DAY', '1 day'], + ])('parses %s', (interval, expected) => { + const asSelect = `SELECT toStartOfInterval(Timestamp, ${interval}) AS ts, count() FROM t GROUP BY ts`; + expect(inferGranularityFromMVSelect(asSelect)).toBe(expected); + }); + + it('accepts plural units', () => { + const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 10 MINUTES) AS ts FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBe('10 minute'); + }); + + it('handles extra arguments (origin, timezone)', () => { + const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 1 DAY, toDateTime('2025-01-01'), 'America/Los_Angeles') AS ts FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBe('1 day'); + }); + + it('handles extra whitespace and newlines inside the call', () => { + const asSelect = `SELECT + toStartOfInterval ( + Timestamp, + INTERVAL 15 MINUTE + ) AS ts +FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBe('15 minute'); + }); + + it('ignores unknown units', () => { + const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 1 WEEK) AS ts FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined(); + }); + }); + + describe('named bucket functions', () => { + it.each([ + ['toStartOfSecond', '1 second'], + ['toStartOfMinute', '1 minute'], + ['toStartOfFiveMinutes', '5 minute'], + ['toStartOfTenMinutes', '10 minute'], + ['toStartOfFifteenMinutes', '15 minute'], + ['toStartOfHour', '1 hour'], + 
['toStartOfDay', '1 day'], + ])('maps %s to %s', (fn, expected) => { + const asSelect = `SELECT ${fn}(Timestamp) AS ts, count() FROM t GROUP BY ts`; + expect(inferGranularityFromMVSelect(asSelect)).toBe(expected); + }); + + it('returns undefined for unrecognized toStartOf* functions', () => { + // toStartOfMonth is a real CH function but not in NAMED_BUCKET_FUNCTIONS. + const asSelect = `SELECT toStartOfMonth(Timestamp) AS ts FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined(); + }); + }); + + describe('first match wins', () => { + it('returns the granularity of the first toStartOf call encountered', () => { + const asSelect = `SELECT toStartOfHour(Timestamp) AS h, toStartOfMinute(Timestamp) AS m FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBe('1 hour'); + }); + + it('returns first call even when a later call would also match', () => { + const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 5 MINUTE) AS a, toStartOfHour(Timestamp) AS b FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBe('5 minute'); + }); + }); + + describe('robustness against quoting and noise', () => { + it('ignores toStartOf* tokens inside single-quoted string literals', () => { + const asSelect = `SELECT 'toStartOfHour(Timestamp)' AS label, toStartOfMinute(Timestamp) AS ts FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBe('1 minute'); + }); + + it('ignores toStartOf* tokens inside backtick-quoted identifiers', () => { + const asSelect = 'SELECT `toStartOfHour` AS col FROM t'; + expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined(); + }); + + it('ignores toStartOf* substrings embedded in longer identifiers', () => { + // my_toStartOfHour_col is a single identifier, not a function call. 
+ const asSelect = `SELECT my_toStartOfHour_col AS x FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined(); + }); + + it('is not confused by commas inside nested calls', () => { + const asSelect = `SELECT toStartOfInterval(coalesce(Timestamp, now()), INTERVAL 5 MINUTE) AS ts FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBe('5 minute'); + }); + + it('skips a toStartOf call without matching parens and stops scanning', () => { + // Unterminated call — we stop rather than looping forever. + const asSelect = `SELECT toStartOfHour(Timestamp FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined(); + }); + }); + + describe('degenerate inputs', () => { + it('returns undefined for empty string', () => { + expect(inferGranularityFromMVSelect('')).toBeUndefined(); + }); + + it('returns undefined when no toStartOf call is present', () => { + const asSelect = `SELECT Timestamp, count() FROM t GROUP BY Timestamp`; + expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined(); + }); + + it('returns undefined when toStartOfInterval has no interval arg', () => { + const asSelect = `SELECT toStartOfInterval(Timestamp) AS ts FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined(); + }); + + it('returns undefined when toStartOfInterval has a malformed interval arg', () => { + const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL abc MINUTE) AS ts FROM t`; + expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined(); + }); + }); +}); diff --git a/packages/common-utils/src/core/materializedViews.ts b/packages/common-utils/src/core/materializedViews.ts index 51fac8f3..03e6272b 100644 --- a/packages/common-utils/src/core/materializedViews.ts +++ b/packages/common-utils/src/core/materializedViews.ts @@ -1,6 +1,6 @@ import { differenceInSeconds } from 'date-fns'; -import { BaseClickhouseClient } from '@/clickhouse'; +import { BaseClickhouseClient, ChSql, chSql } from '@/clickhouse'; import { 
/** ClickHouse named time-bucketing functions and the granularity each one buckets to. */
const NAMED_BUCKET_FUNCTIONS: Record<string, SQLInterval> = {
  toStartOfSecond: '1 second',
  toStartOfMinute: '1 minute',
  toStartOfFiveMinutes: '5 minute',
  toStartOfTenMinutes: '10 minute',
  toStartOfFifteenMinutes: '15 minute',
  toStartOfHour: '1 hour',
  toStartOfDay: '1 day',
};

/** Interval units that can be expressed as a granularity. */
const VALID_INTERVAL_UNITS = new Set(['second', 'minute', 'hour', 'day']);

/**
 * Function form of an interval literal, e.g. `toIntervalMinute(5)`.
 * ClickHouse prints `INTERVAL 5 MINUTE` this way when it formats a query.
 */
const TO_INTERVAL_CALL_RE = /^toInterval([A-Za-z]+)[ \t\n\r]*\([ \t\n\r]*([0-9]+)[ \t\n\r]*\)$/;

const isIdentChar = (ch: string | undefined): boolean =>
  ch !== undefined &&
  ((ch >= 'a' && ch <= 'z') ||
    (ch >= 'A' && ch <= 'Z') ||
    (ch >= '0' && ch <= '9') ||
    ch === '_');

const isWhitespace = (ch: string | undefined): boolean =>
  ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r';

/**
 * Collects every `toStartOf…(...)` call in `input`, in source order.
 *
 * Quoted regions (`'…'`, `"…"`, `` `…` ``) and SQL comments (`-- …` to end of
 * line, `/* … *\/`) are skipped, both when looking for calls and when walking
 * to a call's closing parenthesis. Scanning stops at the first call whose
 * parentheses never close.
 *
 * @returns the function name and the raw text between its parentheses
 */
function findToStartOfCalls(
  input: string,
): { fn: string; argsInner: string }[] {
  const out: { fn: string; argsInner: string }[] = [];
  const n = input.length;

  // Returns the index just past the quote that closes the region opened at
  // `input[start]`, or `n` when it is never closed.
  const skipQuoted = (start: number, quote: string): number => {
    let p = start + 1;
    while (p < n) {
      const c = input[p];
      if (c === '\\' && p + 1 < n) {
        p += 2;
        continue;
      }
      if (c === quote) return p + 1;
      p++;
    }
    return n;
  };

  // If a quoted region or comment starts at `p`, returns the index just past
  // it; otherwise returns `p` unchanged.
  const skipNonCode = (p: number): number => {
    const c = input[p];
    if (c === "'" || c === '"' || c === '`') return skipQuoted(p, c);
    if (c === '-' && input[p + 1] === '-') {
      const eol = input.indexOf('\n', p + 2);
      return eol === -1 ? n : eol + 1;
    }
    if (c === '/' && input[p + 1] === '*') {
      const close = input.indexOf('*/', p + 2);
      return close === -1 ? n : close + 2;
    }
    return p;
  };

  let i = 0;
  while (i < n) {
    const afterNonCode = skipNonCode(i);
    if (afterNonCode !== i) {
      i = afterNonCode;
      continue;
    }

    // Only read an identifier from a word boundary; a preceding identifier
    // character means we are in the middle of a longer token.
    const atBoundary = i === 0 || !isIdentChar(input[i - 1]);
    if (!atBoundary || !isIdentChar(input[i])) {
      i++;
      continue;
    }

    let identEnd = i;
    while (identEnd < n && isIdentChar(input[identEnd])) identEnd++;
    const ident = input.substring(i, identEnd);

    if (!ident.startsWith('toStartOf')) {
      i = identEnd;
      continue;
    }

    // A call needs '(' next, possibly after whitespace.
    let open = identEnd;
    while (open < n && isWhitespace(input[open])) open++;
    if (input[open] !== '(') {
      i = identEnd;
      continue;
    }

    // Walk to the matching ')', honouring nesting, quotes and comments.
    const argStart = open + 1;
    let depth = 1;
    let p = argStart;
    while (p < n) {
      const skipped = skipNonCode(p);
      if (skipped !== p) {
        p = skipped;
        continue;
      }
      const c = input[p];
      if (c === '(') {
        depth++;
      } else if (c === ')') {
        depth--;
        if (depth === 0) break;
      }
      p++;
    }
    if (depth !== 0) break; // unterminated call: stop scanning
    out.push({ fn: ident, argsInner: input.substring(argStart, p) });
    i = p + 1;
  }

  return out;
}

/**
 * Builds a granularity from a count and a unit name.
 * The count must be a positive base-10 integer with no extra characters; the
 * unit may be singular or plural and is matched case-insensitively.
 */
function buildInterval(count: string, unitText: string): SQLInterval | undefined {
  const num = Number.parseInt(count, 10);
  if (!Number.isFinite(num) || num <= 0 || String(num) !== count) {
    return undefined;
  }

  // Accept both singular and plural forms (MINUTE / MINUTES).
  let unit = unitText.toLowerCase();
  if (unit.endsWith('s')) unit = unit.slice(0, -1);
  if (!VALID_INTERVAL_UNITS.has(unit)) return undefined;

  return `${num} ${unit}` as SQLInterval;
}

/**
 * Parses an interval expression into a granularity.
 *
 * Accepts the operator form (`INTERVAL 5 MINUTE`) and the function form
 * (`toIntervalMinute(5)`). Returns `undefined` for anything else, including
 * units outside `VALID_INTERVAL_UNITS`.
 */
function parseIntervalLiteral(expr: string): SQLInterval | undefined {
  const words = expr.split(/[ \t\n\r]+/).filter(w => w.length > 0);

  const fnForm = TO_INTERVAL_CALL_RE.exec(words.join(' '));
  if (fnForm) return buildInterval(fnForm[2] ?? '', fnForm[1] ?? '');

  if (words.length < 3) return undefined;
  const [keyword = '', count = '', unit = ''] = words;
  if (keyword.toUpperCase() !== 'INTERVAL') return undefined;

  return buildInterval(count, unit);
}

/**
 * Infers the time-bucket granularity from a materialized view's SELECT text.
 *
 * `toStartOf…` calls are examined in source order and the first one that maps
 * to a granularity wins: either a function listed in `NAMED_BUCKET_FUNCTIONS`
 * or `toStartOfInterval(expr, <interval>, ...)` whose second argument parses
 * as an interval.
 *
 * @returns the granularity, or `undefined` when none can be recognised
 */
export function inferGranularityFromMVSelect(
  asSelect: string,
): SQLInterval | undefined {
  for (const { fn, argsInner } of findToStartOfCalls(asSelect)) {
    // Own-property check so inherited object members can never match.
    if (Object.prototype.hasOwnProperty.call(NAMED_BUCKET_FUNCTIONS, fn)) {
      return NAMED_BUCKET_FUNCTIONS[fn];
    }
    if (fn === 'toStartOfInterval') {
      const args = splitAndTrimWithBracket(argsInner);
      if (args.length < 2) continue;
      const parsed = parseIntervalLiteral(args[1]);
      if (parsed) return parsed;
    }
  }
  return undefined;
}

/**
 * Reverse lookup of `NAMED_BUCKET_FUNCTIONS`.
 *
 * @returns the named function that buckets to exactly `granularity`, or
 * `undefined` when there is none
 */
export function getNamedBucketFunction(
  granularity: SQLInterval,
): string | undefined {
  const entry = Object.entries(NAMED_BUCKET_FUNCTIONS).find(
    ([, g]) => g === granularity,
  );
  return entry?.[0];
}

/**
 * Renders a SQL expression that truncates `inner` to the start of its
 * `granularity` bucket.
 *
 * Uses the matching named function when one exists; otherwise falls back to
 * `toStartOfInterval(inner, INTERVAL <n> SECOND)`, where `<n>` is the value
 * returned by `convertGranularityToSeconds`.
 */
export function renderStartOfBucketExpr(
  granularity: SQLInterval,
  inner: ChSql,
): ChSql {
  const namedFn = getNamedBucketFunction(granularity);
  if (namedFn) {
    // namedFn comes from a fixed allow-list (NAMED_BUCKET_FUNCTIONS keys), so
    // splicing it as raw SQL is safe.
    return chSql`${{ UNSAFE_RAW_SQL: namedFn }}(${inner})`;
  }
  const seconds = convertGranularityToSeconds(granularity);
  return chSql`toStartOfInterval(${inner}, INTERVAL ${{ Int64: seconds }} SECOND)`;
}
* Only filters by date range — no WHERE conditions. Values ordered by frequency. */ - async getCompleteKeyValues({ + async getAllKeyValues({ databaseName, tableName, column, @@ -1214,8 +1225,22 @@ export class Metadata { }): Promise { if (!metadataMVs) return []; - const timeFilter = chSql`AND Timestamp >= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: dateRange[0].getTime() }})) AND Timestamp <= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: dateRange[1].getTime() }}))`; - const cacheKey = `${connectionId}.${databaseName}.${tableName}.${column}.${key}.${dateRange[0].getTime()}.${dateRange[1].getTime()}.completeKeyValues`; + // Align date range to rollup granularity for consistent cache keys + const alignedDateRange = getAlignedDateRange( + dateRange, + metadataMVs.granularity, + ); + + const startExpr = renderStartOfBucketExpr( + metadataMVs.granularity, + chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[0].getTime() }})`, + ); + const endExpr = renderStartOfBucketExpr( + metadataMVs.granularity, + chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[1].getTime() }})`, + ); + const timeFilter = chSql`AND Timestamp >= ${startExpr} AND Timestamp <= ${endExpr}`; + const cacheKey = `${connectionId}.${databaseName}.${tableName}.${column}.${key}.${alignedDateRange[0].getTime()}.${alignedDateRange[1].getTime()}.allKeyValues`; return this.cache.getOrFetch(cacheKey, async () => { try {