Address PR review feedback: rename useCompleteKeyValues to useAllKeyValues, make tokenization quote-aware, and infer rollup granularity from the MV's as_select

This commit is contained in:
Aaron Knudtson 2026-04-17 16:51:18 -04:00
parent cface4610c
commit 41fe0b8369
7 changed files with 690 additions and 46 deletions

View file

@ -4,13 +4,14 @@ import { renderHook } from '@testing-library/react';
import { LuceneLanguageFormatter } from '../../components/SearchInput/SearchInputV2';
import { useAutoCompleteOptions } from '../useAutoCompleteOptions';
import { useCompleteKeyValues, useMultipleAllFields } from '../useMetadata';
import { tokenizeAtCursor } from '../useAutoCompleteOptions';
import { useAllKeyValues, useMultipleAllFields } from '../useMetadata';
// Mock dependencies
jest.mock('../useMetadata', () => ({
...jest.requireActual('../useMetadata.tsx'),
useMultipleAllFields: jest.fn(),
useCompleteKeyValues: jest.fn(),
useAllKeyValues: jest.fn(),
}));
const luceneFormatter = new LuceneLanguageFormatter();
@ -49,7 +50,7 @@ describe('useAutoCompleteOptions', () => {
data: mockFields,
});
(useCompleteKeyValues as jest.Mock).mockReturnValue({
(useAllKeyValues as jest.Mock).mockReturnValue({
data: null,
isFetching: false,
});
@ -79,7 +80,7 @@ describe('useAutoCompleteOptions', () => {
});
it('should return key value options with correct lucene formatting', () => {
(useCompleteKeyValues as jest.Mock).mockReturnValue({
(useAllKeyValues as jest.Mock).mockReturnValue({
data: ['frontend', 'backend'],
isFetching: false,
});
@ -146,3 +147,165 @@ describe('useAutoCompleteOptions', () => {
]);
});
});
describe('tokenizeAtCursor', () => {
  // Table-driven cases. Each case is tokenized with the cursor at the end of
  // the input, so `expectedToken` is the token the user is currently typing
  // into. Cursor-position-specific behavior is covered separately below.
  const cases: {
    name: string;
    input: string;
    expectedToken: string;
    expectedTokens: string[];
  }[] = [
    // Basic whitespace splitting
    {
      name: 'splits a bare whitespace-separated string into tokens',
      input: 'foo bar baz',
      expectedToken: 'baz',
      expectedTokens: ['foo', 'bar', 'baz'],
    },
    {
      name: 'returns a single token for input with no whitespace',
      input: 'FieldName',
      expectedToken: 'FieldName',
      expectedTokens: ['FieldName'],
    },
    {
      name: 'returns a single empty token for empty input',
      input: '',
      expectedToken: '',
      expectedTokens: [''],
    },
    {
      name: 'produces empty tokens for consecutive spaces (split-like semantics)',
      // NOTE: two consecutive spaces between 'foo' and 'bar' are required to
      // produce the empty middle token asserted below (a single space would
      // tokenize to ['foo', 'bar']).
      input: 'foo  bar',
      expectedToken: 'bar',
      expectedTokens: ['foo', '', 'bar'],
    },
    {
      name: 'produces a trailing empty token when input ends in a space',
      input: 'foo ',
      expectedToken: '',
      expectedTokens: ['foo', ''],
    },
    // Balanced quoted regions
    {
      name: 'keeps whitespace inside paired quotes as part of one token',
      input: 'Service:"hello world"',
      expectedToken: 'Service:"hello world"',
      expectedTokens: ['Service:"hello world"'],
    },
    {
      name: 'splits two paired-quote fields on the space between them',
      input: 'ServiceName:"clickhouse" SeverityText:"debug"',
      expectedToken: 'SeverityText:"debug"',
      expectedTokens: ['ServiceName:"clickhouse"', 'SeverityText:"debug"'],
    },
    {
      name: 'preserves escaped quotes inside a quoted region',
      input: 'Service:"he said \\"hi\\"" other',
      expectedToken: 'other',
      expectedTokens: ['Service:"he said \\"hi\\""', 'other'],
    },
    {
      name: 'treats a colon inside a quoted value as literal text',
      input: 'URL:"http://example.com/x" x',
      expectedToken: 'x',
      expectedTokens: ['URL:"http://example.com/x"', 'x'],
    },
    // Unclosed quotes — reproduces the bug where `Field:" Other:"v"` (three
    // quotes with the first unclosed) previously collapsed into one token.
    {
      name: 'breaks at whitespace when a quote is followed by a new field pattern',
      input: 'ServiceName:" SeverityText:"debug"',
      expectedToken: 'SeverityText:"debug"',
      expectedTokens: ['ServiceName:"', 'SeverityText:"debug"'],
    },
    {
      name: 'treats a single unclosed quote at end of input as a literal',
      input: 'Service:"hel',
      expectedToken: 'Service:"hel',
      expectedTokens: ['Service:"hel'],
    },
    {
      // No `ident:` after the space, so the quote can still legitimately
      // extend — but there's no closing quote anywhere, so it's unclosed.
      name: 'handles an unclosed quote followed by whitespace then bare text',
      input: 'Service:"hello world',
      expectedToken: 'world',
      expectedTokens: ['Service:"hello', 'world'],
    },
    {
      name: 'handles multiple unclosed quotes across fields',
      input: 'A:" B:" C:"done"',
      expectedToken: 'C:"done"',
      expectedTokens: ['A:"', 'B:"', 'C:"done"'],
    },
    // Identifier-like characters after whitespace
    {
      // The space inside the quoted value is followed by `!`, not `ident:`,
      // so the quote should still be able to close.
      name: 'does not bail out on whitespace followed by a non-identifier',
      input: 'Service:"hello !world"',
      expectedToken: 'Service:"hello !world"',
      expectedTokens: ['Service:"hello !world"'],
    },
    {
      name: 'does not treat whitespace + ident without colon as a new field',
      input: 'Service:"hello world done"',
      expectedToken: 'Service:"hello world done"',
      expectedTokens: ['Service:"hello world done"'],
    },
    {
      name: 'handles dotted identifiers in the new-field pattern',
      input: 'Foo:" ResourceAttributes.service.name:"x"',
      expectedToken: 'ResourceAttributes.service.name:"x"',
      expectedTokens: ['Foo:"', 'ResourceAttributes.service.name:"x"'],
    },
  ];

  it.each(cases)('$name', ({ input, expectedToken, expectedTokens }) => {
    const { token, tokens } = tokenizeAtCursor(input, input.length);
    expect(tokens).toEqual(expectedTokens);
    expect(token).toBe(expectedToken);
  });

  // Cursor-positioning is orthogonal to tokenization — keep these separate
  // because each case exercises a different cursor offset for the same input.
  describe('cursor positioning', () => {
    it('returns the first token when the cursor is at position 0', () => {
      const { token, index } = tokenizeAtCursor('foo bar baz', 0);
      expect(token).toBe('foo');
      expect(index).toBe(0);
    });

    it('returns the middle token when the cursor is inside it', () => {
      // 0123456789012
      // 'foo bar baz' — cursor at 5 is inside 'bar'
      const { token, index } = tokenizeAtCursor('foo bar baz', 5);
      expect(token).toBe('bar');
      expect(index).toBe(1);
    });

    it('returns the token whose range contains the cursor in a quoted field', () => {
      const input = 'Service:"hello world" other';
      // cursor inside the quoted token
      const { token, index } = tokenizeAtCursor(input, 15);
      expect(token).toBe('Service:"hello world"');
      expect(index).toBe(0);
    });

    it('returns the unclosed-quote token when the cursor is inside it', () => {
      // User is typing a value — the first quote is unclosed because the
      // next field pattern `SeverityText:` appears after whitespace. Cursor
      // sits just after `l`, inside the in-progress `ServiceName:"cl` token.
      const input = 'ServiceName:"cl SeverityText:"info"';
      const { token, index } = tokenizeAtCursor(input, 15);
      expect(token).toBe('ServiceName:"cl');
      expect(index).toBe(0);
    });
  });
});

View file

@ -7,7 +7,7 @@ import {
import { NOW } from '@/config';
import {
deduplicate2dArray,
useCompleteKeyValues,
useAllKeyValues,
useMultipleAllFields,
} from '@/hooks/useMetadata';
import { toArray, useDebounce } from '@/utils';
@ -21,20 +21,112 @@ export type TokenInfo = {
tokens: string[];
};
/** Splits input into tokens and finds which token the cursor is in */
function tokenizeAtCursor(value: string, cursorPos: number): TokenInfo {
const tokens = value.split(' ');
let idx = 0;
let pos = 0;
const IDENT_RE = /[A-Za-z0-9_.]/;
function findMatchingQuote(value: string, startIdx: number): number {
let i = startIdx + 1;
while (i < value.length) {
const ch = value[i];
if (ch === '\\' && i + 1 < value.length) {
i += 2;
continue;
}
if (ch === '"') return i;
if (ch === ' ' || ch === '\t' || ch === '\n') {
let k = i;
while (
k < value.length &&
(value[k] === ' ' || value[k] === '\t' || value[k] === '\n')
)
k++;
const identStart = k;
while (k < value.length && IDENT_RE.test(value[k])) k++;
if (k > identStart && k < value.length && value[k] === ':') {
return -1;
}
}
i++;
}
return -1;
}
export function tokenizeAtCursor(value: string, cursorPos: number): TokenInfo {
const tokens: string[] = [];
// Start offsets of each token in the original string
const starts: number[] = [];
let current = '';
let currentStart = -1;
let inQuotes = false;
let escaped = false;
for (let i = 0; i < value.length; i++) {
const ch = value[i];
if (escaped) {
// Always include the escaped character verbatim (along with its backslash)
current += ch;
escaped = false;
continue;
}
if (ch === '\\' && inQuotes) {
current += ch;
escaped = true;
continue;
}
if (ch === '"') {
if (inQuotes) {
// Closing an already-opened quoted region.
if (currentStart === -1) currentStart = i;
current += ch;
inQuotes = false;
continue;
}
// Only enter a quoted region if there's a matching close ahead.
if (findMatchingQuote(value, i) !== -1) {
if (currentStart === -1) currentStart = i;
current += ch;
inQuotes = true;
continue;
}
// Stray/unclosed quote — treat as a literal character.
if (currentStart === -1) currentStart = i;
current += ch;
continue;
}
if (!inQuotes && ch === ' ') {
// Boundary: flush current token (even if empty, to mirror prior `split(' ')`
// semantics where consecutive spaces produce empty tokens).
tokens.push(current);
starts.push(currentStart === -1 ? i : currentStart);
current = '';
currentStart = -1;
continue;
}
if (currentStart === -1) currentStart = i;
current += ch;
}
// Flush trailing token
tokens.push(current);
starts.push(currentStart === -1 ? value.length : currentStart);
// Locate token containing the cursor. The cursor sits *between* characters,
// so a token covers [start, start+len]; we pick the last token whose range
// contains cursorPos.
let idx = tokens.length - 1;
for (let i = 0; i < tokens.length; i++) {
pos += tokens[i].length;
if (pos >= cursorPos || i === tokens.length - 1) {
const start = starts[i];
const end = start + tokens[i].length;
if (cursorPos <= end) {
idx = i;
break;
}
pos++; // account for the space
idx = i + 1;
}
return { token: tokens[idx] ?? '', index: idx, tokens };
}
@ -118,13 +210,11 @@ export function useAutoCompleteOptions(
// Debounced fetch of values for the selected key from rollup tables
const firstTc = tcs.length > 0 ? tcs[0] : undefined;
const { data: keyValues, isFetching: isLoadingValues } = useCompleteKeyValues(
{
tableConnection: firstTc,
searchField,
dateRange: effectiveDateRange,
},
);
const { data: keyValues, isFetching: isLoadingValues } = useAllKeyValues({
tableConnection: firstTc,
searchField,
dateRange: effectiveDateRange,
});
// Build key-value pair suggestions
const keyValCompleteOptions = useMemo<

View file

@ -201,7 +201,7 @@ function fieldToRollupParams(
* Works for both map keys (e.g. "ResourceAttributes.http.method") and
* native columns (e.g. "ServiceName").
*/
export function useCompleteKeyValues({
export function useAllKeyValues({
tableConnection,
searchField,
dateRange,
@ -212,18 +212,11 @@ export function useCompleteKeyValues({
}) {
const metadata = useMetadataWithSettings();
// Debounce: only query after the field stabilizes for 300ms
const [debouncedField, setDebouncedField] = useState<Field | null>(null);
useEffect(() => {
const timer = setTimeout(() => setDebouncedField(searchField), 300);
return () => clearTimeout(timer);
}, [searchField]);
const rollupParams = fieldToRollupParams(debouncedField, tableConnection);
const rollupParams = fieldToRollupParams(searchField, tableConnection);
return useQuery<string[]>({
queryKey: [
'useCompleteKeyValues',
'useAllKeyValues',
tableConnection?.databaseName,
tableConnection?.tableName,
tableConnection?.connectionId,
@ -233,10 +226,10 @@ export function useCompleteKeyValues({
dateRange[1].getTime(),
],
queryFn: async ({ signal }) => {
if (!tableConnection || !rollupParams || !debouncedField) return [];
if (!tableConnection || !rollupParams || !searchField) return [];
// Try rollup first
const rollupValues = await metadata.getCompleteKeyValues({
const rollupValues = await metadata.getAllKeyValues({
databaseName: tableConnection.databaseName,
tableName: tableConnection.tableName,
column: rollupParams.columnIdentifier,
@ -264,7 +257,7 @@ export function useCompleteKeyValues({
return metadata.getMapValues({
databaseName: tableConnection.databaseName,
tableName: tableConnection.tableName,
column: debouncedField.path[0],
column: searchField.path[0],
connectionId: tableConnection.connectionId,
});
}

View file

@ -7,6 +7,7 @@ import {
filterColumnMetaByType,
JSDataType,
} from '@hyperdx/common-utils/dist/clickhouse';
import { inferGranularityFromMVSelect } from '@hyperdx/common-utils/dist/core/materializedViews';
import { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
import { splitAndTrimWithBracket } from '@hyperdx/common-utils/dist/core/utils';
import { isBuilderChartConfig } from '@hyperdx/common-utils/dist/guards';
@ -348,8 +349,9 @@ export async function inferTableSourceConfig({
// Check if SpanEvents column is available
const hasSpanEvents = columns.some(col => col.name === 'Events.Timestamp');
// Check if metadata rollup tables exist
const hasMetadataMVs =
// Check if metadata rollup tables exist and, if so, infer the bucketing
// granularity from the key-rollup view's `as_select`
const rollupMeta =
isOtelLogSchema || isOtelSpanSchema
? await (async () => {
const [keyMeta, kvMeta] = await Promise.all([
@ -364,16 +366,22 @@ export async function inferTableSourceConfig({
connectionId,
}),
]);
return keyMeta != null && kvMeta != null;
return keyMeta != null && kvMeta != null
? { keyMeta, kvMeta }
: undefined;
})()
: false;
: undefined;
const metadataMVsConfig = hasMetadataMVs
const metadataMVsConfig = rollupMeta
? {
metadataMaterializedViews: {
keyRollupTable: `${tableName}_key_rollup_15m`,
kvRollupTable: `${tableName}_kv_rollup_15m`,
granularity: '15 minute',
// Fall back to '15 minute' to preserve the prior default when the
// MV's `as_select` doesn't contain a recognized bucketing function.
granularity:
inferGranularityFromMVSelect(rollupMeta.keyMeta.as_select) ??
'15 minute',
},
}
: {};

View file

@ -0,0 +1,192 @@
import { inferGranularityFromMVSelect } from '@/core/materializedViews';
// Unit tests for inferGranularityFromMVSelect: detecting the time-bucketing
// granularity of a materialized view from its `as_select` SQL text.
describe('inferGranularityFromMVSelect', () => {
  describe('real MV schemas', () => {
    // Shape matches the `otel_logs_attr_kv_rollup_15m_mv` view in
    // docker/otel-collector/schema/seed/00006_otel_logs_rollups.sql.
    it('detects 15 minute from the otel_logs kv rollup MV select', () => {
      const asSelect = `WITH elements AS (
SELECT
'ResourceAttributes' AS ColumnIdentifier,
toStartOfFifteenMinutes(Timestamp) AS Timestamp,
replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key,
CAST(entry.2 AS String) AS Value
FROM default.otel_logs
ARRAY JOIN ResourceAttributes AS entry
UNION ALL
SELECT
'LogAttributes' AS ColumnIdentifier,
toStartOfFifteenMinutes(Timestamp) AS Timestamp,
replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key,
CAST(entry.2 AS String) AS Value
FROM default.otel_logs
ARRAY JOIN LogAttributes AS entry
)
SELECT Timestamp, ColumnIdentifier, Key, Value, count() AS count FROM elements
GROUP BY Timestamp, ColumnIdentifier, Key, Value`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('15 minute');
    });

    // Shape matches the `otel_traces_kv_rollup_15m_mv` view in
    // docker/otel-collector/schema/seed/00007_otel_traces_rollups.sql.
    it('detects 15 minute from the otel_traces kv rollup MV select', () => {
      const asSelect = `WITH elements AS (
SELECT
'ResourceAttributes' AS ColumnIdentifier,
toStartOfFifteenMinutes(Timestamp) AS Timestamp,
replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key,
CAST(entry.2 AS String) AS Value
FROM default.otel_traces
ARRAY JOIN ResourceAttributes AS entry
UNION ALL
SELECT
'SpanAttributes' AS ColumnIdentifier,
toStartOfFifteenMinutes(Timestamp) AS Timestamp,
replaceRegexpAll(entry.1, '\\\\[\\\\d+\\\\]', '[*]') AS Key,
CAST(entry.2 AS String) AS Value
FROM default.otel_traces
ARRAY JOIN SpanAttributes AS entry
)
SELECT Timestamp, ColumnIdentifier, Key, Value, count() AS count FROM elements
GROUP BY Timestamp, ColumnIdentifier, Key, Value`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('15 minute');
    });

    // The key-rollup MV rolls up the kv rollup, so it doesn't bucket the
    // timestamp itself — it just selects the already-bucketed Timestamp.
    it('returns undefined when the select contains no bucketing function', () => {
      const asSelect = `SELECT
Timestamp,
ColumnIdentifier,
Key,
sum(count) as count
FROM default.otel_logs_kv_rollup_15m
GROUP BY ColumnIdentifier, Key, Timestamp`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });

  describe('toStartOfInterval form', () => {
    // Unit keywords are case-insensitive; the returned granularity is
    // normalized to lowercase singular form.
    it.each([
      ['INTERVAL 5 MINUTE', '5 minute'],
      ['INTERVAL 1 HOUR', '1 hour'],
      ['INTERVAL 2 hour', '2 hour'],
      ['INTERVAL 30 SECOND', '30 second'],
      ['INTERVAL 1 DAY', '1 day'],
    ])('parses %s', (interval, expected) => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, ${interval}) AS ts, count() FROM t GROUP BY ts`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe(expected);
    });

    it('accepts plural units', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 10 MINUTES) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('10 minute');
    });

    it('handles extra arguments (origin, timezone)', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 1 DAY, toDateTime('2025-01-01'), 'America/Los_Angeles') AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('1 day');
    });

    it('handles extra whitespace and newlines inside the call', () => {
      const asSelect = `SELECT
toStartOfInterval (
Timestamp,
INTERVAL 15 MINUTE
) AS ts
FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('15 minute');
    });

    it('ignores unknown units', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 1 WEEK) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });

  describe('named bucket functions', () => {
    it.each([
      ['toStartOfSecond', '1 second'],
      ['toStartOfMinute', '1 minute'],
      ['toStartOfFiveMinutes', '5 minute'],
      ['toStartOfTenMinutes', '10 minute'],
      ['toStartOfFifteenMinutes', '15 minute'],
      ['toStartOfHour', '1 hour'],
      ['toStartOfDay', '1 day'],
    ])('maps %s to %s', (fn, expected) => {
      const asSelect = `SELECT ${fn}(Timestamp) AS ts, count() FROM t GROUP BY ts`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe(expected);
    });

    it('returns undefined for unrecognized toStartOf* functions', () => {
      // toStartOfMonth is a real CH function but not in NAMED_BUCKET_FUNCTIONS.
      const asSelect = `SELECT toStartOfMonth(Timestamp) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });

  describe('first match wins', () => {
    it('returns the granularity of the first toStartOf call encountered', () => {
      const asSelect = `SELECT toStartOfHour(Timestamp) AS h, toStartOfMinute(Timestamp) AS m FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('1 hour');
    });

    it('returns first call even when a later call would also match', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL 5 MINUTE) AS a, toStartOfHour(Timestamp) AS b FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('5 minute');
    });
  });

  describe('robustness against quoting and noise', () => {
    it('ignores toStartOf* tokens inside single-quoted string literals', () => {
      const asSelect = `SELECT 'toStartOfHour(Timestamp)' AS label, toStartOfMinute(Timestamp) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('1 minute');
    });

    it('ignores toStartOf* tokens inside backtick-quoted identifiers', () => {
      const asSelect = 'SELECT `toStartOfHour` AS col FROM t';
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });

    it('ignores toStartOf* substrings embedded in longer identifiers', () => {
      // my_toStartOfHour_col is a single identifier, not a function call.
      const asSelect = `SELECT my_toStartOfHour_col AS x FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });

    it('is not confused by commas inside nested calls', () => {
      const asSelect = `SELECT toStartOfInterval(coalesce(Timestamp, now()), INTERVAL 5 MINUTE) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBe('5 minute');
    });

    it('skips a toStartOf call without matching parens and stops scanning', () => {
      // Unterminated call — we stop rather than looping forever.
      const asSelect = `SELECT toStartOfHour(Timestamp FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });

  describe('degenerate inputs', () => {
    it('returns undefined for empty string', () => {
      expect(inferGranularityFromMVSelect('')).toBeUndefined();
    });

    it('returns undefined when no toStartOf call is present', () => {
      const asSelect = `SELECT Timestamp, count() FROM t GROUP BY Timestamp`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });

    it('returns undefined when toStartOfInterval has no interval arg', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });

    it('returns undefined when toStartOfInterval has a malformed interval arg', () => {
      const asSelect = `SELECT toStartOfInterval(Timestamp, INTERVAL abc MINUTE) AS ts FROM t`;
      expect(inferGranularityFromMVSelect(asSelect)).toBeUndefined();
    });
  });
});

View file

@ -1,6 +1,6 @@
import { differenceInSeconds } from 'date-fns';
import { BaseClickhouseClient } from '@/clickhouse';
import { BaseClickhouseClient, ChSql, chSql } from '@/clickhouse';
import {
BuilderChartConfigWithOptDateRange,
CteChartConfig,
@ -9,6 +9,7 @@ import {
isLogSource,
isTraceSource,
MaterializedViewConfiguration,
type SQLInterval,
TLogSource,
TSource,
TTraceSource,
@ -25,6 +26,178 @@ import {
splitAndTrimWithBracket,
} from './utils';
// ClickHouse named time-bucketing functions and their granularity equivalents.
const NAMED_BUCKET_FUNCTIONS: Record<string, SQLInterval> = {
  toStartOfSecond: '1 second',
  toStartOfMinute: '1 minute',
  toStartOfFiveMinutes: '5 minute',
  toStartOfTenMinutes: '10 minute',
  toStartOfFifteenMinutes: '15 minute',
  toStartOfHour: '1 hour',
  toStartOfDay: '1 day',
};

// Interval units accepted in `INTERVAL n UNIT` literals (singular form).
const VALID_INTERVAL_UNITS = new Set(['second', 'minute', 'hour', 'day']);

// Single-character predicates used by the SQL scanner below.
const isIdentChar = (ch: string) => {
  const code = ch.charCodeAt(0);
  return (
    (code >= 97 && code <= 122) || // a-z
    (code >= 65 && code <= 90) || // A-Z
    (code >= 48 && code <= 57) || // 0-9
    code === 95 // '_'
  );
};

const isWhitespace = (ch: string) => [' ', '\t', '\n', '\r'].includes(ch);

/**
 * Scans a SQL string for `toStartOf*` function calls, returning each call's
 * function name and the raw text between its parentheses. Quoted regions
 * (single-quote, double-quote, backtick) are skipped so string literals and
 * quoted identifiers can't produce false matches, and an unterminated call
 * aborts the scan.
 */
function findToStartOfCalls(
  sql: string,
): { fn: string; argsInner: string }[] {
  const calls: { fn: string; argsInner: string }[] = [];
  const len = sql.length;

  // Returns the index just past the closing quote of the quoted region that
  // opens at `from`, honoring backslash escapes (or `len` if unterminated).
  const consumeQuoted = (from: number, quote: string): number => {
    for (let p = from + 1; p < len; p++) {
      if (sql[p] === '\\' && p + 1 < len) {
        p++; // skip the escaped character
      } else if (sql[p] === quote) {
        return p + 1;
      }
    }
    return len;
  };

  let pos = 0;
  while (pos < len) {
    const ch = sql[pos];
    if (ch === "'" || ch === '"' || ch === '`') {
      pos = consumeQuoted(pos, ch);
      continue;
    }
    // Identifiers are only considered at word boundaries — a preceding
    // identifier character means we're mid-token (e.g. `fooToStartOf…`).
    if (!isIdentChar(ch) || (pos > 0 && isIdentChar(sql[pos - 1]))) {
      pos++;
      continue;
    }
    let identEnd = pos;
    while (identEnd < len && isIdentChar(sql[identEnd])) identEnd++;
    const name = sql.substring(pos, identEnd);
    if (!name.startsWith('toStartOf')) {
      pos = identEnd;
      continue;
    }
    // For this to be a call, '(' must follow the name (whitespace allowed).
    let cursor = identEnd;
    while (cursor < len && isWhitespace(sql[cursor])) cursor++;
    if (sql[cursor] !== '(') {
      pos = identEnd;
      continue;
    }
    // Walk to the matching ')', honoring nested parens and quoted regions.
    const bodyStart = cursor + 1;
    let scan = bodyStart;
    let parenDepth = 1;
    while (scan < len) {
      const c = sql[scan];
      if (c === "'" || c === '"' || c === '`') {
        scan = consumeQuoted(scan, c);
        continue;
      }
      if (c === '(') parenDepth++;
      else if (c === ')' && --parenDepth === 0) break;
      scan++;
    }
    if (parenDepth !== 0) return calls; // unterminated call — stop scanning
    calls.push({ fn: name, argsInner: sql.substring(bodyStart, scan) });
    pos = scan + 1;
  }
  return calls;
}

/**
 * Parses an `INTERVAL n UNIT` literal (e.g. `INTERVAL 5 MINUTE`) into a
 * granularity string. Accepts plural unit forms and arbitrary surrounding
 * whitespace; returns undefined for anything malformed or an unknown unit.
 */
function parseIntervalLiteral(expr: string): SQLInterval | undefined {
  // Split on the same whitespace characters isWhitespace recognizes, dropping
  // empty tokens produced by leading/trailing/repeated whitespace.
  const tokens = expr.split(/[ \t\n\r]+/).filter(t => t.length > 0);
  if (tokens.length < 3 || tokens[0].toUpperCase() !== 'INTERVAL') {
    return undefined;
  }
  const count = Number.parseInt(tokens[1], 10);
  // Require a positive integer in canonical form (rejects '05', '+5', '1.5').
  if (!Number.isFinite(count) || count <= 0 || String(count) !== tokens[1]) {
    return undefined;
  }
  // Accept both singular and plural forms (MINUTE / MINUTES).
  const rawUnit = tokens[2].toLowerCase();
  const unit = rawUnit.endsWith('s') ? rawUnit.slice(0, -1) : rawUnit;
  return VALID_INTERVAL_UNITS.has(unit)
    ? (`${count} ${unit}` as SQLInterval)
    : undefined;
}
/**
 * Infers the time-bucketing granularity of a materialized view from its
 * `as_select` SQL by locating the first `toStartOf*` call: either a named
 * bucket function (e.g. `toStartOfFifteenMinutes`) or
 * `toStartOfInterval(…, INTERVAL n UNIT)`. Returns undefined when no
 * recognized bucketing call is present.
 */
export function inferGranularityFromMVSelect(
  asSelect: string,
): SQLInterval | undefined {
  for (const call of findToStartOfCalls(asSelect)) {
    // All fn names start with 'toStartOf', so no prototype-key collisions.
    const named = NAMED_BUCKET_FUNCTIONS[call.fn];
    if (named !== undefined) return named;
    if (call.fn === 'toStartOfInterval') {
      const args = splitAndTrimWithBracket(call.argsInner);
      if (args.length >= 2) {
        const interval = parseIntervalLiteral(args[1]);
        if (interval !== undefined) return interval;
      }
    }
  }
  return undefined;
}
/**
 * Reverse lookup into NAMED_BUCKET_FUNCTIONS: returns the ClickHouse named
 * bucket function (e.g. `toStartOfHour`) whose granularity equals
 * `granularity`, or undefined when no named function matches.
 */
export function getNamedBucketFunction(
  granularity: SQLInterval,
): string | undefined {
  const match = Object.entries(NAMED_BUCKET_FUNCTIONS).find(
    ([, interval]) => interval === granularity,
  );
  return match?.[0];
}
/**
 * Renders a ClickHouse expression that truncates `inner` down to the start of
 * the bucket for `granularity`. Uses the dedicated named function when one
 * exists; otherwise falls back to `toStartOfInterval` with the granularity
 * converted to seconds.
 */
export function renderStartOfBucketExpr(
  granularity: SQLInterval,
  inner: ChSql,
): ChSql {
  const namedFn = getNamedBucketFunction(granularity);
  if (namedFn === undefined) {
    const seconds = convertGranularityToSeconds(granularity);
    return chSql`toStartOfInterval(${inner}, INTERVAL ${{ Int64: seconds }} SECOND)`;
  }
  // namedFn comes from a fixed allow-list (NAMED_BUCKET_FUNCTIONS keys), so
  // splicing it as raw SQL is safe.
  return chSql`${{ UNSAFE_RAW_SQL: namedFn }}(${inner})`;
}
type SelectItem = Exclude<
BuilderChartConfigWithOptDateRange['select'],
string

View file

@ -21,7 +21,10 @@ import type {
} from '@/types';
import { isLogSource, isTraceSource, SourceKind } from '@/types';
import { optimizeGetKeyValuesCalls } from './materializedViews';
import {
optimizeGetKeyValuesCalls,
renderStartOfBucketExpr,
} from './materializedViews';
import {
getAlignedDateRange,
getDistributedTableArgs,
@ -376,7 +379,15 @@ export class Metadata {
// Rollup path: query the key rollup table filtered by ColumnIdentifier and date range
if (metadataMVs && alignedDateRange) {
return this.cache.getOrFetch<string[]>(cacheKey, async () => {
const timeFilter = chSql`AND Timestamp >= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[0].getTime() }})) AND Timestamp <= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[1].getTime() }}))`;
const startExpr = renderStartOfBucketExpr(
metadataMVs.granularity,
chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[0].getTime() }})`,
);
const endExpr = renderStartOfBucketExpr(
metadataMVs.granularity,
chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[1].getTime() }})`,
);
const timeFilter = chSql`AND Timestamp >= ${startExpr} AND Timestamp <= ${endExpr}`;
const sql = chSql`
SELECT Key
FROM ${tableExpr({ database: databaseName, table: metadataMVs.keyRollupTable })}
@ -1191,7 +1202,7 @@ export class Metadata {
* Autocomplete: fetches top values for a specific map key from the KV rollup table.
Only filters by date range; no WHERE conditions are applied. Values are ordered by frequency.
*/
async getCompleteKeyValues({
async getAllKeyValues({
databaseName,
tableName,
column,
@ -1214,8 +1225,22 @@ export class Metadata {
}): Promise<string[]> {
if (!metadataMVs) return [];
const timeFilter = chSql`AND Timestamp >= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: dateRange[0].getTime() }})) AND Timestamp <= toStartOfFifteenMinutes(fromUnixTimestamp64Milli(${{ Int64: dateRange[1].getTime() }}))`;
const cacheKey = `${connectionId}.${databaseName}.${tableName}.${column}.${key}.${dateRange[0].getTime()}.${dateRange[1].getTime()}.completeKeyValues`;
// Align date range to rollup granularity for consistent cache keys
const alignedDateRange = getAlignedDateRange(
dateRange,
metadataMVs.granularity,
);
const startExpr = renderStartOfBucketExpr(
metadataMVs.granularity,
chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[0].getTime() }})`,
);
const endExpr = renderStartOfBucketExpr(
metadataMVs.granularity,
chSql`fromUnixTimestamp64Milli(${{ Int64: alignedDateRange[1].getTime() }})`,
);
const timeFilter = chSql`AND Timestamp >= ${startExpr} AND Timestamp <= ${endExpr}`;
const cacheKey = `${connectionId}.${databaseName}.${tableName}.${column}.${key}.${alignedDateRange[0].getTime()}.${alignedDateRange[1].getTime()}.allKeyValues`;
return this.cache.getOrFetch(cacheKey, async () => {
try {