fix: reject wrapped toStartOf expressions in parseToStartOfFunction (#1876)

## Summary

- Fixes a bug where `parseToStartOfFunction` incorrectly parsed `toStartOf` expressions nested inside wrapper functions (e.g. `-toInt64(toStartOfInterval(timestamp, toIntervalMinute(15)))`), causing invalid SQL with missing arguments in the time filter WHERE clause.
- Adds a prefix guard that returns `undefined` when `toStartOf` is not the outermost function, preventing the broken optimization from being applied.

Fixes [HDX-3662](https://linear.app/clickhouse/issue/HDX-3662/invalid-sql-generated-for-tables-with-wrapped-tostartof-in-order-by)

## Test plan

- [x] Added `parseToStartOfFunction` unit tests for wrapped expressions (`-toInt64(...)`, `negate(...)`)
- [x] Added `optimizeTimestampValueExpression` unit test for primary key with wrapped `toStartOf`
- [x] Added `timeFilterExpr` unit test verifying no broken compound filter is generated
- [x] Added `renderChartConfig` snapshot test against the full input schema from the bug report
- [x] All 637 unit tests pass


Made with [Cursor](https://cursor.com)
This commit is contained in:
Dan Hable 2026-03-10 14:00:08 -05:00 committed by GitHub
parent 1e6fcf1c02
commit 3bc5abbfb1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 70 additions and 0 deletions

View file

@ -0,0 +1,5 @@
---
"@hyperdx/common-utils": patch
---
fix: Reject wrapped toStartOf expressions in parseToStartOfFunction to prevent invalid SQL generation

View file

@ -766,3 +766,5 @@ exports[`renderChartConfig should generate sql for a single sum metric 1`] = `
toFloat64OrDefault(toString(Rate))
) AS \\"Value\\",toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` FROM Bucketed WHERE (\`__hdx_time_bucket2\` >= fromUnixTimestamp64Milli(1739318400000) AND \`__hdx_time_bucket2\` <= fromUnixTimestamp64Milli(1765670400000)) GROUP BY toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` ORDER BY toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` LIMIT 10 SETTINGS optimize_read_in_order = 0, cast_keep_nullable = 1, additional_result_filter = 'x != 2', count_distinct_implementation = 'uniqCombined64', async_insert_busy_timeout_min_ms = 20000"
`;
exports[`renderChartConfig should not generate invalid SQL when primary key wraps toStartOfInterval 1`] = `"SELECT timestamp, cluster_id, service_id FROM default.http_request_logs WHERE (timestamp >= fromUnixTimestamp64Milli(1739319154000) AND timestamp <= fromUnixTimestamp64Milli(1739491954000)) LIMIT 200 OFFSET 0 SETTINGS optimize_read_in_order = 0, cast_keep_nullable = 1, additional_result_filter = 'x != 2', count_distinct_implementation = 'uniqCombined64', async_insert_busy_timeout_min_ms = 20000"`;

View file

@ -1176,6 +1176,18 @@ describe('renderChartConfig', () => {
"toStartOfMinute(timestamp), ServiceName, ResourceAttributes['timestamp'], timestamp",
expected: `(toStartOfMinute(timestamp) >= toStartOfMinute(fromUnixTimestamp64Milli(1739319154000)) AND toStartOfMinute(timestamp) <= toStartOfMinute(fromUnixTimestamp64Milli(1739491954000)))`,
},
{
description:
'with wrapped toStartOfInterval in primary key (should not optimize)',
timestampValueExpression: `timestamp`,
dateRange: [
new Date('2025-02-12 00:12:34Z'),
new Date('2025-02-14 00:12:34Z'),
],
primaryKey:
'-toInt64(toStartOfInterval(timestamp, toIntervalMinute(15))), service_id, timestamp',
expected: `(timestamp >= fromUnixTimestamp64Milli(1739319154000) AND timestamp <= fromUnixTimestamp64Milli(1739491954000))`,
},
];
beforeEach(() => {
@ -1223,6 +1235,40 @@ describe('renderChartConfig', () => {
);
});
it('should not generate invalid SQL when primary key wraps toStartOfInterval', async () => {
mockMetadata.getTableMetadata.mockResolvedValue({
primary_key:
'proxy_tier, status, is_customer_content, -toInt64(toStartOfInterval(timestamp, toIntervalMinute(15))), service_id',
} as any);
const config: ChartConfigWithOptDateRange = {
displayType: DisplayType.Table,
connection: 'test-connection',
from: {
databaseName: 'default',
tableName: 'http_request_logs',
},
select: 'timestamp, cluster_id, service_id',
where: '',
whereLanguage: 'sql',
timestampValueExpression: 'timestamp',
dateRange: [
new Date('2025-02-12 00:12:34Z'),
new Date('2025-02-14 00:12:34Z'),
],
limit: { limit: 200, offset: 0 },
};
const generatedSql = await renderChartConfig(
config,
mockMetadata,
querySettings,
);
const actual = parameterizedQueryToSql(generatedSql);
expect(actual).not.toContain('toStartOfInterval(fromUnixTimestamp64Milli');
expect(actual).toMatchSnapshot();
});
describe('Aggregate Merge Functions', () => {
it('should generate SQL for an aggregate merge function', async () => {
const config: ChartConfigWithOptDateRange = {

View file

@ -1375,6 +1375,14 @@ describe('utils', () => {
expr: 'toStartOfDay(',
expected: undefined,
},
{
expr: '-toInt64(toStartOfInterval(timestamp, toIntervalMinute(15)))',
expected: undefined,
},
{
expr: 'negate(toStartOfMinute(timestamp))',
expected: undefined,
},
])('Should parse $expr', ({ expr, expected }) => {
expect(parseToStartOfFunction(expr)).toEqual(expected);
});
@ -1466,6 +1474,12 @@ describe('utils', () => {
primaryKey: 'toStartOfMinute(`Time stamp`), other_column, `Time stamp`',
expected: '`Time stamp`, toStartOfMinute(`Time stamp`)',
},
{
timestampValueExpression: 'Timestamp',
primaryKey:
'-toInt64(toStartOfInterval(Timestamp, toIntervalMinute(15))), service_id, Timestamp',
expected: 'Timestamp',
},
] as const;
it.each(testCases)(

View file

@ -624,6 +624,9 @@ export function parseToStartOfFunction(
const toStartOfMatches = expr.match(/(toStartOf\w+)\s*\(/);
if (toStartOfMatches) {
const prefix = expr.substring(0, toStartOfMatches.index!);
if (prefix.trim() !== '') return undefined;
const [toStartOfSubstring, toStartOfFunction] = toStartOfMatches;
const argsStartIndex =