hyperdx/packages/common-utils/src/core/utils.ts

750 lines
21 KiB
TypeScript
Raw Normal View History

// Port from ChartUtils + source.ts
import { add as fnsAdd, format as fnsFormat } from 'date-fns';
import { formatInTimeZone } from 'date-fns-tz';
import { z } from 'zod';
export { default as objectHash } from 'object-hash';
import {
ChartConfigWithDateRange,
ChartConfigWithOptTimestamp,
DashboardFilter,
DashboardFilterSchema,
DashboardSchema,
DashboardTemplateSchema,
DashboardWithoutId,
QuerySettings,
SQLInterval,
TileTemplateSchema,
TSourceUnion,
} from '@/types';
/** The default maximum number of buckets setting when determining a bucket duration for 'auto' granularity */
export const DEFAULT_AUTO_GRANULARITY_MAX_BUCKETS = 60;
// True when running in a browser-like environment (window and document exist).
export const isBrowser: boolean =
  typeof window !== 'undefined' && typeof window.document !== 'undefined';
// True when running under Node.js (process.versions.node is populated).
export const isNode: boolean =
  typeof process !== 'undefined' &&
  process.versions != null &&
  process.versions.node != null;
/** Splits a comma-separated string into trimmed, non-empty segments. */
export function splitAndTrimCSV(input: string): string[] {
  const segments: string[] = [];
  for (const piece of input.split(',')) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      segments.push(trimmed);
    }
  }
  return segments;
}
// Replace splitAndTrimCSV, should remove splitAndTrimCSV later
/**
 * Splits `input` on top-level commas into trimmed, non-empty segments.
 * Commas inside parentheses, square brackets, single quotes, or double
 * quotes do not split (so `fn(a, b)` or `'x,y'` stays one segment).
 */
export function splitAndTrimWithBracket(input: string): string[] {
  const segments: string[] = [];
  let buffer = '';
  let parenDepth = 0;
  let squareDepth = 0;
  let inSingle = false;
  let inDouble = false;

  // Append a trailing comma so the final buffered segment is flushed
  // by the same code path as all the others.
  for (const ch of `${input},`) {
    if (ch === '"' && !inSingle) {
      inDouble = !inDouble;
      buffer += ch;
      continue;
    }
    if (ch === "'" && !inDouble) {
      inSingle = !inSingle;
      buffer += ch;
      continue;
    }
    const inQuotes = inSingle || inDouble;
    // Bracket depth is only tracked outside of quoted text.
    if (!inQuotes) {
      if (ch === '(') parenDepth++;
      else if (ch === ')') parenDepth--;
      else if (ch === '[') squareDepth++;
      else if (ch === ']') squareDepth--;
    }
    const isTopLevelComma =
      ch === ',' && !inQuotes && parenDepth === 0 && squareDepth === 0;
    if (isTopLevelComma) {
      const trimmed = buffer.trim();
      if (trimmed) segments.push(trimmed);
      buffer = '';
    } else {
      buffer += ch;
    }
  }
  return segments;
}
// If a user specifies a timestampValueExpression with multiple columns,
// this will return the first one. We'll want to refine this over time
export function getFirstTimestampValueExpression(valueExpression: string) {
  const [first] = splitAndTrimWithBracket(valueExpression);
  return first;
}
fix: Fix sidebar when selecting JSON property (#1231) Closes HDX-2042 Closes HDX-2524 Closes HDX-2307 Closes #1010 # Summary This PR fixes errors that occurred when attempting to open the sidebar by clicking a log table row using a JSON logs table schema. The error was caused by `node-sql-parser` throwing exceptions when parsing SQL with JSON Expressions, resulting in HyperDX being unable to extract aliases from the SQL. In the long term, we'll want to have a true ClickHouse SQL parser. In the short term, this is fixed by: 1. Finding and replacing all JSON expressions in the sql with placeholder tokens, prior to parsing with node-sql-parser 2. Parsing with node-sql-parser to find aliases correctly 3. Replacing the placeholder tokens with the original JSON expressions ## Testing (All of the following use a JSON schema) ### Before <details> <summary>When selecting a JSON column with an alias</summary> <img width="1126" height="96" alt="Screenshot 2025-10-01 at 2 28 19 PM" src="https://github.com/user-attachments/assets/c35ed870-9986-4b30-9890-e1ca8ff6c92c" /> <img width="372" height="142" alt="Screenshot 2025-10-01 at 2 28 06 PM" src="https://github.com/user-attachments/assets/d65fdce4-6625-4308-b5d0-6f845a0f2f05" /> </details> <details> <summary>When filtering by a JSON column and using an alias on a non-JSON property</summary> <img width="800" height="103" alt="Screenshot 2025-10-01 at 2 29 44 PM" src="https://github.com/user-attachments/assets/aa7faabb-316b-4103-8840-74ac08519efb" /> <img width="372" height="142" alt="Screenshot 2025-10-01 at 2 28 06 PM" src="https://github.com/user-attachments/assets/eb86cce5-eee4-40f9-af93-2451bff32444" /> </details> ### After <details> <summary>When selecting a JSON column with an alias</summary> <img width="1126" height="96" alt="Screenshot 2025-10-01 at 2 28 19 PM" src="https://github.com/user-attachments/assets/678ba290-5215-4cc5-8fee-1bf67955aaa2" /> <img width="725" height="696" alt="Screenshot 2025-10-01 at 2 30 42 PM" 
src="https://github.com/user-attachments/assets/5da48109-a0cd-4b5f-a5e3-bd700116d81b" /> </details> <details> <summary>When filtering by a JSON column and using an alias on a non-JSON property</summary> <img width="800" height="103" alt="Screenshot 2025-10-01 at 2 29 44 PM" src="https://github.com/user-attachments/assets/715de816-639e-4ffd-9e09-341bd0b2ee4a" /> <img width="1271" height="888" alt="Screenshot 2025-10-01 at 2 30 24 PM" src="https://github.com/user-attachments/assets/b3b766de-be70-4161-b9ca-8aae9330b5f2" /> </details>
2025-10-06 20:52:55 +00:00
/** Returns true if the given expression is a JSON expression, eg. `col.key.nestedKey` or "json_col"."key" */
export const isJsonExpression = (expr: string) => {
  // Cheap early exit: a JSON path requires at least one dot.
  if (!expr.includes('.')) return false;

  let inDouble = false;
  let inBacktick = false;
  let inSingle = false;
  const segments: string[] = [];
  let segment = '';

  for (const ch of expr) {
    if (ch === "'" && !inDouble && !inBacktick) {
      // Single-quoted literal content is dropped entirely from segments.
      inSingle = !inSingle;
    } else if (inSingle) {
      // Ignore characters inside single quotes (including dots).
    } else if (ch === '"' && !inBacktick) {
      inDouble = !inDouble;
      segment += ch;
    } else if (ch === '`' && !inDouble) {
      inBacktick = !inBacktick;
      segment += ch;
    } else if (ch === '.' && !inDouble && !inBacktick) {
      // A dot outside quotes/backticks ends the current path segment.
      segments.push(segment);
      segment = '';
    } else {
      segment += ch;
    }
  }
  // Only flush the trailing segment when quoting is balanced.
  if (!inDouble && !inBacktick) {
    segments.push(segment);
  }

  // Empty segments (eg. `a..b` or a trailing dot) disqualify the expression.
  if (segments.some(s => s.trim().length === 0)) return false;

  // Require at least two "identifier-like" segments: non-empty, not purely
  // numeric (so `1.5` isn't a path), and not a single-quoted literal.
  const identifierSegments = segments.filter(
    s =>
      s.trim().length > 0 &&
      isNaN(Number(s)) &&
      !(s.startsWith("'") && s.endsWith("'")),
  );
  return identifierSegments.length > 1;
};
/**
 * Finds and returns expressions within the given SQL string that represent JSON references (eg. `col.key.nestedKey`)
 *
 * Note - This function does not distinguish between json references and `table.column` references - both are returned.
 *
 * @param sql - SQL text to scan.
 * @returns One entry per match, with `index` (start offset into `sql`) and the matched `expr` text.
 */
export function findJsonExpressions(sql: string) {
  const expressions: { index: number; expr: string }[] = [];
  let isInDoubleQuote = false;
  let isInBacktick = false;
  let currentExpr = '';

  // Records the accumulated token if it qualifies as a JSON expression,
  // then resets the accumulator. `endIndex` is the offset just past the token.
  const finishExpression = (expr: string, endIndex: number) => {
    if (isJsonExpression(expr)) {
      expressions.push({ index: endIndex - expr.length, expr });
    }
    currentExpr = '';
  };

  let i = 0;
  // True while scanning a `::Type` specifier, so parentheses inside it
  // (eg. `::Array(String)`) don't terminate the current expression.
  let isInJsonTypeSpecifier = false;
  while (i < sql.length) {
    const c = sql.charAt(i);
    if (c === "'" && !isInDoubleQuote && !isInBacktick) {
      // Skip the entire single-quoted string literal so its contents can't
      // be mistaken for JSON expressions.
      // Fix: step past the opening quote before scanning for the closing
      // quote. Previously the loop condition was evaluated while `i` still
      // pointed at the opening quote, so it never advanced and literal
      // contents leaked into expression tokens.
      // NOTE(review): escaped quotes ('' or \') are not handled, same as the
      // original implementation.
      i++;
      while (i < sql.length && sql.charAt(i) !== c) {
        i++;
      }
      currentExpr = '';
    } else if (c === '"' && !isInBacktick) {
      isInDoubleQuote = !isInDoubleQuote;
      currentExpr += c;
    } else if (c === '`' && !isInDoubleQuote) {
      isInBacktick = !isInBacktick;
      currentExpr += c;
    } else if (/[\s{},+*/[\]]/.test(c)) {
      // Whitespace, operators, braces, and square brackets always end a token.
      isInJsonTypeSpecifier = false;
      finishExpression(currentExpr, i);
    } else if ('()'.includes(c) && !isInJsonTypeSpecifier) {
      finishExpression(currentExpr, i);
    } else if (c === ':') {
      isInJsonTypeSpecifier = true;
      currentExpr += c;
    } else {
      currentExpr += c;
    }
    i++;
  }
  // Flush whatever token remains at end-of-input.
  finishExpression(currentExpr, i);
  return expressions;
}
/**
* Replaces expressions within the given SQL string that represent JSON expressions (eg. `col.key.nestedKey`).
* Such expression are replaced with placeholders like `__hdx_json_replacement_0`. The resulting string and a
* map of replacements --> original expressions is returned.
*
* Note - This function does not distinguish between json references and `table.column` references - both are replaced.
*/
export function replaceJsonExpressions(sql: string) {
const jsonExpressions = findJsonExpressions(sql);
const replacements = new Map<string, string>();
let sqlWithReplacements = sql;
let indexOffsetFromInserts = 0;
let replacementCounter = 0;
for (const { expr, index } of jsonExpressions) {
const replacement = `__hdx_json_replacement_${replacementCounter++}`;
replacements.set(replacement, expr);
const effectiveIndex = index + indexOffsetFromInserts;
sqlWithReplacements =
sqlWithReplacements.slice(0, effectiveIndex) +
replacement +
sqlWithReplacements.slice(effectiveIndex + expr.length);
indexOffsetFromInserts += replacement.length - expr.length;
}
return { sqlWithReplacements, replacements };
}
/**
 * To best support Pre-aggregation in Materialized Views, any new
 * granularities should be multiples of all smaller granularities.
 * */
export enum Granularity {
  // Each value is a SQL interval string ('<count> <unit>') consumed by
  // convertGranularityToSeconds and toStartOfInterval.
  FifteenSecond = '15 second',
  ThirtySecond = '30 second',
  OneMinute = '1 minute',
  FiveMinute = '5 minute',
  TenMinute = '10 minute',
  FifteenMinute = '15 minute',
  ThirtyMinute = '30 minute',
  OneHour = '1 hour',
  TwoHour = '2 hour',
  SixHour = '6 hour',
  TwelveHour = '12 hour',
  OneDay = '1 day',
  TwoDay = '2 day',
  SevenDay = '7 day',
  ThirtyDay = '30 day',
}
/**
 * Classic Java-style 31x string hash, constrained to 32-bit signed
 * integer arithmetic. Returns 0 for the empty string.
 */
export function hashCode(str: string) {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    hash = (hash << 5) - hash + str.charCodeAt(i); // hash * 31 + char
    hash |= 0; // force 32-bit integer overflow semantics
  }
  return hash;
}
/**
 * Picks the smallest Granularity that fits the date range into at most
 * `maxNumBuckets` buckets.
 */
export function convertDateRangeToGranularityString(
  dateRange: [Date, Date],
  maxNumBuckets: number = DEFAULT_AUTO_GRANULARITY_MAX_BUCKETS,
): Granularity {
  const [start, end] = dateRange;
  const diffSeconds = Math.floor((end.getTime() - start.getTime()) / 1000);
  const bucketSeconds = Math.ceil(diffSeconds / maxNumBuckets);

  // Ordered smallest-to-largest; the first threshold that fits wins.
  // 10 minute granularity is skipped so that every auto-inferred granularity
  // is a multiple of all smaller granularities, which makes it more likely
  // that a materialized view can be used.
  const thresholds: [number, Granularity][] = [
    [15, Granularity.FifteenSecond],
    [30, Granularity.ThirtySecond],
    [60, Granularity.OneMinute],
    [5 * 60, Granularity.FiveMinute],
    [15 * 60, Granularity.FifteenMinute],
    [30 * 60, Granularity.ThirtyMinute],
    [3600, Granularity.OneHour],
    [2 * 3600, Granularity.TwoHour],
    [6 * 3600, Granularity.SixHour],
    [12 * 3600, Granularity.TwelveHour],
    [24 * 3600, Granularity.OneDay],
    [2 * 24 * 3600, Granularity.TwoDay],
    [7 * 24 * 3600, Granularity.SevenDay],
    [30 * 24 * 3600, Granularity.ThirtyDay],
  ];
  for (const [maxSeconds, granularity] of thresholds) {
    if (bucketSeconds <= maxSeconds) return granularity;
  }
  // Anything coarser than 30 days still maps to the coarsest granularity.
  return Granularity.ThirtyDay;
}
/**
 * Converts a SQL interval string like '15 minute' into seconds.
 * Returns 0 for unrecognized units.
 */
export function convertGranularityToSeconds(granularity: SQLInterval): number {
  const [countStr, unit] = granularity.split(' ');
  const count = Number.parseInt(countStr);
  const secondsPerUnit: Record<string, number> = {
    second: 1,
    minute: 60,
    hour: 60 * 60,
    day: 60 * 60 * 24,
  };
  const multiplier = secondsPerUnit[unit];
  return multiplier === undefined ? 0 : count * multiplier;
}
// Note: roundToNearestMinutes is broken in date-fns currently
// additionally it doesn't support seconds or > 30min
// so we need to write our own :(
// see: https://github.com/date-fns/date-fns/pull/3267/files
/**
 * Truncates `date` down to the start of its `granularity` interval, using
 * UTC components. Finer components than the interval's unit are dropped.
 */
export function toStartOfInterval(date: Date, granularity: SQLInterval): Date {
  const [countStr, unit] = granularity.split(' ');
  const step = Number.parseInt(countStr);
  // Round a component down to the nearest multiple of `step`.
  const floorToStep = (value: number) => Math.floor(value / step) * step;
  switch (unit) {
    case 'second':
      return new Date(
        Date.UTC(
          date.getUTCFullYear(),
          date.getUTCMonth(),
          date.getUTCDate(),
          date.getUTCHours(),
          date.getUTCMinutes(),
          floorToStep(date.getUTCSeconds()),
        ),
      );
    case 'minute':
      return new Date(
        Date.UTC(
          date.getUTCFullYear(),
          date.getUTCMonth(),
          date.getUTCDate(),
          date.getUTCHours(),
          floorToStep(date.getUTCMinutes()),
        ),
      );
    case 'hour':
      return new Date(
        Date.UTC(
          date.getUTCFullYear(),
          date.getUTCMonth(),
          date.getUTCDate(),
          floorToStep(date.getUTCHours()),
        ),
      );
    case 'day': {
      // Clickhouse uses the # of days since unix epoch to round dates
      // see: https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/DateLUTImpl.h#L1059
      const MS_PER_DAY = 24 * 60 * 60 * 1000;
      const daysSinceEpoch = date.getTime() / MS_PER_DAY;
      return new Date(floorToStep(daysSinceEpoch) * MS_PER_DAY);
    }
    default:
      // Unknown unit: return the date unchanged.
      return date;
  }
}
export function timeBucketByGranularity(
start: Date,
end: Date,
granularity: SQLInterval,
): Date[] {
const buckets: Date[] = [];
let current = toStartOfInterval(start, granularity);
const granularitySeconds = convertGranularityToSeconds(granularity);
while (current < end) {
buckets.push(current);
current = fnsAdd(current, {
seconds: granularitySeconds,
});
}
return buckets;
}
/**
 * Runs `fn` and returns a [error, result] pair: [null, value] on success,
 * [thrown, null] on failure. Never throws.
 */
export const _useTry = <T>(fn: () => T): [null | Error | unknown, null | T] => {
  try {
    return [null, fn()];
  } catch (e) {
    return [e, null];
  }
};
/** JSON.parse that returns null instead of throwing on malformed input. */
export const parseJSON = <T = any>(json: string) => {
  const [, result] = _useTry<T>(() => JSON.parse(json));
  return result;
};
// Date formatting
// date-fns format token strings, keyed by display style and then by clock
// preference (12-hour vs 24-hour). Consumed by formatDate below.
const TIME_TOKENS = {
  normal: {
    '12h': 'MMM d h:mm:ss a',
    '24h': 'MMM d HH:mm:ss',
  },
  // Compact variant without seconds
  short: {
    '12h': 'MMM d h:mma',
    '24h': 'MMM d HH:mm',
  },
  // Includes milliseconds (.SSS)
  withMs: {
    '12h': 'MMM d h:mm:ss.SSS a',
    '24h': 'MMM d HH:mm:ss.SSS',
  },
  withYear: {
    '12h': 'MMM d yyyy h:mm:ss a',
    '24h': 'MMM d yyyy HH:mm:ss',
  },
  // Time-of-day only, no date component
  time: {
    '12h': 'h:mm:ss a',
    '24h': 'HH:mm:ss',
  },
};
/**
 * Formats a date with one of the TIME_TOKENS patterns.
 * When `isUTC` is set, the date is rendered in the Etc/UTC timezone;
 * otherwise date-fns formats it in the local timezone.
 */
export const formatDate = (
  date: Date,
  {
    isUTC = false,
    format = 'normal',
    clock = '12h',
  }: {
    isUTC?: boolean;
    format?: 'normal' | 'short' | 'withMs' | 'time' | 'withYear';
    clock?: '12h' | '24h';
  },
) => {
  const pattern = TIME_TOKENS[format][clock];
  if (isUTC) {
    return formatInTimeZone(date, 'Etc/UTC', pattern);
  }
  return fnsFormat(date, pattern);
};
type Dashboard = z.infer<typeof DashboardSchema>;
type DashboardTemplate = z.infer<typeof DashboardTemplateSchema>;
type TileTemplate = z.infer<typeof TileTemplateSchema>;

/**
 * Converts a stored dashboard into a shareable template. Tile and filter
 * source ids are replaced with the matching source *name*, or '' when the
 * id has no matching source.
 */
export function convertToDashboardTemplate(
  input: Dashboard,
  sources: TSourceUnion[],
): DashboardTemplate {
  // Resolve a source id to its display name ('' when unknown).
  const sourceIdToName = (id: unknown) =>
    sources.find(source => source.id === id)?.name ?? '';

  const toTileTemplate = (tile: Dashboard['tiles'][0]): TileTemplate => {
    const template = TileTemplateSchema.strip().parse(structuredClone(tile));
    template.config.source = sourceIdToName(template.config.source);
    return template;
  };

  const toFilterTemplate = (filter: DashboardFilter): DashboardFilter => {
    const template = DashboardFilterSchema.strip().parse(
      structuredClone(filter),
    );
    template.source = sourceIdToName(filter.source);
    return template;
  };

  const output: DashboardTemplate = {
    version: '0.1.0',
    name: input.name,
    tiles: input.tiles.map(toTileTemplate),
  };
  if (input.filters) {
    output.filters = input.filters.map(toFilterTemplate);
  }
  return output;
}
/**
 * Converts a dashboard template into a dashboard document (without id).
 * Tile/filter `source` fields are expected to already be converted from
 * names back to ids; this only deep-copies them.
 */
export function convertToDashboardDocument(
  input: DashboardTemplate,
): DashboardWithoutId {
  const output: DashboardWithoutId = {
    name: input.name,
    tiles: input.tiles.map(tile => structuredClone(tile)),
    tags: [],
  };
  if (input.filters) {
    output.filters = input.filters.map(filter => structuredClone(filter));
  }
  return output;
}
/**
 * Returns the first ORDER BY item, or undefined when orderBy is empty.
 * A string orderBy is treated as raw SQL and split on top-level commas.
 */
export const getFirstOrderingItem = (
  orderBy: ChartConfigWithDateRange['orderBy'],
) => {
  if (!orderBy || orderBy.length === 0) return undefined;
  if (typeof orderBy === 'string') {
    return splitAndTrimWithBracket(orderBy)[0];
  }
  return orderBy[0];
};
/**
 * Strips a trailing ASC/DESC direction keyword (case-insensitive) from an
 * ORDER BY expression and returns the trimmed remainder. Returns the input
 * unchanged when no direction suffix is present.
 *
 * Fix: the previous implementation computed the slice index on the
 * trimmed/uppercased copy but applied it to the untrimmed original, so
 * inputs with two or more leading whitespace characters lost trailing
 * characters of the expression.
 */
export const removeTrailingDirection = (s: string) => {
  const trimmed = s.trim();
  const upper = trimmed.toUpperCase();
  // Check DESC first; note this matches a bare suffix (e.g. `fooDESC`),
  // preserving the original's behavior.
  if (upper.endsWith('DESC')) {
    return trimmed.slice(0, trimmed.length - 'DESC'.length).trim();
  } else if (upper.endsWith('ASC')) {
    return trimmed.slice(0, trimmed.length - 'ASC'.length).trim();
  }
  return s;
};
export const isTimestampExpressionInFirstOrderBy = (
config: ChartConfigWithOptTimestamp,
) => {
const firstOrderingItem = getFirstOrderingItem(config.orderBy);
2025-10-07 15:35:42 +00:00
if (!firstOrderingItem || config.timestampValueExpression == null)
return false;
const firstOrderingExpression =
typeof firstOrderingItem === 'string'
? removeTrailingDirection(firstOrderingItem)
: firstOrderingItem.valueExpression;
const timestampValueExpressions = splitAndTrimWithBracket(
config.timestampValueExpression,
);
return timestampValueExpressions.some(tve =>
firstOrderingExpression.includes(tve),
);
};
/**
 * Returns true when the first ORDER BY item sorts ascending.
 * An empty/missing orderBy returns false.
 */
export const isFirstOrderByAscending = (
  orderBy: ChartConfigWithDateRange['orderBy'],
): boolean => {
  const first = getFirstOrderingItem(orderBy);
  if (!first) return false;
  if (typeof first === 'string') {
    // Raw SQL item: ascending unless it ends with DESC.
    return !first.trim().toUpperCase().endsWith('DESC');
  }
  return first.ordering !== 'DESC';
};
feat: Optimize and fix filtering on toStartOfX primary key expressions (#1265) Closes HDX-2576 Closes HDX-2491 # Summary It is a common optimization to have a primary key like `toStartOfDay(Timestamp), ..., Timestamp`. This PR improves the experience when using such a primary key in the following ways: 1. HyperDX will now automatically filter on both `toStartOfDay(Timestamp)` and `Timestamp` in this case, instead of just `Timestamp`. This improves performance by better utilizing the primary index. Previously, this required a manual change to the source's Timestamp Column setting. 2. HyperDX now applies the same `toStartOfX` function to the right-hand-side of timestamp comparisons. So when filtering using an expression like `toStartOfDay(Timestamp)`, the generated SQL will have the condition `toStartOfDay(Timestamp) >= toStartOfDay(<selected start time>) AND toStartOfDay(Timestamp) <= toStartOfDay(<selected end time>)`. This resolves an issue where some data would be incorrectly filtered out when filtering on such timestamp expressions (such as time ranges less than 1 minute). With this change, teams should no longer need to have multiple columns in their source timestamp column configuration. However, if they do, they will now have correct filtering. ## Testing ### Testing the fix The part of this PR that fixes time filtering can be tested with the default logs table schema. Simply set the Timestamp Column source setting to `TimestampTime, toStartOfMinute(TimestampTime)`. Then, in the logs search, filter for a timespan < 1 minute. 
<details> <summary>Without the fix, you should see no logs, since they're incorrectly filtered out by the toStartOfMinute(TimestampTime) filter</summary> https://github.com/user-attachments/assets/915d3922-55f8-4742-b686-5090cdecef60 </details> <details> <summary>With the fix, you should see logs in the selected time range</summary> https://github.com/user-attachments/assets/f75648e4-3f48-47b0-949f-2409ce075a75 </details> ### Testing the optimization The optimization part of this change is that when a table has a primary key like `toStartOfMinute(TimestampTime), ..., TimestampTime` and the Timestamp Column for the source is just `Timestamp`, the query will automatically filter by both `toStartOfMinute(TimestampTime)` and `TimestampTime`. To test this, you'll need to create a table with such a primary key, then create a source based on that table. Optionally, you could copy data from the default `otel_logs` table into the new table (`INSERT INTO default.otel_logs_toStartOfMinute_Key SELECT * FROM default.otel_logs`). 
<details> <summary>DDL for log table with optimized key</summary> ```sql CREATE TABLE default.otel_logs_toStartOfMinute_Key ( `Timestamp` DateTime64(9) CODEC(Delta(8), ZSTD(1)), `TimestampTime` DateTime DEFAULT toDateTime(Timestamp), `TraceId` String CODEC(ZSTD(1)), `SpanId` String CODEC(ZSTD(1)), `TraceFlags` UInt8, `SeverityText` LowCardinality(String) CODEC(ZSTD(1)), `SeverityNumber` UInt8, `ServiceName` LowCardinality(String) CODEC(ZSTD(1)), `Body` String CODEC(ZSTD(1)), `ResourceSchemaUrl` LowCardinality(String) CODEC(ZSTD(1)), `ResourceAttributes` Map(LowCardinality(String), String) CODEC(ZSTD(1)), `ScopeSchemaUrl` LowCardinality(String) CODEC(ZSTD(1)), `ScopeName` String CODEC(ZSTD(1)), `ScopeVersion` LowCardinality(String) CODEC(ZSTD(1)), `ScopeAttributes` Map(LowCardinality(String), String) CODEC(ZSTD(1)), `LogAttributes` Map(LowCardinality(String), String) CODEC(ZSTD(1)), `__hdx_materialized_k8s.pod.name` String MATERIALIZED ResourceAttributes['k8s.pod.name'] CODEC(ZSTD(1)), INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_scope_attr_key mapKeys(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 8, INDEX idx_lower_body lower(Body) TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 8 ) ENGINE = SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') PARTITION BY toDate(TimestampTime) PRIMARY KEY (toStartOfMinute(TimestampTime), ServiceName, TimestampTime) ORDER BY (toStartOfMinute(TimestampTime), ServiceName, 
TimestampTime, Timestamp) TTL TimestampTime + toIntervalDay(90) SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1 ``` </details> Once you have that source, you can inspect the queries generated for that source. Whenever a date range filter is selected, the query should have a `WHERE` predicate that filters on both `TimestampTime` and `toStartOfMinute(TimestampTime)`, despite `toStartOfMinute(TimestampTime)` not being included in the Timestamp Column of the source's configuration.
2025-10-27 17:20:36 +00:00
/**
 * Parses a single expression of the form
 * `toStartOf<Interval>(column[, timezone])` or `toStartOfInterval(column[, interval[, origin[, timezone]]])`.
 * Returns undefined if the expression is not of this form.
 */
export function parseToStartOfFunction(
  expr: string,
):
  | { function: string; columnArgument: string; formattedRemainingArgs: string }
  | undefined {
  // Must be a single expression (no top-level commas).
  if (splitAndTrimWithBracket(expr).length !== 1) return undefined;

  const match = expr.match(/(toStartOf\w+)\s*\(/);
  if (!match) return undefined;

  const [matchedPrefix, functionName] = match;
  // Argument list spans from just after the matched `toStartOfX(` up to the
  // last closing paren in the expression.
  const argsStart = expr.indexOf(matchedPrefix) + matchedPrefix.length;
  const argsEnd = expr.lastIndexOf(')');
  const args = splitAndTrimWithBracket(expr.substring(argsStart, argsEnd));

  const [columnArgument, ...remainingArgs] = args;
  if (columnArgument == null) {
    console.error(`Failed to parse column argument from ${expr}`);
    return undefined;
  }
  return {
    function: functionName.trim(),
    columnArgument,
    formattedRemainingArgs:
      remainingArgs.length > 0 ? `, ${remainingArgs.join(', ')}` : '',
  };
}
/**
 * Returns an optimized timestamp value expression for a table based on its timestampValueExpression and primary key.
 *
 * When a table has a sort key like `toStartOfMinute(timestamp), ..., timestamp`, it is more performant
 * to filter by toStartOfMinute(timestamp) and timestamp, instead of just timestamp.
 */
export function optimizeTimestampValueExpression(
  timestampValueExpression: string,
  primaryKey: string | undefined,
) {
  if (!primaryKey || !timestampValueExpression) return timestampValueExpression;

  const trimmedTimestampExpr = timestampValueExpression.trim();
  const optimizedExprs = [timestampValueExpression];

  for (const keyExpr of splitAndTrimWithBracket(primaryKey)) {
    const toStartOf = parseToStartOfFunction(keyExpr);
    const isTimestampItself =
      keyExpr === trimmedTimestampExpr ||
      ((keyExpr.startsWith('toUnixTimestamp') ||
        keyExpr.startsWith('toDateTime')) &&
        keyExpr.includes(timestampValueExpression));
    if (isTimestampItself) {
      // Only primary-key expressions that come BEFORE the timestamp
      // expression itself help index pruning, so stop scanning here.
      break;
    }
    if (toStartOf && toStartOf.columnArgument === trimmedTimestampExpr) {
      optimizedExprs.push(keyExpr);
    }
  }
  return optimizedExprs.join(', ');
}
feat: Align date ranges to MV Granularity (#1575) Closes HDX-3124 # Summary This PR makes the following changes 1. Date ranges for all MV queries are now aligned to the MV Granularity 2. Each chart type now has an indicator when the date range has been adjusted to align with either the MV Granularity or (in the case of Line/Bar charts) the Chart Granularity. 3. The useQueriedChartConfig, useRenderedSqlChartConfig, and useOffsetPaginatedQuery hooks have been updated to get the MV-optimized chart configuration from the useMVOptimizationExplanation, which allows us to share the `EXPLAIN ESTIMATE` query results between the MV Optimization Indicator (the lightning bolt icon on each chart) and the chart itself. This roughly halves the number of EXPLAIN ESTIMATE queries that are made. ## Demo <img width="1628" height="1220" alt="Screenshot 2026-01-08 at 11 42 39 AM" src="https://github.com/user-attachments/assets/80a06e3a-bbfc-4193-b6b7-5e0056c588d3" /> <img width="1627" height="1131" alt="Screenshot 2026-01-08 at 11 40 54 AM" src="https://github.com/user-attachments/assets/69879e3d-3a83-4c4d-9604-0552a01c17d7" /> ## Testing To test locally with an MV, you can use the following DDL <details> <summary>DDL For an MV</summary> ```sql CREATE TABLE default.metrics_rollup_1m ( `Timestamp` DateTime, `ServiceName` LowCardinality(String), `SpanKind` LowCardinality(String), `StatusCode` LowCardinality(String), `count` SimpleAggregateFunction(sum, UInt64), `sum__Duration` SimpleAggregateFunction(sum, UInt64), `avg__Duration` AggregateFunction(avg, UInt64), `quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64), `min__Duration` SimpleAggregateFunction(min, UInt64), `max__Duration` SimpleAggregateFunction(max, UInt64) ) ENGINE = AggregatingMergeTree PARTITION BY toDate(Timestamp) ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName) SETTINGS index_granularity = 8192; CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m ( `Timestamp` 
DateTime, `ServiceName` LowCardinality(String), `SpanKind` LowCardinality(String), `version` LowCardinality(String), `StatusCode` LowCardinality(String), `count` UInt64, `sum__Duration` Int64, `avg__Duration` AggregateFunction(avg, UInt64), `quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64), `min__Duration` SimpleAggregateFunction(min, UInt64), `max__Duration` SimpleAggregateFunction(max, UInt64) ) AS SELECT toStartOfMinute(Timestamp) AS Timestamp, ServiceName, SpanKind, StatusCode, count() AS count, sum(Duration) AS sum__Duration, avgState(Duration) AS avg__Duration, quantileTDigestState(0.5)(Duration) AS quantile__Duration, minSimpleState(Duration) AS min__Duration, maxSimpleState(Duration) AS max__Duration FROM default.otel_traces GROUP BY Timestamp, ServiceName, SpanKind, StatusCode; ``` </details>
2026-01-09 16:07:52 +00:00
/**
 * Expands a date range so both endpoints lie on `granularity` boundaries:
 * the start snaps down, the end snaps up (when not already aligned).
 */
export function getAlignedDateRange(
  [originalStart, originalEnd]: [Date, Date],
  granularity: SQLInterval,
): [Date, Date] {
  const alignedStart = toStartOfInterval(originalStart, granularity);
  const truncatedEnd = toStartOfInterval(originalEnd, granularity);
  // If truncation cut anything off, round the end up by one full interval.
  const alignedEnd =
    truncatedEnd.getTime() < originalEnd.getTime()
      ? fnsAdd(truncatedEnd, {
          seconds: convertGranularityToSeconds(granularity),
        })
      : truncatedEnd;
  return [alignedStart, alignedEnd];
}
/** Returns true when both ranges have identical start and end instants. */
export function isDateRangeEqual(range1: [Date, Date], range2: [Date, Date]) {
  const [start1, end1] = range1;
  const [start2, end2] = range2;
  return (
    start1.getTime() === start2.getTime() && end1.getTime() === end2.getTime()
  );
}
/*
 This function extracts the SETTINGS clause from the end(!) of the sql string.
*/
/**
 * @param sqlInput - SQL text, optionally ending with a semicolon.
 * @returns [sql without the SETTINGS clause, the SETTINGS clause or undefined].
 */
export function extractSettingsClauseFromEnd(
  sqlInput: string,
): [string, string | undefined] {
  // Drop a single trailing semicolon, if present.
  let sql = sqlInput.trim();
  if (sql.endsWith(';')) {
    sql = sql.slice(0, -1);
  }
  // Find the LAST occurrence of SETTINGS as a standalone word. The word
  // boundary avoids false positives on identifiers like `settings_col`, and
  // taking the last match honors the "from the end" contract — the previous
  // implementation used indexOf, which could split on an earlier, unrelated
  // occurrence.
  let settingsIndex = -1;
  for (const match of sql.matchAll(/\bSETTINGS\b/gi)) {
    settingsIndex = match.index ?? settingsIndex;
  }
  if (settingsIndex === -1) {
    return [sql, undefined];
  }
  return [
    sql.substring(0, settingsIndex).trim(),
    sql.substring(settingsIndex).trim(),
  ];
}
/**
 * Parses a string into a finite number. Returns undefined for empty input,
 * NaN, and +/-Infinity.
 */
export function parseToNumber(input: string): number | undefined {
  const trimmed = input.trim();
  if (!trimmed) {
    // Note: Number('') would be 0, so the empty case must be handled first.
    return undefined;
  }
  const parsed = Number(trimmed);
  if (!Number.isFinite(parsed)) {
    return undefined;
  }
  return parsed;
}
/**
 * Joins query settings into a `key = value, ...` string suitable for a SQL
 * SETTINGS clause. Pairs with an empty key or value are skipped; numeric
 * values are emitted bare, everything else is single-quoted.
 * Returns undefined when there are no settings.
 */
export function joinQuerySettings(
  querySettings: QuerySettings | undefined,
): string | undefined {
  if (!querySettings?.length) {
    return undefined;
  }
  return querySettings
    .filter(({ setting, value }) => setting.length && value.length)
    .map(({ setting, value }) => {
      const numeric = parseToNumber(value);
      // `??` (not `||`) so a numeric 0 is still emitted bare.
      return `${setting} = ${numeric ?? `'${value}'`}`;
    })
    .join(', ');
}