hyperdx/packages/common-utils/src/core/utils.ts
Drew Davis 518bda7d20
feat: Add dashboard template gallery (#2010)
## Summary

This PR adds a gallery of importable dashboard templates to the dashboards page. The existing Dashboard import functionality is modified to support importing dashboard templates which are included in the app source code bundle.

### Screenshots or video

https://github.com/user-attachments/assets/eae37214-f012-44dd-83ef-086749846260

### How to test locally or on Vercel

This can be tested as shown above in the preview environment.

### References



- Linear Issue: Closes HDX-3661 Closes HDX-3814
- Related PRs:
2026-04-01 17:33:07 +00:00

1014 lines
29 KiB
TypeScript

// Port from ChartUtils + source.ts
import { add as fnsAdd, format as fnsFormat } from 'date-fns';
import { formatInTimeZone } from 'date-fns-tz';
import { z } from 'zod';
export { default as objectHash } from 'object-hash';
import { isBuilderSavedChartConfig, isRawSqlSavedChartConfig } from '@/guards';
import {
BuilderChartConfig,
BuilderChartConfigWithDateRange,
BuilderChartConfigWithOptTimestamp,
Connection,
DashboardFilter,
DashboardFilterSchema,
DashboardSchema,
DashboardTemplateSchema,
DashboardWithoutId,
QuerySettings,
SQLInterval,
TileTemplateSchema,
TSource,
} from '@/types';
import { SkipIndexMetadata, TableMetadata } from './metadata';
/** The default maximum number of buckets setting when determining a bucket duration for 'auto' granularity */
export const DEFAULT_AUTO_GRANULARITY_MAX_BUCKETS = 60;
// True when a browser-like environment is present (both window and window.document exist).
export const isBrowser: boolean =
typeof window !== 'undefined' && typeof window.document !== 'undefined';
// True when running under Node.js (process.versions.node is defined).
export const isNode: boolean =
typeof process !== 'undefined' &&
process.versions != null &&
process.versions.node != null;
/**
 * Splits a comma-separated string into trimmed segments, dropping any
 * segments that are empty after trimming.
 */
export function splitAndTrimCSV(input: string): string[] {
  const segments: string[] = [];
  for (const piece of input.split(',')) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      segments.push(trimmed);
    }
  }
  return segments;
}
// Replace splitAndTrimCSV, should remove splitAndTrimCSV later
/**
 * Splits a comma-separated string into trimmed, non-empty segments while
 * treating commas inside (), [], '...' or "..." as part of the segment.
 */
export function splitAndTrimWithBracket(input: string): string[] {
  const segments: string[] = [];
  let buffer = '';
  let parenDepth = 0;
  let squareDepth = 0;
  let inSingle = false;
  let inDouble = false;
  // The appended trailing comma flushes the final buffered segment.
  for (const ch of input + ',') {
    if (ch === '"' && !inSingle) {
      inDouble = !inDouble;
      buffer += ch;
      continue;
    }
    if (ch === "'" && !inDouble) {
      inSingle = !inSingle;
      buffer += ch;
      continue;
    }
    const inQuotes = inSingle || inDouble;
    // Bracket depth is only tracked outside of quotes.
    if (!inQuotes) {
      switch (ch) {
        case '(':
          parenDepth++;
          break;
        case ')':
          parenDepth--;
          break;
        case '[':
          squareDepth++;
          break;
        case ']':
          squareDepth--;
          break;
      }
    }
    const isSplitPoint =
      ch === ',' && !inQuotes && parenDepth === 0 && squareDepth === 0;
    if (isSplitPoint) {
      const trimmed = buffer.trim();
      if (trimmed) {
        segments.push(trimmed);
      }
      buffer = '';
    } else {
      buffer += ch;
    }
  }
  return segments;
}
// If a user specifies a timestampValueExpression with multiple columns,
// this will return the first one. We'll want to refine this over time
/** Returns the first comma-separated expression of a (possibly multi-column) timestampValueExpression. */
export function getFirstTimestampValueExpression(valueExpression: string) {
  const [firstExpression] = splitAndTrimWithBracket(valueExpression);
  return firstExpression;
}
/**
 * Returns true if the given expression is a JSON expression, eg. `col.key.nestedKey` or "json_col"."key".
 *
 * Splits the expression on dots that appear outside of double quotes and
 * backticks, then requires at least two segments that are neither numeric
 * nor single-quoted string literals.
 */
export const isJsonExpression = (expr: string) => {
if (!expr.includes('.')) return false;
let isInDoubleQuote = false;
let isInBacktick = false;
let isInSingleQuote = false;
const parts: string[] = [];
let current = '';
for (const c of expr) {
// Single quotes toggle literal mode; the quotes and their contents are dropped.
if (c === "'" && !isInDoubleQuote && !isInBacktick) {
isInSingleQuote = !isInSingleQuote;
} else if (isInSingleQuote) {
continue;
} else if (c === '"' && !isInBacktick) {
isInDoubleQuote = !isInDoubleQuote;
current += c;
} else if (c === '`' && !isInDoubleQuote) {
isInBacktick = !isInBacktick;
current += c;
} else if (c === '.' && !isInDoubleQuote && !isInBacktick) {
// A dot outside quotes/backticks terminates the current path segment.
parts.push(current);
current = '';
} else {
current += c;
}
}
// Only keep the trailing segment when the quoting is balanced; an
// unterminated quoted identifier leaves the segment incomplete.
if (!isInDoubleQuote && !isInBacktick) {
parts.push(current);
}
// Any empty segment (e.g. `a..b` or a trailing dot) disqualifies the expression.
if (parts.some(p => p.trim().length === 0)) return false;
// Require more than one segment that is not a number (excludes literals
// like `1.5`) and not a single-quoted string.
return (
parts.filter(
p =>
p.trim().length > 0 &&
isNaN(Number(p)) &&
!(p.startsWith("'") && p.endsWith("'")),
).length > 1
);
};
/**
* Finds and returns expressions within the given SQL string that represent JSON references (eg. `col.key.nestedKey`)
*
* Note - This function does not distinguish between json references and `table.column` references - both are returned.
*/
export function findJsonExpressions(sql: string) {
  const expressions: { index: number; expr: string }[] = [];
  let isInDoubleQuote = false;
  let isInBacktick = false;
  let currentExpr = '';
  // Records the accumulated token (with its starting index) if it looks like
  // a JSON reference, then resets the accumulator.
  const finishExpression = (expr: string, endIndex: number) => {
    if (isJsonExpression(expr)) {
      expressions.push({ index: endIndex - expr.length, expr });
    }
    currentExpr = '';
  };
  let i = 0;
  let isInJsonTypeSpecifier = false;
  while (i < sql.length) {
    const c = sql.charAt(i);
    if (c === "'" && !isInDoubleQuote && !isInBacktick) {
      // Skip string literals: step past the opening quote, then scan to the
      // matching closing quote. (Previously the scan started ON the opening
      // quote, so the inner loop exited immediately and literal contents were
      // tokenized as code — a literal like 'a.b c.d' produced a false match.)
      // NOTE(review): escaped quotes ('' or \') inside literals are not
      // handled — confirm whether inputs can contain them.
      i++;
      while (i < sql.length && sql.charAt(i) !== c) {
        i++;
      }
      currentExpr = '';
    } else if (c === '"' && !isInBacktick) {
      isInDoubleQuote = !isInDoubleQuote;
      currentExpr += c;
    } else if (c === '`' && !isInDoubleQuote) {
      isInBacktick = !isInBacktick;
      currentExpr += c;
    } else if (/[\s{},+*/[\]]/.test(c)) {
      // Whitespace and most operators terminate the current token.
      isInJsonTypeSpecifier = false;
      finishExpression(currentExpr, i);
    } else if ('()'.includes(c) && !isInJsonTypeSpecifier) {
      finishExpression(currentExpr, i);
    } else if (c === ':') {
      // `::Type(...)` specifiers keep parentheses as part of the token.
      isInJsonTypeSpecifier = true;
      currentExpr += c;
    } else {
      currentExpr += c;
    }
    i++;
  }
  finishExpression(currentExpr, i);
  return expressions;
}
/**
* Replaces expressions within the given SQL string that represent JSON expressions (eg. `col.key.nestedKey`).
* Such expression are replaced with placeholders like `__hdx_json_replacement_0`. The resulting string and a
* map of replacements --> original expressions is returned.
*
* Note - This function does not distinguish between json references and `table.column` references - both are replaced.
*/
export function replaceJsonExpressions(sql: string) {
const jsonExpressions = findJsonExpressions(sql);
const replacements = new Map<string, string>();
let sqlWithReplacements = sql;
let indexOffsetFromInserts = 0;
let replacementCounter = 0;
for (const { expr, index } of jsonExpressions) {
const replacement = `__hdx_json_replacement_${replacementCounter++}`;
replacements.set(replacement, expr);
const effectiveIndex = index + indexOffsetFromInserts;
sqlWithReplacements =
sqlWithReplacements.slice(0, effectiveIndex) +
replacement +
sqlWithReplacements.slice(effectiveIndex + expr.length);
indexOffsetFromInserts += replacement.length - expr.length;
}
return { sqlWithReplacements, replacements };
}
/**
* To best support Pre-aggregation in Materialized Views, any new
* granularities should be multiples of all smaller granularities.
* */
export enum Granularity {
FifteenSecond = '15 second',
ThirtySecond = '30 second',
OneMinute = '1 minute',
FiveMinute = '5 minute',
// TenMinute exists but is intentionally skipped by auto-granularity
// inference (see convertDateRangeToGranularityString).
TenMinute = '10 minute',
FifteenMinute = '15 minute',
ThirtyMinute = '30 minute',
OneHour = '1 hour',
TwoHour = '2 hour',
SixHour = '6 hour',
TwelveHour = '12 hour',
OneDay = '1 day',
TwoDay = '2 day',
SevenDay = '7 day',
ThirtyDay = '30 day',
}
/**
 * Computes a 32-bit signed integer hash of a string (Java String.hashCode
 * style: h = h * 31 + charCode, per UTF-16 code unit). Returns 0 for ''.
 */
export function hashCode(str: string) {
  let hash = 0;
  for (let idx = 0; idx < str.length; idx++) {
    // Math.imul(hash, 31) === ((hash << 5) - hash) modulo 2^32.
    hash = (Math.imul(hash, 31) + str.charCodeAt(idx)) | 0;
  }
  return hash;
}
/**
 * Picks an 'auto' granularity for a date range such that the range is split
 * into at most maxNumBuckets buckets.
 */
export function convertDateRangeToGranularityString(
  dateRange: [Date, Date],
  maxNumBuckets: number = DEFAULT_AUTO_GRANULARITY_MAX_BUCKETS,
): Granularity {
  const [startDate, endDate] = dateRange;
  const diffSeconds = Math.floor(
    (endDate.getTime() - startDate.getTime()) / 1000,
  );
  const bucketSeconds = Math.ceil(diffSeconds / maxNumBuckets);
  // Ascending ladder of (max bucket length in seconds, granularity). The
  // 10 minute step is intentionally skipped so that every auto-inferred
  // granularity is a multiple of all smaller granularities, which makes it
  // more likely that a materialized view can be used.
  const ladder: [number, Granularity][] = [
    [15, Granularity.FifteenSecond],
    [30, Granularity.ThirtySecond],
    [60, Granularity.OneMinute],
    [5 * 60, Granularity.FiveMinute],
    [15 * 60, Granularity.FifteenMinute],
    [30 * 60, Granularity.ThirtyMinute],
    [3600, Granularity.OneHour],
    [2 * 3600, Granularity.TwoHour],
    [6 * 3600, Granularity.SixHour],
    [12 * 3600, Granularity.TwelveHour],
    [24 * 3600, Granularity.OneDay],
    [2 * 24 * 3600, Granularity.TwoDay],
    [7 * 24 * 3600, Granularity.SevenDay],
  ];
  for (const [limit, granularity] of ladder) {
    if (bucketSeconds <= limit) {
      return granularity;
    }
  }
  // Anything larger caps out at 30 days.
  return Granularity.ThirtyDay;
}
/**
 * Converts a SQL interval string like '5 minute' into seconds.
 * Unknown units yield 0.
 */
export function convertGranularityToSeconds(granularity: SQLInterval): number {
  const UNIT_SECONDS: Record<string, number> = {
    second: 1,
    minute: 60,
    hour: 60 * 60,
    day: 60 * 60 * 24,
  };
  const [amount, unit] = granularity.split(' ');
  const multiplier = UNIT_SECONDS[unit];
  if (multiplier === undefined) {
    return 0;
  }
  return Number.parseInt(amount) * multiplier;
}
// Note: roundToNearestMinutes is broken in date-fns currently
// additionally it doesn't support seconds or > 30min
// so we need to write our own :(
// see: https://github.com/date-fns/date-fns/pull/3267/files
/**
 * Rounds a date down to the start of the given interval, in UTC.
 * Unknown units return the date unchanged.
 */
export function toStartOfInterval(date: Date, granularity: SQLInterval): Date {
  const [amountStr, unit] = granularity.split(' ');
  const n = Number.parseInt(amountStr);
  // Truncate a value down to the nearest multiple of n.
  const truncate = (value: number) => Math.floor(value / n) * n;
  switch (unit) {
    case 'second':
      return new Date(
        Date.UTC(
          date.getUTCFullYear(),
          date.getUTCMonth(),
          date.getUTCDate(),
          date.getUTCHours(),
          date.getUTCMinutes(),
          truncate(date.getUTCSeconds()),
        ),
      );
    case 'minute':
      return new Date(
        Date.UTC(
          date.getUTCFullYear(),
          date.getUTCMonth(),
          date.getUTCDate(),
          date.getUTCHours(),
          truncate(date.getUTCMinutes()),
        ),
      );
    case 'hour':
      return new Date(
        Date.UTC(
          date.getUTCFullYear(),
          date.getUTCMonth(),
          date.getUTCDate(),
          truncate(date.getUTCHours()),
        ),
      );
    case 'day': {
      // Clickhouse uses the # of days since unix epoch to round dates
      // see: https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/DateLUTImpl.h#L1059
      const daysSinceEpoch = date.getTime() / 1000 / 60 / 60 / 24;
      return new Date(truncate(daysSinceEpoch) * 1000 * 60 * 60 * 24);
    }
    default:
      return date;
  }
}
export function timeBucketByGranularity(
start: Date,
end: Date,
granularity: SQLInterval,
): Date[] {
const buckets: Date[] = [];
let current = toStartOfInterval(start, granularity);
const granularitySeconds = convertGranularityToSeconds(granularity);
while (current < end) {
buckets.push(current);
current = fnsAdd(current, {
seconds: granularitySeconds,
});
}
return buckets;
}
/**
 * Runs fn and returns a [error, result] tuple: [null, value] on success,
 * [thrownValue, null] on failure.
 */
export const _useTry = <T>(fn: () => T): [null | Error | unknown, null | T] => {
  try {
    return [null, fn()];
  } catch (e) {
    return [e, null];
  }
};
/** Parses a JSON string, returning null instead of throwing on invalid input. */
export const parseJSON = <T = any>(json: string) => {
  const [, parsed] = _useTry<T>(() => JSON.parse(json));
  return parsed;
};
// Date formatting
// date-fns format strings keyed by display style, then by 12/24-hour clock.
const TIME_TOKENS = {
normal: {
'12h': 'MMM d h:mm:ss a',
'24h': 'MMM d HH:mm:ss',
},
short: {
'12h': 'MMM d h:mma',
'24h': 'MMM d HH:mm',
},
// Includes milliseconds.
withMs: {
'12h': 'MMM d h:mm:ss.SSS a',
'24h': 'MMM d HH:mm:ss.SSS',
},
withYear: {
'12h': 'MMM d yyyy h:mm:ss a',
'24h': 'MMM d yyyy HH:mm:ss',
},
// Time of day only, no date portion.
time: {
'12h': 'h:mm:ss a',
'24h': 'HH:mm:ss',
},
};
/**
 * Formats a date for display using the shared TIME_TOKENS patterns.
 * When isUTC is set, the date is rendered in the Etc/UTC zone; otherwise the
 * local timezone is used.
 */
export const formatDate = (
  date: Date,
  {
    isUTC = false,
    format = 'normal',
    clock = '12h',
  }: {
    isUTC?: boolean;
    format?: 'normal' | 'short' | 'withMs' | 'time' | 'withYear';
    clock?: '12h' | '24h';
  },
) => {
  const pattern = TIME_TOKENS[format][clock];
  if (isUTC) {
    return formatInTimeZone(date, 'Etc/UTC', pattern);
  }
  return fnsFormat(date, pattern);
};
// Local aliases for the zod-inferred dashboard/template shapes used by the
// template conversion helpers below.
type Dashboard = z.infer<typeof DashboardSchema>;
type DashboardTemplate = z.infer<typeof DashboardTemplateSchema>;
type TileTemplate = z.infer<typeof TileTemplateSchema>;
/**
 * Converts a stored dashboard into an exportable template: schema-unknown
 * fields are stripped, and source/connection ids are replaced with their
 * display names ('' when no match is found).
 */
export function convertToDashboardTemplate(
  input: Dashboard,
  sources: TSource[],
  connections: Connection[] = [],
): DashboardTemplate {
  // Resolve a source/connection id to its name, falling back to ''.
  const sourceNameById = (id: unknown) =>
    sources.find(source => source.id === id)?.name ?? '';
  const connectionNameById = (id: unknown) =>
    connections.find(conn => conn.id === id)?.name ?? '';
  const toTileTemplate = (tile: Dashboard['tiles'][0]): TileTemplate => {
    const template = TileTemplateSchema.strip().parse(structuredClone(tile));
    const config = template.config;
    if (isBuilderSavedChartConfig(config)) {
      config.source = sourceNameById(config.source);
    } else if (isRawSqlSavedChartConfig(config)) {
      config.connection = connectionNameById(config.connection);
      if (config.source) {
        config.source = sourceNameById(config.source);
      }
    }
    return template;
  };
  const toFilterTemplate = (filter: DashboardFilter): DashboardFilter => {
    const template = DashboardFilterSchema.strip().parse(
      structuredClone(filter),
    );
    template.source = sourceNameById(filter.source);
    return template;
  };
  const output: DashboardTemplate = {
    version: '0.1.0',
    name: input.name,
    // Empty tag lists are omitted from the template entirely.
    tags: input.tags.length > 0 ? input.tags : undefined,
    tiles: input.tiles.map(toTileTemplate),
  };
  if (input.filters) {
    output.filters = input.filters.map(toFilterTemplate);
  }
  if (input.containers) {
    output.containers = structuredClone(input.containers);
  }
  return output;
}
/**
 * Converts an imported dashboard template into a persistable dashboard
 * document. Tile and filter `source` fields are expected to already be
 * converted to ids, so each entry is deep-copied over verbatim.
 */
export function convertToDashboardDocument(
  input: DashboardTemplate,
): DashboardWithoutId {
  const output: DashboardWithoutId = {
    name: input.name,
    // Documents always carry a tags array, even when the template had none.
    tags: input.tags ?? [],
    tiles: input.tiles.map(tile => structuredClone(tile)),
  };
  if (input.filters) {
    output.filters = input.filters.map(filter => structuredClone(filter));
  }
  if (input.containers) {
    output.containers = structuredClone(input.containers);
  }
  return output;
}
/**
 * Returns the first ordering entry of an orderBy, which may be either a raw
 * SQL string (split on top-level commas) or an array of ordering objects.
 * Returns undefined when there is no ordering.
 */
export const getFirstOrderingItem = (
  orderBy: BuilderChartConfigWithDateRange['orderBy'],
) => {
  if (!orderBy || orderBy.length === 0) {
    return undefined;
  }
  if (typeof orderBy === 'string') {
    return splitAndTrimWithBracket(orderBy)[0];
  }
  return orderBy[0];
};
/** Strips a trailing ASC/DESC direction keyword (any case) from an ordering expression. */
export const removeTrailingDirection = (s: string) => {
  const upper = s.trim().toUpperCase();
  // DESC is checked first since it also ends with the letters of ASC.
  for (const direction of ['DESC', 'ASC']) {
    if (upper.endsWith(direction)) {
      // Indices line up because toUpperCase preserves length for these inputs.
      return s.slice(0, upper.lastIndexOf(direction)).trim();
    }
  }
  return s;
};
export const isTimestampExpressionInFirstOrderBy = (
config: BuilderChartConfigWithOptTimestamp,
) => {
const firstOrderingItem = getFirstOrderingItem(config.orderBy);
if (!firstOrderingItem || config.timestampValueExpression == null)
return false;
const firstOrderingExpression =
typeof firstOrderingItem === 'string'
? removeTrailingDirection(firstOrderingItem)
: firstOrderingItem.valueExpression;
const timestampValueExpressions = splitAndTrimWithBracket(
config.timestampValueExpression,
);
return timestampValueExpressions.some(tve =>
firstOrderingExpression.includes(tve),
);
};
/**
 * Returns true when the first ORDER BY entry sorts ascending.
 * Returns false when there is no ordering at all.
 */
export const isFirstOrderByAscending = (
  orderBy: BuilderChartConfigWithDateRange['orderBy'],
): boolean => {
  const firstItem = getFirstOrderingItem(orderBy);
  if (!firstItem) {
    return false;
  }
  if (typeof firstItem === 'string') {
    return !firstItem.trim().toUpperCase().endsWith('DESC');
  }
  return firstItem.ordering !== 'DESC';
};
/**
* Parses a single expression of the form
* `toStartOf<Interval>(column[, timezone])` or `toStartOfInterval(column[, interval[, origin[, timezone]]])`.
* Returns undefined if the expression is not of this form.
*/
export function parseToStartOfFunction(
expr: string,
):
| { function: string; columnArgument: string; formattedRemainingArgs: string }
| undefined {
// Reject comma-separated lists: the input must be a single expression.
const parts = splitAndTrimWithBracket(expr);
if (parts.length !== 1) return undefined;
const toStartOfMatches = expr.match(/(toStartOf\w+)\s*\(/);
if (toStartOfMatches) {
// The toStartOf call must be the entire expression: nothing but
// whitespace may precede it.
const prefix = expr.substring(0, toStartOfMatches.index!);
if (prefix.trim() !== '') return undefined;
// toStartOfSubstring is the full match "toStartOfX ("; toStartOfFunction
// is just the captured function name.
const [toStartOfSubstring, toStartOfFunction] = toStartOfMatches;
// Arguments span from just past the opening paren to the last ')'.
const argsStartIndex =
expr.indexOf(toStartOfSubstring) + toStartOfSubstring.length;
const argsEndIndex = expr.lastIndexOf(')');
const args = splitAndTrimWithBracket(
expr.substring(argsStartIndex, argsEndIndex),
);
const columnArgument = args[0];
if (columnArgument == null) {
console.error(`Failed to parse column argument from ${expr}`);
return undefined;
}
// Preserve any extra args (interval/origin/timezone) as ", a, b" so the
// caller can append them to a rebuilt call verbatim.
const formattedRemainingArgs =
args.length > 1 ? `, ${args.slice(1).join(', ')}` : '';
return {
function: toStartOfFunction.trim(),
columnArgument,
formattedRemainingArgs,
};
}
}
/**
* Returns an optimized timestamp value expression for a table based on its timestampValueExpression and primary key.
*
* When a table has a sort key like `toStartOfMinute(timestamp), ..., timestamp`, it is more performant
* to filter by toStartOfMinute(timestamp) and timestamp, instead of just timestamp.
*/
export function optimizeTimestampValueExpression(
timestampValueExpression: string,
primaryKey: string | undefined,
) {
if (!primaryKey || !timestampValueExpression) return timestampValueExpression;
const timestampValueExprs = [timestampValueExpression];
const primaryKeyExprs = splitAndTrimWithBracket(primaryKey);
// Walk the primary key columns in order, collecting toStartOf*(timestamp)
// expressions until the raw timestamp column itself is reached.
for (const primaryKeyExpr of primaryKeyExprs) {
const toStartOf = parseToStartOfFunction(primaryKeyExpr);
if (
primaryKeyExpr === timestampValueExpression.trim() ||
(primaryKeyExpr.startsWith('toUnixTimestamp') &&
primaryKeyExpr.includes(timestampValueExpression)) ||
(primaryKeyExpr.startsWith('toDateTime') &&
primaryKeyExpr.includes(timestampValueExpression))
) {
// We only want to add expressions that come before the timestampExpr in the primary key
break;
} else if (
toStartOf &&
toStartOf.columnArgument === timestampValueExpression.trim()
) {
// A coarser prefix like toStartOfMinute(timestamp): filtering on it as
// well lets ClickHouse use the sort key more effectively.
timestampValueExprs.push(primaryKeyExpr);
}
}
// Returned as a comma-separated list, original expression first.
return timestampValueExprs.join(', ');
}
/**
 * Expands a date range so both ends land on granularity interval boundaries:
 * the start is rounded down, the end is rounded up (when not already aligned).
 */
export function getAlignedDateRange(
  [originalStart, originalEnd]: [Date, Date],
  granularity: SQLInterval,
): [Date, Date] {
  const alignedStart = toStartOfInterval(originalStart, granularity);
  const flooredEnd = toStartOfInterval(originalEnd, granularity);
  // If flooring moved the end earlier, bump it forward one full interval so
  // the aligned range still covers the original range.
  const alignedEnd =
    flooredEnd.getTime() < originalEnd.getTime()
      ? fnsAdd(flooredEnd, {
          seconds: convertGranularityToSeconds(granularity),
        })
      : flooredEnd;
  return [alignedStart, alignedEnd];
}
/** Returns true when both ranges have identical start and end instants. */
export function isDateRangeEqual(range1: [Date, Date], range2: [Date, Date]) {
  const [start1, end1] = range1;
  const [start2, end2] = range2;
  return (
    start1.getTime() === start2.getTime() && end1.getTime() === end2.getTime()
  );
}
/**
 * Extracts the SETTINGS clause from the end(!) of the sql string.
 *
 * @returns [sqlWithoutSettings, settingsClause] where settingsClause is
 * undefined when no standalone SETTINGS keyword is present. A trailing ';'
 * is stripped first.
 */
export function extractSettingsClauseFromEnd(
  sqlInput: string,
): [string, string | undefined] {
  const sql = sqlInput.trim().endsWith(';')
    ? sqlInput.trim().slice(0, -1)
    : sqlInput.trim();
  // Find the LAST standalone SETTINGS keyword. Using the last occurrence
  // with a word boundary avoids splitting on identifiers like `settings_col`
  // or an earlier mention of the word, which the previous first-occurrence
  // `indexOf` lookup did.
  // NOTE(review): a " SETTINGS " inside a trailing string literal would still
  // be matched — confirm inputs cannot end with such literals.
  const upper = sql.toUpperCase();
  const keyword = /\bSETTINGS\b/g;
  let settingsIndex = -1;
  let match: RegExpExecArray | null;
  while ((match = keyword.exec(upper)) !== null) {
    settingsIndex = match.index;
  }
  if (settingsIndex === -1) {
    return [sql, undefined] as const;
  }
  const settingsClause = sql.substring(settingsIndex).trim();
  const remaining = sql.substring(0, settingsIndex).trim();
  return [remaining, settingsClause] as const;
}
/**
 * Parses a string into a finite number, returning undefined for empty,
 * non-numeric, NaN, or infinite input.
 */
export function parseToNumber(input: string): number | undefined {
  const candidate = input.trim();
  if (candidate.length === 0) {
    return undefined;
  }
  const parsed = Number(candidate);
  if (!Number.isFinite(parsed)) {
    return undefined;
  }
  return parsed;
}
/**
 * Joins query settings into a "key = value, key = value" string. Pairs with
 * an empty key or value are skipped; numeric values are emitted bare and all
 * other values are single-quoted. Returns undefined for a missing/empty list.
 */
export function joinQuerySettings(
  querySettings: QuerySettings | undefined,
): string | undefined {
  if (!querySettings?.length) {
    return undefined;
  }
  const parts: string[] = [];
  for (const { setting, value } of querySettings) {
    if (!setting.length || !value.length) {
      continue;
    }
    const formattedValue = parseToNumber(value) ?? `'${value}'`;
    parts.push(`${setting} = ${formattedValue}`);
  }
  return parts.join(', ');
}
// A discriminated union type for different tokenizers above
export type TextIndexTokenizer =
| { type: 'splitByNonAlpha' }
// separators defaults to [' '] when the index definition provides none.
| { type: 'splitByString'; separators: string[] }
// n defaults to 3 when the index definition provides none.
| { type: 'ngrams'; n: number }
| {
type: 'sparseGrams';
// Defaults when unspecified: minLength 3, maxLength 10; minCutoffLength
// is only set when a third argument is present.
minLength: number;
maxLength: number;
minCutoffLength?: number;
}
| { type: 'array' };
/**
* Parses the tokenizer and any associated tokenizer parameters from a text index type definition.
*
* Examples:
* - `text(tokenizer = splitByNonAlpha)` -> `{ type: 'splitByNonAlpha' }`
* - `text(tokenizer = splitByString([', ', '; ', '\n', '\\']))` -> `{ type: 'splitByString', separators: [', ', '; ', '\n', '\\'] }`
* - `text(preprocessor=lower(s), tokenizer=sparseGrams(2, 5, 10))` -> `{ type: 'sparseGrams', minLength: 2, maxLength: 5, minCutoffLength: 10 }`
*/
export function parseTokenizerFromTextIndex({
typeFull,
}: SkipIndexMetadata): TextIndexTokenizer | undefined {
// The index type must have the overall shape `text( ... )`.
const textPattern = /^\s*text\s*\((.+)\)\s*$/;
const match = typeFull.match(textPattern);
if (!match) {
console.error(`Invalid text index type ${typeFull}.`);
return undefined;
}
const argsString = match[1].trim();
// Split the `key = value` pairs, respecting brackets/quotes inside values.
const args = splitAndTrimWithBracket(argsString).map(arg => {
const [key, value] = arg.split('=').map(s => s.trim());
return { key, value };
});
const tokenizerArgRaw = args.find(arg => arg.key === 'tokenizer')?.value;
// Strip surrounding quotes if present (e.g., 'splitByNonAlpha' -> splitByNonAlpha)
const tokenizerArg = stripQuotes(tokenizerArgRaw ?? '');
if (!tokenizerArg) {
console.error(
`Invalid tokenizer argument in index type ${typeFull}: ${tokenizerArg}`,
argsString,
splitAndTrimWithBracket(argsString),
);
return undefined;
}
// Separate the tokenizer name from its optional parenthesized arguments;
// when there are no parens, tokenizerArgsString ends up ''.
const tokenizerName = tokenizerArg.split('(')[0].trim();
const tokenizerArgsString = tokenizerArg
.substring(tokenizerArg.indexOf('(') + 1, tokenizerArg.lastIndexOf(')'))
.trim();
switch (tokenizerName) {
case 'splitByNonAlpha':
return { type: 'splitByNonAlpha' };
case 'array':
return { type: 'array' };
case 'ngrams': {
// Default n is 3
if (!tokenizerArgsString) {
return { type: 'ngrams', n: 3 };
}
return { type: 'ngrams', n: Number.parseInt(tokenizerArgsString, 10) };
}
case 'sparseGrams': {
// Positional args: minLength, maxLength, minCutoffLength (all optional).
const args = tokenizerArgsString
.split(',')
.map(s => s.trim())
.filter(s => !!s);
const tokenizer: TextIndexTokenizer = {
type: 'sparseGrams',
minLength: 3,
maxLength: 10,
};
if (args.length >= 1) tokenizer.minLength = Number.parseInt(args[0], 10);
if (args.length >= 2) tokenizer.maxLength = Number.parseInt(args[1], 10);
if (args.length >= 3)
tokenizer.minCutoffLength = Number.parseInt(args[2], 10);
return tokenizer;
}
case 'splitByString': {
if (!tokenizerArgsString) {
// Default separator is space
return { type: 'splitByString', separators: [' '] };
}
// Translates backslash escape sequences into the characters they denote.
// NOTE(review): \a, \b and \e map to the plain letters here rather than
// control characters — confirm this matches the server-side escaping.
const unescape = (str: string) => {
const escapeCharacters = [
{ pattern: /\\a/g, replacement: 'a' },
{ pattern: /\\b/g, replacement: 'b' },
{ pattern: /\\e/g, replacement: 'e' },
{ pattern: /\\f/g, replacement: '\f' },
{ pattern: /\\n/g, replacement: '\n' },
{ pattern: /\\r/g, replacement: '\r' },
{ pattern: /\\t/g, replacement: '\t' },
{ pattern: /\\v/g, replacement: '\v' },
{ pattern: /\\0/g, replacement: '\0' },
{ pattern: /\\\\/g, replacement: '\\' },
{ pattern: /\\'/g, replacement: "'" },
{ pattern: /\\"/g, replacement: '"' },
{ pattern: /\\`/g, replacement: '`' },
{ pattern: /\\\//g, replacement: '/' },
{ pattern: /\\=/g, replacement: '=' },
];
for (const { pattern, replacement } of escapeCharacters) {
str = str.replace(pattern, replacement);
}
return str;
};
const separatorsString = tokenizerArgsString.match(/\[(.*)\]/);
if (!separatorsString) {
// If no array is provided, default to space
return { type: 'splitByString', separators: [' '] };
}
const arrayContent = separatorsString[1];
// Split by commas outside of quotes
const separators: string[] = [];
let current = '';
let inQuote = false;
let quoteChar = '';
for (let i = 0; i < arrayContent.length; i++) {
const char = arrayContent[i];
if ((char === "'" || char === '"') && !inQuote) {
inQuote = true;
quoteChar = char;
} else if (char === quoteChar && inQuote) {
// Close the quote unless this quote char is escaped by a single
// backslash (a preceding double backslash is an escaped backslash,
// so the quote still closes).
if (arrayContent[i - 1] !== '\\' || arrayContent[i - 2] === '\\') {
inQuote = false;
quoteChar = '';
}
} else if (char === ',' && !inQuote) {
const trimmed = current.trim();
if (trimmed) {
// Remove quotes and unescape characters
const value = trimmed.replace(/^['"]|['"]$/g, '');
const unescapedValue = unescape(value);
separators.push(unescapedValue);
}
current = '';
continue;
}
current += char;
}
// Add last separator
const trimmed = current.trim();
if (trimmed) {
const value = trimmed.replace(/^['"]|['"]$/g, '');
const unescapedValue = unescape(value);
separators.push(unescapedValue);
}
return { type: 'splitByString', separators };
}
default:
console.error(`Unknown tokenizer ${tokenizerName} in type ${typeFull}.`);
return undefined;
}
}
/**
* Converts an aliasMap (e.g. from chSqlToAliasMap) to an array of WITH clause entries.
* These WITH clauses define aliases as expressions (isSubquery: false),
* making them available in WHERE and other clauses.
*/
/**
 * Converts an aliasMap (e.g. from chSqlToAliasMap) to an array of WITH clause
 * entries defining each alias as an expression (isSubquery: false), making
 * the aliases available in WHERE and other clauses. Entries with a null,
 * undefined, or blank value are skipped; returns undefined when nothing
 * remains.
 */
export function aliasMapToWithClauses(
  aliasMap: Record<string, string | undefined> | undefined,
): BuilderChartConfig['with'] {
  if (!aliasMap) {
    return undefined;
  }
  const withClauses = [];
  for (const [name, value] of Object.entries(aliasMap)) {
    if (value == null || value.trim() === '') {
      continue;
    }
    withClauses.push({
      name,
      sql: { sql: value, params: {} },
      isSubquery: false,
    });
  }
  return withClauses.length > 0 ? withClauses : undefined;
}
/** Removes at most one leading and one trailing quote character (", ', or `); the two need not match. */
const stripQuotes = (s: string): string =>
  s.replace(/^["'`]/, '').replace(/["'`]$/, '');
/** Parses and returns the cluster, database, and table from the given distributed table metadata */
/** Parses and returns the cluster, database, and table from the given distributed table metadata */
export function getDistributedTableArgs(
  tableMetadata: TableMetadata,
): { cluster: string; database: string; table: string } | undefined {
  const rawArgs =
    tableMetadata.engine_full.match(/Distributed\((.+)\)$/)?.[1] ?? '';
  // The first three engine arguments are cluster, database, and table.
  const [cluster, database, table] = splitAndTrimWithBracket(rawArgs);
  if (table === undefined) {
    console.error(
      `Failed to parse engine arguments for Distributed table: ${tableMetadata.engine_full}`,
    );
    return undefined;
  }
  return {
    cluster: stripQuotes(cluster),
    database: stripQuotes(database),
    table: stripQuotes(table),
  };
}