fix: use CTE instead of listing all index parts in query (#666)

## feat: allow CTE definitions to be nested chart configs

To make it easy to use a CTE for fixing the large index issues with delta
trace events, this commit updates the `with` clause type and the
`renderWith` function so that a CTE can be defined by a nested chart config.

Ref: HDX-1343

---

## fix: use CTE instead of listing all index parts in query

Instead of sending two queries to the DB and enumerating all of the parts
and offsets in the query, this change uses a CTE to select the parts.
This reduces the size of the HTTP request, which fixes the "URI Too
Long" error response.

Ref: HDX-1343
This commit is contained in:
Dan Hable 2025-03-14 08:34:47 -05:00 committed by GitHub
parent 5db2767015
commit a9dfa14930
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 181 additions and 82 deletions

View file

@ -0,0 +1,6 @@
---
"@hyperdx/common-utils": minor
"@hyperdx/app": patch
---
Added support for defining a CTE with a full chart config object when rendering `with` clauses. This capability is then used to avoid the "URI too long" error for delta event queries.

View file

@ -253,83 +253,77 @@ export default function DBDeltaChart({
config: ChartConfigWithOptDateRange;
outlierSqlCondition: string;
}) {
const { data: outlierPartIds } = useQueriedChartConfig({
const { data: outlierData } = useQueriedChartConfig({
...config,
select: '_part, _part_offset',
with: [
{
name: 'PartIds',
sql: {
...config,
select: 'tuple(_part, _part_offset)',
filters: [
...(config.filters ?? []),
{
type: 'sql',
condition: `${outlierSqlCondition}`,
},
],
orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
limit: { limit: 1000 },
},
},
],
select: '*',
filters: [
...(config.filters ?? []),
{
type: 'sql',
condition: `${outlierSqlCondition}`,
},
],
orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
limit: { limit: 1000 },
});
const { data: outlierData } = useQueriedChartConfig(
{
...config,
select: '*',
filters: [
...(config.filters ?? []),
{
type: 'sql',
condition: `${outlierSqlCondition}`,
},
{
type: 'sql',
condition: `indexHint((_part, _part_offset) IN (${outlierPartIds?.data
?.map((r: any) => `('${r._part}', ${r._part_offset})`)
?.join(',')}))`,
},
],
orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
limit: { limit: 1000 },
},
{
enabled: (outlierPartIds?.data?.length ?? 0) > 0,
},
);
const { data: inlierPartIds } = useQueriedChartConfig({
...config,
select: '_part, _part_offset',
filters: [
...(config.filters ?? []),
{
type: 'sql',
condition: `NOT (${outlierSqlCondition})`,
condition: `indexHint((_part, _part_offset) IN PartIds)`,
},
],
orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
limit: { limit: 1000 },
});
const { data: inlierData } = useQueriedChartConfig(
{
...config,
select: '*',
filters: [
...(config.filters ?? []),
{
type: 'sql',
condition: `NOT (${outlierSqlCondition})`,
const { data: inlierData } = useQueriedChartConfig({
...config,
with: [
{
name: 'PartIds',
sql: {
...config,
select: '_part, _part_offset',
filters: [
...(config.filters ?? []),
{
type: 'sql',
condition: `NOT (${outlierSqlCondition})`,
},
],
orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
limit: { limit: 1000 },
},
{
type: 'sql',
condition: `indexHint((_part, _part_offset) IN (${inlierPartIds?.data
?.map((r: any) => `('${r._part}', ${r._part_offset})`)
?.join(',')}))`,
},
],
orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
limit: { limit: 1000 },
},
{
enabled: (inlierPartIds?.data?.length ?? 0) > 0,
},
);
},
],
select: '*',
filters: [
...(config.filters ?? []),
{
type: 'sql',
condition: `NOT (${outlierSqlCondition})`,
},
{
type: 'sql',
condition: `indexHint((_part, _part_offset) IN PartIds)`,
},
],
orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
limit: { limit: 1000 },
});
// TODO: Is loading state
const { sortedProperties, outlierValueOccurences, inlierValueOccurences } =

View file

@ -107,3 +107,7 @@ exports[`renderChartConfig should generate sql for a single sum metric 1`] = `
toFloat64OrNull(toString(Value))
),toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` FROM Bucketed WHERE (\`__hdx_time_bucket2\` >= fromUnixTimestamp64Milli(1739318400000) AND \`__hdx_time_bucket2\` <= fromUnixTimestamp64Milli(1765670400000)) GROUP BY toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` ORDER BY toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` LIMIT 10"
`;
exports[`renderChartConfig should render a chart config CTE configuration correctly 1`] = `"WITH Parts AS (SELECT _part, _part_offset FROM default.some_table WHERE ((FieldA = 'test')) ORDER BY rand() DESC LIMIT 1000) SELECT * FROM Parts WHERE ((FieldA = 'test') AND (indexHint((_part, _part_offset) IN (SELECT tuple(_part, _part_offset) FROM Parts)))) ORDER BY rand() DESC LIMIT 1000"`;
exports[`renderChartConfig should render a string CTE configuration correctly 1`] = `"WITH TestCte AS (SELECT TimeUnix, Line FROM otel_logs) SELECT Line FROM TestCte"`;

View file

@ -158,4 +158,70 @@ describe('renderChartConfig', () => {
const actual = parameterizedQueryToSql(generatedSql);
expect(actual).toMatchSnapshot();
});
// Renders a chart config whose single CTE is supplied as a raw SQL string and
// compares the generated SQL against the stored snapshot.
it('should render a string CTE configuration correctly', async () => {
const config: ChartConfigWithOptDateRange = {
connection: 'test-connection',
// The main query selects from the CTE by name; the database name is left
// empty so that only the CTE name appears in the FROM clause.
from: {
databaseName: '',
tableName: 'TestCte',
},
with: [{ name: 'TestCte', sql: 'SELECT TimeUnix, Line FROM otel_logs' }],
select: [{ valueExpression: 'Line' }],
where: '',
whereLanguage: 'sql',
};
const generatedSql = await renderChartConfig(config, mockMetadata);
// Inline the query parameters so the snapshot holds plain SQL text.
const actual = parameterizedQueryToSql(generatedSql);
expect(actual).toMatchSnapshot();
});
// Renders a chart config whose CTE is itself described by a nested chart
// config object (rather than SQL text) and compares the generated SQL against
// the stored snapshot.
it('should render a chart config CTE configuration correctly', async () => {
const config: ChartConfigWithOptDateRange = {
connection: 'test-connection',
with: [
{
name: 'Parts',
// Nested chart config: a filtered, randomly ordered sample of at most
// 1000 (_part, _part_offset) pairs from default.some_table.
sql: {
select: '_part, _part_offset',
from: { databaseName: 'default', tableName: 'some_table' },
where: '',
whereLanguage: 'sql',
filters: [
{
type: 'sql',
condition: `FieldA = 'test'`,
},
],
orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
limit: { limit: 1000 },
},
},
],
select: '*',
filters: [
{
type: 'sql',
condition: `FieldA = 'test'`,
},
// Second filter references the CTE by name inside an indexHint(...) IN
// subquery instead of listing part/offset pairs inline.
{
type: 'sql',
condition: `indexHint((_part, _part_offset) IN (SELECT tuple(_part, _part_offset) FROM Parts))`,
},
],
// The outer query reads from the CTE; the database name is left empty so
// that only the CTE name appears in the FROM clause.
from: {
databaseName: '',
tableName: 'Parts',
},
where: '',
whereLanguage: 'sql',
orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
limit: { limit: 1000 },
};
const generatedSql = await renderChartConfig(config, mockMetadata);
// Inline the query parameters so the snapshot holds plain SQL text.
const actual = parameterizedQueryToSql(generatedSql);
expect(actual).toMatchSnapshot();
});
});

View file

@ -7,6 +7,7 @@ import { CustomSchemaSQLSerializerV2, SearchQueryBuilder } from '@/queryParser';
import {
AggregateFunction,
AggregateFunctionWithCombinators,
ChartConfigSchema,
ChartConfigWithDateRange,
ChartConfigWithOptDateRange,
MetricsDataType,
@ -416,7 +417,7 @@ async function timeFilterExpr({
connectionId: string;
databaseName: string;
tableName: string;
with?: { name: string; sql: ChSql }[];
with?: ChartConfigWithDateRange['with'];
includedDataInterval?: string;
}) {
const valueExpressions = timestampValueExpression.split(',');
@ -499,11 +500,8 @@ async function renderSelect(
);
}
function renderFrom({
from,
}: {
from: ChartConfigWithDateRange['from'];
}): ChSql {
function renderFrom(chartConfig: ChartConfigWithDateRange): ChSql {
const from = chartConfig.from;
return concatChSql(
'.',
chSql`${from.databaseName === '' ? '' : { Identifier: from.databaseName }}`,
@ -528,7 +526,7 @@ async function renderWhereExpression({
from: ChartConfigWithDateRange['from'];
implicitColumnExpression?: string;
connectionId: string;
with?: { name: string; sql: ChSql }[];
with?: ChartConfigWithDateRange['with'];
}): Promise<ChSql> {
let _condition = condition;
if (language === 'lucene') {
@ -737,21 +735,49 @@ type ChartConfigWithOptDateRangeEx = ChartConfigWithOptDateRange & {
includedDataInterval?: string;
};
function renderWith(
async function renderWith(
chartConfig: ChartConfigWithOptDateRangeEx,
metadata: Metadata,
): ChSql | undefined {
): Promise<ChSql | undefined> {
const { with: withClauses } = chartConfig;
if (withClauses) {
return concatChSql(
',',
withClauses.map(clause => {
if (clause.isSubquery === false) {
return chSql`(${clause.sql}) AS ${{ Identifier: clause.name }}`;
}
// Can not use identifier here
return chSql`${clause.name} AS (${clause.sql})`;
}),
await Promise.all(
withClauses.map(async clause => {
// The sql logic can be specified as either a string, ChSql instance or a
// chart config object.
let resolvedSql: ChSql;
if (typeof clause.sql === 'string') {
resolvedSql = chSql`${{ Identifier: clause.sql }}`;
} else if (clause.sql && 'sql' in clause.sql) {
resolvedSql = clause.sql;
} else if (
clause.sql &&
('select' in clause.sql || 'connection' in clause.sql)
) {
resolvedSql = await renderChartConfig(
{
...clause.sql,
connection: chartConfig.connection,
timestampValueExpression:
chartConfig.timestampValueExpression || '',
} as ChartConfigWithOptDateRangeEx,
metadata,
);
} else {
throw new Error(
`ChartConfig with clause is an unsupported type: ${clause.sql}`,
);
}
if (clause.isSubquery === false) {
return chSql`(${resolvedSql}) AS ${{ Identifier: clause.name }}`;
}
// Can not use identifier here
return chSql`${clause.name} AS (${resolvedSql})`;
}),
),
);
}
@ -1100,7 +1126,7 @@ export async function renderChartConfig(
? await translateMetricChartConfig(rawChartConfig, metadata)
: rawChartConfig;
const withClauses = renderWith(chartConfig, metadata);
const withClauses = await renderWith(chartConfig, metadata);
const select = await renderSelect(chartConfig, metadata);
const from = renderFrom(chartConfig);
const where = await renderWhere(chartConfig, metadata);

View file

@ -102,6 +102,12 @@ export const LimitSchema = z.object({
limit: z.number().optional(),
offset: z.number().optional(),
});
/**
 * Zod schema for a pre-built SQL fragment: the SQL text plus a record of
 * parameter values keyed by name (values are not validated further).
 */
export const ChSqlSchema = z.object({
sql: z.string(),
params: z.record(z.string(), z.any()),
});
export const SelectSQLStatementSchema = z.object({
select: SelectListSchema,
from: z.object({
@ -119,10 +125,7 @@ export const SelectSQLStatementSchema = z.object({
.array(
z.object({
name: z.string(),
sql: z.object({
sql: z.string(),
params: z.record(z.string(), z.any()),
}),
sql: z.lazy(() => ChSqlSchema.or(ChartConfigSchema)),
// If true, it'll render as WITH ident AS (subquery)
// If false, it'll be a "variable" ex. WITH (sql) AS ident
// where sql can be any expression, ex. a constant string