fix: update sum metric query based on v1 integration test (#650)

Fix the sum query to produce the correct results from the min/max test case from v1.

Ref: HDX-1421
Authored by Dan Hable on 2025-03-07 01:03:03 -06:00; committed by GitHub
parent 7f1a5a0b09
commit 99b60d50b2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 251 additions and 97 deletions

View file

@@ -0,0 +1,6 @@
---
"@hyperdx/common-utils": patch
"@hyperdx/api": patch
---
Fixed sum metric query to pass integration test case from v1.

View file

@@ -21,15 +21,41 @@ Array [
]
`;
exports[`renderChartConfig Query Metrics calculates min_rate/max_rate correctly for sum metrics: maxSum 1`] = `
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"max(toFloat64OrNull(toString(Value)))": 24,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:10:00Z",
"max(toFloat64OrNull(toString(Value)))": 134,
},
]
`;
exports[`renderChartConfig Query Metrics calculates min_rate/max_rate correctly for sum metrics: minSum 1`] = `
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"min(toFloat64OrNull(toString(Value)))": 15,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:10:00Z",
"min(toFloat64OrNull(toString(Value)))": 52,
},
]
`;
exports[`renderChartConfig Query Metrics handles counter resets correctly for sum metrics 1`] = `
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 15,
"sum(toFloat64OrNull(toString(Value)))": 15,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:10:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 52,
"sum(toFloat64OrNull(toString(Value)))": 52,
},
]
`;
@@ -115,19 +141,19 @@ exports[`renderChartConfig Query Metrics single sum rate 1`] = `
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 19,
"sum(toFloat64OrNull(toString(Value)))": 19,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:05:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 79,
"sum(toFloat64OrNull(toString(Value)))": 79,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:10:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 5813,
"sum(toFloat64OrNull(toString(Value)))": 5813,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:15:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 78754,
"sum(toFloat64OrNull(toString(Value)))": 78754,
},
]
`;

View file

@@ -532,12 +532,23 @@ describe('renderChartConfig', () => {
expect(await queryData(query)).toMatchSnapshot();
});
// FIXME: here are the expected values
// [0, 1, 8, 8, 15, 15, 23, 25, 25, 67]
// [0, 2, 9, 9, 24, 34, 44, 66, 66, 158]
// min -> [15, 52]
// max -> [24, 134]
it.skip('calculates min_rate/max_rate correctly for sum metrics', async () => {
it('calculates min_rate/max_rate correctly for sum metrics', async () => {
// Based on the data inserted in the fixture, the expected stream of values
// for each series after adjusting for the zero reset should be:
// MIN_VARIANT_0: [0, 1, 8, 8, 15, 15, 23, 25, 25, 67]
// MIN_VARIANT_1: [0, 2, 9, 9, 24, 34, 44, 66, 66, 158]
//
// At the 10 minute buckets, should result in three buckets for each where
// the first bucket is outside the query window.
    // MIN_VARIANT_0: [[0], [1, 8, 8, 15], [15, 23, 25, 25, 67]]
    // MIN_VARIANT_1: [[0], [2, 9, 9, 24], [34, 44, 66, 66, 158]]
//
// When comparing the value at the end of the buckets over the filtered
// time frame it should result in the following counts added per bucket as:
// MIN_VARIANT_0: [15, 52]
// MIN_VARIANT_1: [24, 134]
//
// These values are what we apply the aggregation functions to.
const minQuery = await renderChartConfig(
{
select: [
@@ -562,6 +573,7 @@
},
metadata,
);
expect(await queryData(minQuery)).toMatchSnapshot('minSum');
const maxQuery = await renderChartConfig(
{
@@ -587,6 +599,7 @@
},
metadata,
);
expect(await queryData(maxQuery)).toMatchSnapshot('maxSum');
});
});
});

View file

@@ -27,3 +27,78 @@ exports[`renderChartConfig should generate sql for a single gauge metric 1`] = `
TO toUnixTimestamp(toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 1 minute))
STEP 60 LIMIT 10"
`;
exports[`renderChartConfig should generate sql for a single histogram metric 1`] = `
"WITH HistRate AS (SELECT *, any(BucketCounts) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevBucketCounts,
any(CountLength) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevCountLength,
any(AttributesHash) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevAttributesHash,
IF(AggregationTemporality = 1,
BucketCounts,
IF(AttributesHash = PrevAttributesHash AND CountLength = PrevCountLength,
arrayMap((prev, curr) -> IF(curr < prev, curr, toUInt64(toInt64(curr) - toInt64(prev))), PrevBucketCounts, BucketCounts),
BucketCounts)) as BucketRates
FROM (
SELECT *, cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash,
length(BucketCounts) as CountLength
FROM default.otel_metrics_histogram)
WHERE MetricName = 'http.server.duration'
ORDER BY Attributes, TimeUnix ASC
),RawHist AS (
SELECT *, toUInt64( 0.5 * arraySum(BucketRates)) AS Rank,
arrayCumSum(BucketRates) as CumRates,
arrayFirstIndex(x -> if(x > Rank, 1, 0), CumRates) AS BucketLowIdx,
IF(BucketLowIdx = length(BucketRates),
ExplicitBounds[length(ExplicitBounds)], -- if the low bound is the last bucket, use the last bound value
IF(BucketLowIdx > 1, -- indexes are 1-based
ExplicitBounds[BucketLowIdx] + (ExplicitBounds[BucketLowIdx + 1] - ExplicitBounds[BucketLowIdx]) *
intDivOrZero(
Rank - CumRates[BucketLowIdx - 1],
CumRates[BucketLowIdx] - CumRates[BucketLowIdx - 1]),
arrayElement(ExplicitBounds, BucketLowIdx + 1) * intDivOrZero(Rank, CumRates[BucketLowIdx]))) as Rate
FROM HistRate) SELECT sum(
toFloat64OrNull(toString(Rate))
) FROM RawHist WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000)) LIMIT 10"
`;
exports[`renderChartConfig should generate sql for a single sum metric 1`] = `
"WITH Source AS (
SELECT
*,
cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash,
IF(AggregationTemporality = 1,
SUM(Value) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
deltaSum(Value) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
) AS Value
FROM default.otel_metrics_sum
WHERE (TimeUnix >= toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 5 minute) - INTERVAL 5 minute AND TimeUnix <= toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 5 minute) + INTERVAL 5 minute) AND ((MetricName = 'db.client.connections.usage'))),Bucketed AS (
SELECT
toStartOfInterval(toDateTime(TimeUnix), INTERVAL 5 minute) AS \`__hdx_time_bucket2\`,
AttributesHash,
last_value(Source.Value) AS \`__hdx_value_high\`,
any(\`__hdx_value_high\`) OVER(PARTITION BY AttributesHash ORDER BY \`__hdx_time_bucket2\` ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS \`__hdx_value_high_prev\`,
\`__hdx_value_high\` - \`__hdx_value_high_prev\` AS Value,
any(ResourceAttributes) AS ResourceAttributes,
any(ResourceSchemaUrl) AS ResourceSchemaUrl,
any(ScopeName) AS ScopeName,
any(ScopeVersion) AS ScopeVersion,
any(ScopeAttributes) AS ScopeAttributes,
any(ScopeDroppedAttrCount) AS ScopeDroppedAttrCount,
any(ScopeSchemaUrl) AS ScopeSchemaUrl,
any(ServiceName) AS ServiceName,
any(MetricName) AS MetricName,
any(MetricDescription) AS MetricDescription,
any(MetricUnit) AS MetricUnit,
any(Attributes) AS Attributes,
any(StartTimeUnix) AS StartTimeUnix,
any(Flags) AS Flags,
any(AggregationTemporality) AS AggregationTemporality,
any(IsMonotonic) AS IsMonotonic
FROM Source
GROUP BY AttributesHash, \`__hdx_time_bucket2\`
ORDER BY AttributesHash, \`__hdx_time_bucket2\`
) SELECT avg(
toFloat64OrNull(toString(Value))
),toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` FROM Bucketed WHERE (\`__hdx_time_bucket2\` >= fromUnixTimestamp64Milli(1739318400000) AND \`__hdx_time_bucket2\` <= fromUnixTimestamp64Milli(1765670400000)) GROUP BY toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` ORDER BY toStartOfInterval(toDateTime(\`__hdx_time_bucket2\`), INTERVAL 5 minute) AS \`__hdx_time_bucket\` WITH FILL FROM toUnixTimestamp(toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 5 minute))
TO toUnixTimestamp(toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 5 minute))
STEP 300 LIMIT 10"
`;

View file

@@ -94,29 +94,33 @@ describe('renderChartConfig', () => {
const generatedSql = await renderChartConfig(config, mockMetadata);
const actual = parameterizedQueryToSql(generatedSql);
expect(actual).toBe(
'WITH RawSum AS (SELECT *,\n' +
' any(Value) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevValue,\n' +
' any(AttributesHash) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevAttributesHash,\n' +
' IF(AggregationTemporality = 1,\n' +
' Value,IF(Value - PrevValue < 0 AND AttributesHash = PrevAttributesHash, Value,\n' +
' IF(AttributesHash != PrevAttributesHash, 0, Value - PrevValue))) as Rate\n' +
' FROM (\n' +
' SELECT *, \n' +
' cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash\n' +
' FROM default.otel_metrics_sum\n' +
" WHERE MetricName = 'db.client.connections.usage'\n" +
' ORDER BY AttributesHash, TimeUnix ASC\n' +
' ) ) SELECT avg(\n' +
' toFloat64OrNull(toString(Rate))\n' +
' ),toStartOfInterval(toDateTime(TimeUnix), INTERVAL 5 minute) AS `__hdx_time_bucket`' +
' FROM RawSum WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000))' +
' GROUP BY toStartOfInterval(toDateTime(TimeUnix), INTERVAL 5 minute) AS `__hdx_time_bucket`' +
' ORDER BY toStartOfInterval(toDateTime(TimeUnix), INTERVAL 5 minute) AS `__hdx_time_bucket`' +
' WITH FILL FROM toUnixTimestamp(toStartOfInterval(fromUnixTimestamp64Milli(1739318400000), INTERVAL 5 minute))\n' +
' TO toUnixTimestamp(toStartOfInterval(fromUnixTimestamp64Milli(1765670400000), INTERVAL 5 minute))\n' +
' STEP 300' +
' LIMIT 10',
expect(actual).toMatchSnapshot();
});
it('should throw error for string select on sum metric', async () => {
const config: ChartConfigWithOptDateRange = {
displayType: DisplayType.Line,
connection: 'test-connection',
metricTables: {
gauge: 'otel_metrics_gauge',
histogram: 'otel_metrics_histogram',
sum: 'otel_metrics_sum',
},
from: {
databaseName: 'default',
tableName: '',
},
select: 'Value',
where: '',
whereLanguage: 'sql',
timestampValueExpression: 'TimeUnix',
dateRange: [new Date('2025-02-12'), new Date('2025-12-14')],
granularity: '5 minute',
limit: { limit: 10 },
};
await expect(renderChartConfig(config, mockMetadata)).rejects.toThrow(
'multi select or string select on metrics not supported',
);
});
@@ -152,39 +156,6 @@ describe('renderChartConfig', () => {
const generatedSql = await renderChartConfig(config, mockMetadata);
const actual = parameterizedQueryToSql(generatedSql);
expect(actual).toBe(
'WITH HistRate AS (SELECT *, any(BucketCounts) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevBucketCounts,\n' +
' any(CountLength) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevCountLength,\n' +
' any(AttributesHash) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevAttributesHash,\n' +
' IF(AggregationTemporality = 1,\n' +
' BucketCounts,\n' +
' IF(AttributesHash = PrevAttributesHash AND CountLength = PrevCountLength,\n' +
' arrayMap((prev, curr) -> IF(curr < prev, curr, toUInt64(toInt64(curr) - toInt64(prev))), PrevBucketCounts, BucketCounts),\n' +
' BucketCounts)) as BucketRates\n' +
' FROM (\n' +
' SELECT *, cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash,\n' +
' length(BucketCounts) as CountLength\n' +
' FROM default.otel_metrics_histogram)\n' +
" WHERE MetricName = 'http.server.duration'\n " +
' ORDER BY Attributes, TimeUnix ASC\n' +
' ),RawHist AS (\n' +
' SELECT *, toUInt64( 0.5 * arraySum(BucketRates)) AS Rank,\n' +
' arrayCumSum(BucketRates) as CumRates,\n' +
' arrayFirstIndex(x -> if(x > Rank, 1, 0), CumRates) AS BucketLowIdx,\n' +
' IF(BucketLowIdx = length(BucketRates),\n' +
' ExplicitBounds[length(ExplicitBounds)], -- if the low bound is the last bucket, use the last bound value\n' +
' IF(BucketLowIdx > 1, -- indexes are 1-based\n' +
' ExplicitBounds[BucketLowIdx] + (ExplicitBounds[BucketLowIdx + 1] - ExplicitBounds[BucketLowIdx]) *\n' +
' intDivOrZero(\n' +
' Rank - CumRates[BucketLowIdx - 1],\n' +
' CumRates[BucketLowIdx] - CumRates[BucketLowIdx - 1]),\n' +
' arrayElement(ExplicitBounds, BucketLowIdx + 1) * intDivOrZero(Rank, CumRates[BucketLowIdx]))) as Rate\n' +
' FROM HistRate) SELECT sum(\n' +
' toFloat64OrNull(toString(Rate))\n' +
' )' +
' FROM RawHist' +
' WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000))' +
' LIMIT 10',
);
expect(actual).toMatchSnapshot();
});
});

View file

@@ -407,6 +407,7 @@ async function timeFilterExpr({
metadata,
connectionId,
with: withClauses,
includedDataInterval,
}: {
timestampValueExpression: string;
dateRange: [Date, Date];
@@ -416,6 +417,7 @@
databaseName: string;
tableName: string;
with?: { name: string; sql: ChSql }[];
includedDataInterval?: string;
}) {
const valueExpressions = timestampValueExpression.split(',');
const startTime = dateRange[0].getTime();
@@ -443,23 +445,23 @@
);
}
const startTimeCond = includedDataInterval
? chSql`toStartOfInterval(fromUnixTimestamp64Milli(${{ Int64: startTime }}), INTERVAL ${includedDataInterval}) - INTERVAL ${includedDataInterval}`
: chSql`fromUnixTimestamp64Milli(${{ Int64: startTime }})`;
const endTimeCond = includedDataInterval
? chSql`toStartOfInterval(fromUnixTimestamp64Milli(${{ Int64: endTime }}), INTERVAL ${includedDataInterval}) + INTERVAL ${includedDataInterval}`
: chSql`fromUnixTimestamp64Milli(${{ Int64: endTime }})`;
// If it's a date type
if (columnMeta?.type === 'Date') {
return chSql`(${unsafeTimestampValueExpression} ${
dateRangeStartInclusive ? '>=' : '>'
} toDate(fromUnixTimestamp64Milli(${{
Int64: startTime,
}})) AND ${unsafeTimestampValueExpression} <= toDate(fromUnixTimestamp64Milli(${{
Int64: endTime,
}})))`;
} toDate(${startTimeCond}) AND ${unsafeTimestampValueExpression} <= toDate(${endTimeCond}))`;
} else {
return chSql`(${unsafeTimestampValueExpression} ${
dateRangeStartInclusive ? '>=' : '>'
} fromUnixTimestamp64Milli(${{
Int64: startTime,
}}) AND ${unsafeTimestampValueExpression} <= fromUnixTimestamp64Milli(${{
Int64: endTime,
}}))`;
} ${startTimeCond} AND ${unsafeTimestampValueExpression} <= ${endTimeCond})`;
}
}),
);
@@ -654,6 +656,7 @@ async function renderWhere(
databaseName: chartConfig.from.databaseName,
tableName: chartConfig.from.tableName,
with: chartConfig.with,
includedDataInterval: chartConfig.includedDataInterval,
})
: [],
whereSearchCondition,
@@ -732,6 +735,7 @@ function renderLimit(
// for metric SQL generation.
type ChartConfigWithOptDateRangeEx = ChartConfigWithOptDateRange & {
with?: { name: string; sql: ChSql }[];
includedDataInterval?: string;
};
function renderWith(
@@ -872,36 +876,95 @@ async function translateMetricChartConfig(
timestampValueExpression: timeBucketCol,
};
} else if (metricType === MetricsDataType.Sum && metricName) {
const timeBucketCol = '__hdx_time_bucket2';
const valueHighCol = '`__hdx_value_high`';
const valueHighPrevCol = '`__hdx_value_high_prev`';
const timeExpr = timeBucketExpr({
interval: chartConfig.granularity || 'auto',
timestampValueExpression:
chartConfig.timestampValueExpression || 'TimeUnix',
dateRange: chartConfig.dateRange,
alias: timeBucketCol,
});
// Render the where clause to limit data selection on the source CTE but also search forward/back one
// bucket window to ensure that there is enough data to compute a reasonable value on the ends of the
// series.
const where = await renderWhere(
{
...chartConfig,
from: {
...from,
tableName: metricTables[MetricsDataType.Gauge],
},
filters: [
{
type: 'sql',
condition: `MetricName = '${metricName}'`,
},
],
includedDataInterval:
chartConfig.granularity === 'auto' &&
Array.isArray(chartConfig.dateRange)
? convertDateRangeToGranularityString(chartConfig.dateRange, 60)
: chartConfig.granularity,
},
metadata,
);
return {
...restChartConfig,
with: [
{
name: 'RawSum',
sql: chSql`SELECT *,
any(Value) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevValue,
any(AttributesHash) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevAttributesHash,
IF(AggregationTemporality = 1,
Value,IF(Value - PrevValue < 0 AND AttributesHash = PrevAttributesHash, Value,
IF(AttributesHash != PrevAttributesHash, 0, Value - PrevValue))) as Rate
FROM (
SELECT *,
cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash
name: 'Source',
sql: chSql`
SELECT
*,
cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash,
IF(AggregationTemporality = 1,
SUM(Value) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
deltaSum(Value) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
) AS Value
FROM ${renderFrom({ from: { ...from, tableName: metricTables[MetricsDataType.Sum] } })}
WHERE MetricName = '${metricName}'
ORDER BY AttributesHash, TimeUnix ASC
) `,
WHERE ${where}`,
},
],
select: [
{
..._select,
valueExpression: 'Rate',
name: 'Bucketed',
sql: chSql`
SELECT
${timeExpr},
AttributesHash,
last_value(Source.Value) AS ${valueHighCol},
any(${valueHighCol}) OVER(PARTITION BY AttributesHash ORDER BY \`${timeBucketCol}\` ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS ${valueHighPrevCol},
${valueHighCol} - ${valueHighPrevCol} AS Value,
any(ResourceAttributes) AS ResourceAttributes,
any(ResourceSchemaUrl) AS ResourceSchemaUrl,
any(ScopeName) AS ScopeName,
any(ScopeVersion) AS ScopeVersion,
any(ScopeAttributes) AS ScopeAttributes,
any(ScopeDroppedAttrCount) AS ScopeDroppedAttrCount,
any(ScopeSchemaUrl) AS ScopeSchemaUrl,
any(ServiceName) AS ServiceName,
any(MetricName) AS MetricName,
any(MetricDescription) AS MetricDescription,
any(MetricUnit) AS MetricUnit,
any(Attributes) AS Attributes,
any(StartTimeUnix) AS StartTimeUnix,
any(Flags) AS Flags,
any(AggregationTemporality) AS AggregationTemporality,
any(IsMonotonic) AS IsMonotonic
FROM Source
GROUP BY AttributesHash, \`${timeBucketCol}\`
ORDER BY AttributesHash, \`${timeBucketCol}\`
`,
},
],
select,
from: {
databaseName: '',
tableName: 'RawSum',
tableName: 'Bucketed',
},
timestampValueExpression: `\`${timeBucketCol}\``,
};
} else if (metricType === MetricsDataType.Histogram && metricName) {
// histograms are only valid for quantile selections