fix: fix histogram metric query (#737)

Fix for the histogram query, based on the late-night session at KubeCon.

Ref: HDX-1572
Dan Hable 2025-04-14 17:04:13 -05:00 committed by GitHub
parent 08009ac3b3
commit 4865ce7a62
5 changed files with 216 additions and 98 deletions

View file

@@ -0,0 +1,5 @@
---
"@hyperdx/common-utils": patch
---
Fixes the histogram query to perform quantile calculation across all data points
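As context for the fix, here is a minimal TypeScript sketch (illustrative only, not code from this commit) of the bucket-quantile interpolation the rewritten query performs: merge bucket counts across series, locate the bucket containing the target rank, and interpolate within its bounds. The helper name and signature are hypothetical.

```ts
// Illustrative sketch of the quantile-from-histogram math in the fixed query.
// `bounds` has one fewer element than `bucketCounts`, as in OTel
// explicit-bounds histograms.
function histogramQuantile(
  level: number,
  bucketCounts: number[],
  bounds: number[],
): number {
  const rank = level * bucketCounts.reduce((a, b) => a + b, 0);
  const cum: number[] = [];
  let acc = 0;
  for (const c of bucketCounts) cum.push((acc += c));
  // First bucket whose cumulative count exceeds the rank
  // (ClickHouse arrayFirstIndex is 1-based; findIndex is 0-based).
  const idx = cum.findIndex(c => c > rank);
  if (idx < 0) return 0; // mirrors CASE WHEN BucketLowIdx = 0 THEN 0
  if (idx === 0) return bounds[0];
  if (idx === bucketCounts.length - 1) return bounds[bounds.length - 1];
  const lower = bounds[idx - 1];
  const upper = bounds[idx];
  // Same form as the CASE arm in the new Bucketed CTE:
  // lower + ((Rank - lower) / upper) * (upper - lower)
  return lower + ((rank - lower) / upper) * (upper - lower);
}

// p50 over merged delta counts [10, 10, 10] with bounds [10, 30], as in the
// two_timestamps_bounded test below:
histogramQuantile(0.5, [10, 10, 10], [10, 30]); // ~13.3333, matching the snapshot
```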

View file

@@ -60,6 +60,15 @@ Array [
]
`;
exports[`renderChartConfig Query Metrics should include multiple data points in percentile computation (p50) 1`] = `
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"any(toFloat64OrNull(toString(Rate)))": 2.9,
},
]
`;
exports[`renderChartConfig Query Metrics single avg gauge with group-by 1`] = `
Array [
Object {
@@ -192,11 +201,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_bounded histogram (p25)
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 0,
"any(toFloat64OrNull(toString(Rate)))": 0,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 10,
"any(toFloat64OrNull(toString(Rate)))": 10,
},
]
`;
@@ -205,11 +214,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_bounded histogram (p50)
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 0,
"any(toFloat64OrNull(toString(Rate)))": 0,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 20,
"any(toFloat64OrNull(toString(Rate)))": 13.333333333333332,
},
]
`;
@@ -218,11 +227,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_bounded histogram (p90)
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 0,
"any(toFloat64OrNull(toString(Rate)))": 0,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 30,
"any(toFloat64OrNull(toString(Rate)))": 30,
},
]
`;
@@ -231,11 +240,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_lower_bound_inf histogra
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 0,
"any(toFloat64OrNull(toString(Rate)))": 0,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 1,
"any(toFloat64OrNull(toString(Rate)))": 1,
},
]
`;
@@ -244,11 +253,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_upper_bound_inf histogra
Array [
Object {
"__hdx_time_bucket": "2022-01-05T00:00:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 0,
"any(toFloat64OrNull(toString(Rate)))": 0,
},
Object {
"__hdx_time_bucket": "2022-01-05T00:01:00Z",
"sum(toFloat64OrNull(toString(Rate)))": 30,
"any(toFloat64OrNull(toString(Rate)))": 30,
},
]
`;

View file

@@ -446,6 +446,36 @@ describe('renderChartConfig', () => {
AggregationTemporality: 2, // Cumulative
...point,
}));
const histPointsE = [
{
BucketCounts: [1, 1, 1, 1, 1, 1],
ExplicitBounds: [1, 2, 5, 8, 13],
TimeUnix: new Date(now),
ResourceAttributes: { host: 'test-a' },
},
{
BucketCounts: [2, 2, 2, 2, 2, 2],
ExplicitBounds: [1, 2, 5, 8, 13],
TimeUnix: new Date(now + ms('5s')),
ResourceAttributes: { host: 'test-b' },
},
{
BucketCounts: [2, 1, 2, 1, 2, 1],
ExplicitBounds: [1, 2, 5, 8, 13],
TimeUnix: new Date(now + ms('1m')),
ResourceAttributes: { host: 'test-a' },
},
{
BucketCounts: [3, 3, 2, 2, 3, 3],
ExplicitBounds: [1, 2, 5, 8, 13],
TimeUnix: new Date(now + ms('65s')),
ResourceAttributes: { host: 'test-b' },
},
].map(point => ({
MetricName: 'test.multiple_series',
AggregationTemporality: 2, // Cumulative
...point,
}));
await Promise.all([
bulkInsertMetricsGauge([...gaugePointsA, ...gaugePointsB]),
@@ -462,6 +492,7 @@ describe('renderChartConfig', () => {
...histPointsB,
...histPointsC,
...histPointsD,
...histPointsE,
]),
]);
});
@@ -655,6 +686,10 @@ describe('renderChartConfig', () => {
});
it('calculates min_rate/max_rate correctly for sum metrics', async () => {
// Raw Data is
// MIN_VARIANT_0: [0, 1, 8, 0, 7, 7, 15, 17, 0, 42]
// MIN_VARIANT_1: [0, 2, 9, 0, 15, 25, 35, 57, 0, 92]
//
// Based on the data inserted in the fixture, the expected stream of values
// for each series after adjusting for the zero reset should be:
// MIN_VARIANT_0: [0, 1, 8, 8, 15, 15, 23, 25, 25, 67]
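A standalone sketch of that zero-reset adjustment (illustrative, not the query's actual SQL): when a cumulative counter drops below its predecessor, the new raw value itself is treated as the delta before re-accumulating.

```ts
// Sketch of the zero-reset adjustment described in the comment above:
// a drop in a cumulative counter signals a reset, so the new raw value
// becomes the delta for that step.
function adjustForResets(raw: number[]): number[] {
  const adjusted: number[] = [];
  let acc = 0;
  let prev = 0;
  for (const curr of raw) {
    acc += curr < prev ? curr : curr - prev;
    prev = curr;
    adjusted.push(acc);
  }
  return adjusted;
}

adjustForResets([0, 1, 8, 0, 7, 7, 15, 17, 0, 42]);
// => [0, 1, 8, 8, 15, 15, 23, 25, 25, 67]  (MIN_VARIANT_0 above)
```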
@@ -725,20 +760,10 @@
Since the AggregationTemporality is 2 (cumulative), we need to calculate the delta between the two points:
delta: [10, 10, 10] - [0, 0, 0] = [10, 10, 10]
Total observations: 10 + 10 + 10 = 30
Cumulative counts: [10, 20, 30]
p50 point:
Rank = 0.5 * 30 = 15
This falls in the second bucket (since 10 < 15 <= 20)
We need to interpolate between the lower and upper bounds of the second bucket:
Lower bound: 10
Upper bound: 30
Position in bucket: (15 - 10) / (20 - 10) = 0.5
Interpolated value: 10 + (30 - 10) * 0.5 = 10 + 10 = 20
Thus the first point value would be 0 since it's at the start of the bounds.
The second point value would be 20 since that is the median point value delta from the first point.
cum sum = [10, 20, 30]
rank = 0.5 * 30 = 15 (between bounds 10 - 30)
interpolate: 10 + ((15 - 10) / 30) * (30 - 10) = 13.3333
*/
const query = await renderChartConfig(
{
@@ -952,6 +977,57 @@ describe('renderChartConfig', () => {
expect(res).toMatchSnapshot();
});
it('should include multiple data points in percentile computation (p50)', async () => {
/*
bounds: [1, 2, 5, 8, 13]
host = test-a:
p1 = [1, 1, 1, 1, 1, 1]
p2 = [2, 1, 2, 1, 2, 1]
host = test-b:
p1 = [2, 2, 2, 2, 2, 2]
p2 = [3, 3, 2, 2, 3, 3]
Compute the diff between adjacent points for each unique host (deltaSumForEach)
host = test-a, diff = [1, 0, 1, 0, 1, 0]
host = test-b, diff = [1, 1, 0, 0, 1, 1]
Sum the diffs element-wise to obtain a combined count d across the series
d = [2, 1, 1, 0, 2, 1]
Now compute the p50 value:
sum(d) = 7
cum sum = [2, 3, 4, 4, 6, 7]
rank = 0.5 * 7 = 3.5 (between bounds 2 - 5)
interpolate: 2 + ((3.5 - 2) / 5) * (5 - 2) = 2.9
Since all the points fall within a single granularity interval the result should be a single row
with the value 2.9.
*/
const query = await renderChartConfig(
{
select: [
{
aggFn: 'quantile',
level: 0.5,
metricName: 'test.multiple_series',
metricType: MetricsDataType.Histogram,
valueExpression: 'Value',
},
],
from: metricSource.from,
where: '',
metricTables: TEST_METRIC_TABLES,
dateRange: [new Date(now), new Date(now + ms('2m'))],
granularity: '5 minute',
timestampValueExpression: metricSource.timestampValueExpression,
connection: connection.id,
},
metadata,
);
const res = await queryData(query);
expect(res).toMatchSnapshot();
});
// HDX-1515: Handle counter reset in histogram metric in the same way that the counter reset
// is handled for sum metrics.
it.skip('three_timestamps_bounded histogram with reset (p50)', async () => {
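The arithmetic in the multiple-series p50 test comment above can be checked with a short standalone sketch (not part of the commit); `histogramQuantile` refers to the illustrative helper sketched earlier.

```ts
// Per-host element-wise deltas (what deltaSumForEach produces per series),
// then an element-wise sum across hosts, mirroring the test comment.
const bounds = [1, 2, 5, 8, 13];
const hostA = { p1: [1, 1, 1, 1, 1, 1], p2: [2, 1, 2, 1, 2, 1] };
const hostB = { p1: [2, 2, 2, 2, 2, 2], p2: [3, 3, 2, 2, 3, 3] };

const diff = (prev: number[], curr: number[]) => curr.map((c, i) => c - prev[i]);
const dA = diff(hostA.p1, hostA.p2); // [1, 0, 1, 0, 1, 0]
const dB = diff(hostB.p1, hostB.p2); // [1, 1, 0, 0, 1, 1]
const merged = dA.map((d, i) => d + dB[i]); // [2, 1, 1, 0, 2, 1]

// Using the histogramQuantile sketch from earlier:
// cum = [2, 3, 4, 4, 6, 7], rank = 3.5, falling between bounds 2 and 5,
// so 2 + ((3.5 - 2) / 5) * (5 - 2) = 2.9, matching the snapshot.
```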

View file

@@ -36,39 +36,47 @@ exports[`renderChartConfig should generate sql for a single gauge metric 1`] = `
`;
exports[`renderChartConfig should generate sql for a single histogram metric 1`] = `
"WITH HistRate AS (
"WITH Source AS (
SELECT
*,
cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash,
length(BucketCounts) as CountLength,
any(BucketCounts) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevBucketCounts,
any(CountLength) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevCountLength,
any(AttributesHash) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevAttributesHash,
IF(AggregationTemporality = 1,
BucketCounts,
IF(AttributesHash = PrevAttributesHash AND CountLength = PrevCountLength,
arrayMap((prev, curr) -> IF(curr < prev, curr, toUInt64(toInt64(curr) - toInt64(prev))), PrevBucketCounts, BucketCounts),
BucketCounts)) as BucketRates
sumForEach(BucketCounts) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
deltaSumForEach(BucketCounts) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
) AS MonoBucketCounts
FROM default.otel_metrics_histogram
WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000)) AND ((MetricName = 'http.server.duration'))
ORDER BY Attributes, TimeUnix ASC
),RawHist AS (
ORDER BY AttributesHash ASC
),Bucketed AS (
SELECT
*,
toUInt64( 0.5 * arraySum(BucketRates)) AS Rank,
arrayCumSum(BucketRates) as CumRates,
arrayFirstIndex(x -> if(x > Rank, 1, 0), CumRates) AS BucketLowIdx,
IF(BucketLowIdx = length(BucketRates),
arrayElement(ExplicitBounds, length(ExplicitBounds)),
IF(BucketLowIdx > 1,
arrayElement(ExplicitBounds, BucketLowIdx - 1) + (arrayElement(ExplicitBounds, BucketLowIdx) - arrayElement(ExplicitBounds, BucketLowIdx - 1)) *
IF(arrayElement(CumRates, BucketLowIdx) > arrayElement(CumRates, BucketLowIdx - 1),
(Rank - arrayElement(CumRates, BucketLowIdx - 1)) / (arrayElement(CumRates, BucketLowIdx) - arrayElement(CumRates, BucketLowIdx - 1)), 0),
arrayElement(ExplicitBounds, BucketLowIdx)
)) as Rate
FROM HistRate) SELECT sum(
any(Source.ExplicitBounds) AS ExplicitBounds,
sumForEach(MonoBucketCounts) AS TotalBucketCounts,
arrayCumSum(TotalBucketCounts) AS BucketCumCounts,
0.5 * arraySum(TotalBucketCounts) AS Rank,
arrayFirstIndex(x -> if(x > Rank, 1, 0), BucketCumCounts) AS BucketLowIdx,
CASE
WHEN BucketLowIdx = 0 THEN 0
WHEN BucketLowIdx = 1 THEN ExplicitBounds[BucketLowIdx]
WHEN BucketLowIdx = length(TotalBucketCounts) THEN ExplicitBounds[length(ExplicitBounds)]
ELSE
ExplicitBounds[BucketLowIdx - 1] +
((Rank - ExplicitBounds[BucketLowIdx - 1]) / (ExplicitBounds[BucketLowIdx])) * (ExplicitBounds[BucketLowIdx] - ExplicitBounds[BucketLowIdx - 1])
END AS Value
FROM Source
GROUP BY
ORDER BY ASC
),Rates AS (
SELECT
\`__hdx_time_bucket\`,
any(Bucketed.Value) AS \`__hdx_value_high\`,
any(\`__hdx_value_high\`) OVER (ORDER BY \`__hdx_time_bucket\` ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS \`__hdx_prev_high\`,
\`__hdx_value_high\` - \`__hdx_prev_high\` AS Rate
FROM Bucketed
GROUP BY \`__hdx_time_bucket\`
) SELECT any(
toFloat64OrNull(toString(Rate))
) FROM RawHist WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000)) LIMIT 10"
) FROM Rates WHERE (\`__hdx_time_bucket\` >= fromUnixTimestamp64Milli(1739318400000) AND \`__hdx_time_bucket\` <= fromUnixTimestamp64Milli(1765670400000)) LIMIT 10"
`;
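For orientation, a small sketch (not from the commit) of what the final Rates CTE computes: each time bucket's quantile is taken over monotonic cumulative counts, so the emitted Rate is the difference from the previous bucket's value, which is what the 1-PRECEDING window above expresses.

```ts
// Difference each bucket's cumulative quantile from its predecessor, as the
// Rates CTE does with __hdx_value_high / __hdx_prev_high. Assumes any() over
// an empty 1-PRECEDING frame yields the column type's default (0 for numbers),
// so the first bucket's Rate equals its own value.
function toRates(bucketedValues: number[]): number[] {
  return bucketedValues.map((v, i) => v - (i === 0 ? 0 : bucketedValues[i - 1]));
}

// Values consistent with the two_timestamps_bounded p50 snapshot:
toRates([0, 13.333333333333332]); // => [0, 13.333333333333332]
```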
exports[`renderChartConfig should generate sql for a single sum metric 1`] = `

View file

@@ -1061,87 +1061,107 @@ async function translateMetricChartConfig(
throw new Error('quantile must be specified for histogram metrics');
}
// Render the where clause to limit data selection on the source CTE but also search forward/back one
// bucket window to ensure that there is enough data to compute a reasonable value on the ends of the
// series.
const where = await renderWhere(
{
...chartConfig,
from: {
...from,
tableName: metricTables[MetricsDataType.Histogram],
},
filters: [
...(filters ?? []),
{
type: 'sql',
condition: `MetricName = '${metricName}'`,
},
],
includedDataInterval:
chartConfig.granularity === 'auto' &&
Array.isArray(chartConfig.dateRange)
? convertDateRangeToGranularityString(chartConfig.dateRange, 60)
: chartConfig.granularity,
// Render the various clauses from the user input so they can be woven into the CTE queries. The dateRange
// is manipulated to search forward/back one bucket window to ensure that there is enough data to compute
// a reasonable value on the ends of the series.
const cteChartConfig = {
...chartConfig,
from: {
...from,
tableName: metricTables[MetricsDataType.Histogram],
},
metadata,
);
filters: [
...(filters ?? []),
{
type: 'sql',
condition: `MetricName = '${metricName}'`,
},
],
includedDataInterval:
chartConfig.granularity === 'auto' &&
Array.isArray(chartConfig.dateRange)
? convertDateRangeToGranularityString(chartConfig.dateRange, 60)
: chartConfig.granularity,
} as ChartConfigWithOptDateRangeEx;
const timeBucketSelect = isUsingGranularity(cteChartConfig)
? timeBucketExpr({
interval: cteChartConfig.granularity,
timestampValueExpression: cteChartConfig.timestampValueExpression,
dateRange: cteChartConfig.dateRange,
})
: chSql``;
const where = await renderWhere(cteChartConfig, metadata);
const groupBy = (await renderGroupBy(chartConfig, metadata)) || chSql``;
return {
...restChartConfig,
with: [
{
name: 'HistRate',
name: 'Source',
sql: chSql`
SELECT
*,
cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash,
length(BucketCounts) as CountLength,
any(BucketCounts) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevBucketCounts,
any(CountLength) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevCountLength,
any(AttributesHash) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevAttributesHash,
IF(AggregationTemporality = 1,
BucketCounts,
IF(AttributesHash = PrevAttributesHash AND CountLength = PrevCountLength,
arrayMap((prev, curr) -> IF(curr < prev, curr, toUInt64(toInt64(curr) - toInt64(prev))), PrevBucketCounts, BucketCounts),
BucketCounts)) as BucketRates
sumForEach(BucketCounts) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
deltaSumForEach(BucketCounts) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
) AS MonoBucketCounts
FROM ${renderFrom({ from: { ...from, tableName: metricTables[MetricsDataType.Histogram] } })}
WHERE ${where}
ORDER BY Attributes, TimeUnix ASC
ORDER BY AttributesHash ASC
`,
},
{
name: 'RawHist',
name: 'Bucketed',
sql: chSql`
SELECT
*,
toUInt64( ${{ Float64: level }} * arraySum(BucketRates)) AS Rank,
arrayCumSum(BucketRates) as CumRates,
arrayFirstIndex(x -> if(x > Rank, 1, 0), CumRates) AS BucketLowIdx,
IF(BucketLowIdx = length(BucketRates),
arrayElement(ExplicitBounds, length(ExplicitBounds)),
IF(BucketLowIdx > 1,
arrayElement(ExplicitBounds, BucketLowIdx - 1) + (arrayElement(ExplicitBounds, BucketLowIdx) - arrayElement(ExplicitBounds, BucketLowIdx - 1)) *
IF(arrayElement(CumRates, BucketLowIdx) > arrayElement(CumRates, BucketLowIdx - 1),
(Rank - arrayElement(CumRates, BucketLowIdx - 1)) / (arrayElement(CumRates, BucketLowIdx) - arrayElement(CumRates, BucketLowIdx - 1)), 0),
arrayElement(ExplicitBounds, BucketLowIdx)
)) as Rate
FROM HistRate`,
${timeBucketSelect.sql ? chSql`${timeBucketSelect},` : ''}
any(Source.ExplicitBounds) AS ExplicitBounds,
sumForEach(MonoBucketCounts) AS TotalBucketCounts,
arrayCumSum(TotalBucketCounts) AS BucketCumCounts,
${{ Float64: level }} * arraySum(TotalBucketCounts) AS Rank,
arrayFirstIndex(x -> if(x > Rank, 1, 0), BucketCumCounts) AS BucketLowIdx,
CASE
WHEN BucketLowIdx = 0 THEN 0
WHEN BucketLowIdx = 1 THEN ExplicitBounds[BucketLowIdx]
WHEN BucketLowIdx = length(TotalBucketCounts) THEN ExplicitBounds[length(ExplicitBounds)]
ELSE
ExplicitBounds[BucketLowIdx - 1] +
((Rank - ExplicitBounds[BucketLowIdx - 1]) / (ExplicitBounds[BucketLowIdx])) * (ExplicitBounds[BucketLowIdx] - ExplicitBounds[BucketLowIdx - 1])
END AS Value
FROM Source
GROUP BY ${groupBy}
ORDER BY ${groupBy} ASC
`,
},
{
name: 'Rates',
sql: chSql`
SELECT
\`__hdx_time_bucket\`,
any(Bucketed.Value) AS \`__hdx_value_high\`,
any(\`__hdx_value_high\`) OVER (ORDER BY \`__hdx_time_bucket\` ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS \`__hdx_prev_high\`,
\`__hdx_value_high\` - \`__hdx_prev_high\` AS Rate
FROM Bucketed
GROUP BY \`__hdx_time_bucket\`
`,
},
],
select: [
{
..._selectRest,
aggFn: 'sum',
aggFn: 'any', // groupings happen earlier in the CTE stack; we just need to grab the value
aggCondition: '', // clear out the condition since the where clause is already applied in the upstream CTE
valueExpression: 'Rate',
},
],
from: {
databaseName: '',
tableName: 'RawHist',
tableName: 'Rates',
},
where: '', // clear out the condition since the where clause is already applied in the upstream CTE
timestampValueExpression: '`__hdx_time_bucket`',
};
}