fix: Fix Services Dashboard Database tab charts (#1435)

Closes HDX-2960

# Summary

This PR makes two fixes to improve the charts on the Database tab of the Services dashboard.

1. Previously, the charts were not rendering a series per query, since ClickHouse returns type `Nullable(String)` for the statement expression (`coalesce(nullif(SpanAttributes['db.query.text'], ''), nullif(SpanAttributes['db.statement'], '')) AS "Statement"`). Our `convertCHDataTypeToJSType()` function failed to map this to a `String` type in JS, which resulted in the column not being inferred as a group column by `inferGroupColumns()` in `formatResponseForTimeChart()`.
2. Once we started rendering a series per query, the page immediately started OOM crashing on any serious volume of data because there are too many distinct groups/queries being returned. To fix this, the query now selects only the queries with the highest values across any time bucket. The queries do the following:
    1. First, apply filters and group by query and time bucket
    2. Then, `groupArray` the values and time buckets for each query
    3. Select the top 60 queries by max value across all time buckets
    4. `arrayJoin(zip())` to transform the results back into the row-per-group-per-time-bucket format that `formatResponseForTimeChart` expects.

(This is the same approach recently applied to the `Request Error Rate by endpoint` chart on the HTTP tab.)


## Before

<img width="1453" height="791" alt="Screenshot 2025-12-03 at 10 58 31 AM" src="https://github.com/user-attachments/assets/ffa697e4-25bb-4ac6-aed2-703cc3c547bf" />

## After

<img width="1451" height="825" alt="Screenshot 2025-12-03 at 10 57 40 AM" src="https://github.com/user-attachments/assets/42e46d2a-361e-490e-8976-18edeca39e0f" />
This commit is contained in:
Drew Davis 2025-12-03 15:04:24 -05:00 committed by GitHub
parent bd96c98cbf
commit ff422206c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 295 additions and 63 deletions

View file

@ -0,0 +1,6 @@
---
"@hyperdx/common-utils": patch
"@hyperdx/app": patch
---
fix: Fix Services Dashboard Database tab charts

View file

@ -319,7 +319,8 @@ export const LegendRenderer = memo<{
);
});
const HARD_LINES_LIMIT = 60;
export const HARD_LINES_LIMIT = 60;
export const MemoChart = memo(function MemoChart({
graphResults,
setIsClickActive,

View file

@ -1,5 +1,6 @@
import { useCallback, useEffect, useMemo, useState } from 'react';
import dynamic from 'next/dynamic';
import { pick } from 'lodash';
import {
parseAsString,
parseAsStringEnum,
@ -8,7 +9,10 @@ import {
} from 'nuqs';
import { UseControllerProps, useForm } from 'react-hook-form';
import { tcFromSource } from '@hyperdx/common-utils/dist/core/metadata';
import { DEFAULT_AUTO_GRANULARITY_MAX_BUCKETS } from '@hyperdx/common-utils/dist/core/renderChartConfig';
import {
ChartConfigWithDateRange,
CteChartConfig,
DisplayType,
Filter,
SourceKind,
@ -27,6 +31,7 @@ import {
import { IconPlayerPlay } from '@tabler/icons-react';
import {
convertDateRangeToGranularityString,
ERROR_RATE_PERCENTAGE_NUMBER_FORMAT,
INTEGER_NUMBER_FORMAT,
MS_NUMBER_FORMAT,
@ -54,6 +59,8 @@ import { useSource, useSources } from '@/source';
import { Histogram } from '@/SVGIcons';
import { parseTimeQuery, useNewTimeQuery } from '@/timeQuery';
import { HARD_LINES_LIMIT } from './HDXMultiSeriesTimeChart';
type AppliedConfig = {
source?: string | null;
service?: string | null;
@ -61,6 +68,8 @@ type AppliedConfig = {
whereLanguage?: 'sql' | 'lucene' | null;
};
const MAX_NUM_SERIES = HARD_LINES_LIMIT;
function getScopedFilters(
source: TSource,
appliedConfig: AppliedConfig,
@ -551,6 +560,248 @@ function DatabaseTab({
return window.location.pathname + '?' + searchParams.toString();
}, []);
// Chart config for the "Total Time Consumed per Query" chart; null when no source is available.
// Instead of charting every distinct statement, the query is staged through three CTEs:
//   1. queries_by_total_time     - summed duration per Statement (and per time bucket)
//   2. top_queries_by_total_time - per-Statement arrays of values/buckets, limited to MAX_NUM_SERIES rows
//   3. zipped_series             - the arrays expanded back into one (value, bucket) tuple per row
// The outer select then splits each tuple into a value column and a time bucket column.
const totalTimePerQueryConfig =
useMemo<ChartConfigWithDateRange | null>(() => {
if (!source) return null;
return {
with: [
{
// Stage 1: apply the user's filters and aggregate duration per statement.
name: 'queries_by_total_time',
isSubquery: true,
chartConfig: {
...pick(source, [
'timestampValueExpression',
'connection',
'from',
]),
where: appliedConfig.where || '',
whereLanguage: appliedConfig.whereLanguage || 'sql',
select: [
{
alias: 'total_query_time_ms',
aggFn: 'sum',
valueExpression: expressions.durationInMillis,
aggCondition: '',
},
{
alias: 'Statement',
valueExpression: expressions.dbStatement,
},
],
groupBy: 'Statement',
filters: [
...getScopedFilters(source, appliedConfig, false),
{ type: 'sql', condition: expressions.isDbSpan },
],
// Date range and granularity add an `__hdx_time_bucket` column to select and group by
dateRange: searchedTimeRange,
granularity: convertDateRangeToGranularityString(
searchedTimeRange,
DEFAULT_AUTO_GRANULARITY_MAX_BUCKETS,
),
} as CteChartConfig,
},
{
// Stage 2: one row per statement, carrying parallel arrays of values and time buckets.
name: 'top_queries_by_total_time',
isSubquery: true,
chartConfig: {
connection: source.connection,
select: [
{ valueExpression: 'Statement' },
{
valueExpression: 'groupArray(total_query_time_ms)',
alias: 'total_query_time_ms',
},
{
valueExpression: 'groupArray(__hdx_time_bucket)',
alias: '__hdx_time_buckets',
},
],
// Reads from the stage 1 CTE (referenced by name, with no database)
from: { databaseName: '', tableName: 'queries_by_total_time' },
groupBy: 'Statement',
where: '',
// Select the top MAX_NUM_SERIES queries by max time in any bucket
// NOTE(review): the column is qualified with the CTE name, presumably so it resolves to the
// stage 1 column rather than the same-named groupArray alias above - confirm
orderBy: 'max(queries_by_total_time.total_query_time_ms) DESC',
limit: { limit: MAX_NUM_SERIES },
timestampValueExpression: '', // required only to satisfy CTE schema
},
},
{
// Stage 3: pair each value with its time bucket and emit one row per pair.
name: 'zipped_series',
isSubquery: true,
chartConfig: {
connection: source.connection,
select: [
{ valueExpression: 'Statement' },
{
valueExpression:
'arrayJoin(arrayZip(total_query_time_ms, __hdx_time_buckets))',
alias: 'zipped',
},
],
from: {
databaseName: '',
tableName: 'top_queries_by_total_time',
},
where: '',
timestampValueExpression: '', // required only to satisfy CTE schema
},
},
],
// Tuple element 1 is the value and element 2 is the time bucket, matching the arrayZip argument order
select: [
{ valueExpression: 'Statement' },
{
valueExpression: 'tupleElement(zipped, 1)',
alias: 'Total Query Time',
},
{
valueExpression: 'tupleElement(zipped, 2)',
alias: 'series_time_bucket',
},
],
from: { databaseName: '', tableName: 'zipped_series' },
where: '',
displayType: DisplayType.StackedBar,
numberFormat: MS_NUMBER_FORMAT,
groupBy: 'Statement, zipped',
dateRange: searchedTimeRange,
// The time column is the bucket alias selected above
timestampValueExpression: 'series_time_bucket',
connection: source.connection,
} satisfies ChartConfigWithDateRange;
}, [
appliedConfig,
expressions.dbStatement,
expressions.durationInMillis,
expressions.isDbSpan,
searchedTimeRange,
source,
]);
// Chart config for the "Throughput per Query" chart; null when no source is available.
// Instead of charting every distinct statement, the query is staged through three CTEs:
//   1. queries_by_total_count     - span count per Statement (and per time bucket)
//   2. top_queries_by_total_count - per-Statement arrays of counts/buckets, limited to MAX_NUM_SERIES rows
//   3. zipped_series              - the arrays expanded back into one (count, bucket) tuple per row
// The outer select then splits each tuple into a count column and a time bucket column.
const totalThroughputPerQueryConfig =
useMemo<ChartConfigWithDateRange | null>(() => {
if (!source) return null;
return {
with: [
{
// Stage 1: apply the user's filters and count matching rows per statement.
name: 'queries_by_total_count',
isSubquery: true,
chartConfig: {
...pick(source, [
'timestampValueExpression',
'connection',
'from',
]),
where: appliedConfig.where || '',
whereLanguage: appliedConfig.whereLanguage || 'sql',
select: [
{
alias: 'total_query_count',
aggFn: 'count',
valueExpression: '',
aggCondition: '',
},
{
alias: 'Statement',
valueExpression: expressions.dbStatement,
},
],
groupBy: 'Statement',
filters: [
...getScopedFilters(source, appliedConfig, false),
{ type: 'sql', condition: expressions.isDbSpan },
],
// Date range and granularity add an `__hdx_time_bucket` column to select and group by
dateRange: searchedTimeRange,
granularity: convertDateRangeToGranularityString(
searchedTimeRange,
DEFAULT_AUTO_GRANULARITY_MAX_BUCKETS,
),
} as CteChartConfig,
},
{
// Stage 2: one row per statement, carrying parallel arrays of counts and time buckets.
name: 'top_queries_by_total_count',
isSubquery: true,
chartConfig: {
connection: source.connection,
select: [
{ valueExpression: 'Statement' },
{
valueExpression: 'groupArray(total_query_count)',
alias: 'total_query_count',
},
{
valueExpression: 'groupArray(__hdx_time_bucket)',
alias: '__hdx_time_buckets',
},
],
// Reads from the stage 1 CTE (referenced by name, with no database)
from: { databaseName: '', tableName: 'queries_by_total_count' },
groupBy: 'Statement',
where: '',
// Select the top MAX_NUM_SERIES queries by max count in any bucket
// NOTE(review): the column is qualified with the CTE name, presumably so it resolves to the
// stage 1 column rather than the same-named groupArray alias above - confirm
orderBy: 'max(queries_by_total_count.total_query_count) DESC',
limit: { limit: MAX_NUM_SERIES },
timestampValueExpression: '', // required only to satisfy CTE schema
},
},
{
// Stage 3: pair each count with its time bucket and emit one row per pair.
name: 'zipped_series',
isSubquery: true,
chartConfig: {
connection: source.connection,
select: [
{ valueExpression: 'Statement' },
{
valueExpression:
'arrayJoin(arrayZip(total_query_count, __hdx_time_buckets))',
alias: 'zipped',
},
],
from: {
databaseName: '',
tableName: 'top_queries_by_total_count',
},
where: '',
timestampValueExpression: '', // required only to satisfy CTE schema
},
},
],
// Tuple element 1 is the count and element 2 is the time bucket, matching the arrayZip argument order
select: [
{ valueExpression: 'Statement' },
{
valueExpression: 'tupleElement(zipped, 1)',
alias: 'Total Query Count',
},
{
valueExpression: 'tupleElement(zipped, 2)',
alias: 'series_time_bucket',
},
],
from: { databaseName: '', tableName: 'zipped_series' },
where: '',
displayType: DisplayType.StackedBar,
numberFormat: {
...INTEGER_NUMBER_FORMAT,
unit: 'queries',
},
groupBy: 'Statement, zipped',
dateRange: searchedTimeRange,
// The time column is the bucket alias selected above
timestampValueExpression: 'series_time_bucket',
connection: source.connection,
} satisfies ChartConfigWithDateRange;
}, [
appliedConfig,
expressions.dbStatement,
expressions.isDbSpan,
searchedTimeRange,
source,
]);
return (
<Grid mt="md" grow={false} w="100%" maw="100%" overflow="hidden">
<Grid.Col span={6}>
@ -558,30 +809,11 @@ function DatabaseTab({
<Group justify="space-between" align="center" mb="sm">
<Text size="sm">Total Time Consumed per Query</Text>
</Group>
{source && (
{source && totalTimePerQueryConfig && (
<DBTimeChart
sourceId={source.id}
config={{
...source,
displayType: DisplayType.StackedBar,
where: appliedConfig.where || '',
whereLanguage: appliedConfig.whereLanguage || 'sql',
select: [
{
alias: 'Total Query Time',
aggFn: 'sum',
valueExpression: expressions.durationInMillis,
aggCondition: '',
},
],
filters: [
...getScopedFilters(source, appliedConfig, false),
{ type: 'sql', condition: expressions.isDbSpan },
],
numberFormat: MS_NUMBER_FORMAT,
groupBy: expressions.dbStatement,
dateRange: searchedTimeRange,
}}
config={totalTimePerQueryConfig}
disableQueryChunking
/>
)}
</ChartBox>
@ -591,33 +823,11 @@ function DatabaseTab({
<Group justify="space-between" align="center" mb="sm">
<Text size="sm">Throughput per Query</Text>
</Group>
{source && (
{source && totalThroughputPerQueryConfig && (
<DBTimeChart
sourceId={source.id}
config={{
...source,
displayType: DisplayType.StackedBar,
where: appliedConfig.where || '',
whereLanguage: appliedConfig.whereLanguage || 'sql',
select: [
{
alias: 'Total Query Count',
aggFn: 'count',
valueExpression: expressions.durationInMillis,
aggCondition: '',
},
],
filters: [
...getScopedFilters(source, appliedConfig, false),
{ type: 'sql', condition: expressions.isDbSpan },
],
numberFormat: {
...INTEGER_NUMBER_FORMAT,
unit: 'queries',
},
groupBy: expressions.dbStatement,
dateRange: searchedTimeRange,
}}
config={totalThroughputPerQueryConfig}
disableQueryChunking
/>
)}
</ChartBox>

View file

@ -197,6 +197,7 @@ function ActiveTimeTooltip({
function DBTimeChartComponent({
config,
disableQueryChunking,
enabled = true,
logReferenceTimestamp,
onTimeRangeSelect,
@ -208,6 +209,7 @@ function DBTimeChartComponent({
sourceId,
}: {
config: ChartConfigWithDateRange;
disableQueryChunking?: boolean;
enabled?: boolean;
logReferenceTimestamp?: number;
onSettled?: () => void;
@ -241,7 +243,7 @@ function DBTimeChartComponent({
placeholderData: (prev: any) => prev,
queryKey: [queryKeyPrefix, queriedConfig, 'chunked'],
enabled,
enableQueryChunking: true,
enableQueryChunking: !disableQueryChunking,
});
const previousPeriodChartConfig: ChartConfigWithDateRange = useMemo(() => {

View file

@ -76,6 +76,7 @@ export const convertCHDataTypeToJSType = (
return JSDataType.Number;
} else if (
dataType.startsWith('String') ||
dataType.startsWith('Nullable(String)') ||
dataType.startsWith('FixedString') ||
dataType.startsWith('Enum') ||
dataType.startsWith('UUID') ||

View file

@ -379,13 +379,19 @@ async function renderSelectList(
// This metadata query is executed in an attempt to optimize the selects by favoring materialized fields
// on a view/table that already performs the computation in select. This optimization is not currently
// supported for queries using CTEs so skip the metadata fetch if there are CTE objects in the config.
const materializedFields = chartConfig.with?.length
? undefined
: await metadata.getMaterializedColumnsLookupTable({
connectionId: chartConfig.connection,
databaseName: chartConfig.from.databaseName,
tableName: chartConfig.from.tableName,
});
let materializedFields: Map<string, string> | undefined;
try {
// This will likely error for a CTE
materializedFields = chartConfig.with?.length
? undefined
: await metadata.getMaterializedColumnsLookupTable({
connectionId: chartConfig.connection,
databaseName: chartConfig.from.databaseName,
tableName: chartConfig.from.tableName,
});
} catch {
// ignore
}
const isRatio =
chartConfig.seriesReturnType === 'ratio' && selectList.length === 2;
@ -681,13 +687,19 @@ async function renderWhereExpression({
// on a view/table that already perform the computation in select. This optimization is not currently
// supported for queries using CTEs so skip the metadata fetch if there are CTE objects in the config.
const materializedFields = withClauses?.length
? undefined
: await metadata.getMaterializedColumnsLookupTable({
connectionId,
databaseName: from.databaseName,
tableName: from.tableName,
});
let materializedFields: Map<string, string> | undefined;
try {
// This will likely error for a CTE
materializedFields = withClauses?.length
? undefined
: await metadata.getMaterializedColumnsLookupTable({
connectionId,
databaseName: from.databaseName,
tableName: from.tableName,
});
} catch {
// ignore
}
const _sqlPrefix = 'SELECT * FROM `t` WHERE ';
const rawSQL = `${_sqlPrefix}${_condition}`;