mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
Closes HDX-3154. This PR adds a feature that allows the user to add settings to a source. These settings are then appended to the end of every query that is rendered through the `renderChartConfig` function, along with any other chart-specific settings. See: https://clickhouse.com/docs/sql-reference/statements/select#settings-in-select-query Most of the work was to pass the `source` or `source.querySettings` value through the code to the `renderChartConfig` calls and to update the related tests. There are also some UI changes in the `SourceForm` components. `SQLParser.Parser` from `node-sql-parser` throws an error when it encounters a SETTINGS clause in a SQL string, so a function was added to remove that clause from any SQL that is passed to the parser. It assumes that the SETTINGS clause will always be at the end of the SQL string: it removes the SETTINGS clause and everything after it. https://github.com/user-attachments/assets/7ac3b852-2c86-4431-88bc-106f982343bb
298 lines
8.6 KiB
TypeScript
298 lines
8.6 KiB
TypeScript
import SqlString from 'sqlstring';
|
|
import { chSql } from '@hyperdx/common-utils/dist/clickhouse';
|
|
import { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
|
|
import { renderChartConfig } from '@hyperdx/common-utils/dist/core/renderChartConfig';
|
|
import { TSource } from '@hyperdx/common-utils/dist/types';
|
|
import { useQuery } from '@tanstack/react-query';
|
|
|
|
import { useClickhouseClient } from '@/clickhouse';
|
|
|
|
import { useMetadataWithSettings } from './useMetadata';
|
|
|
|
/**
 * One row of the service map query result: an aggregated request count for a
 * (server service, server status code, client service) combination.
 */
export type SpanAggregationRow = {
  /** Service name of the receiving (server/consumer) span. */
  serverServiceName: string;
  /** Status code of the receiving span, as selected by the query. */
  serverStatusCode: string;
  /** Number of requests in this group, already scaled by the sampling level. */
  requestCount: number;
  /**
   * Service name of the calling (client/producer) span. Optional because the
   * query LEFT JOINs client spans, so a server span may have no matching caller.
   */
  clientServiceName?: string;
};
|
|
|
|
/**
 * Builds the ClickHouse query that powers the service map.
 *
 * Two CTEs are rendered through `renderChartConfig` (one for server/consumer
 * spans, one for client/producer spans) and joined on trace ID and parent span
 * ID, so each result row describes "client service -> server service" traffic
 * grouped by the server span's status code.
 *
 * @param source - Trace source providing the table, connection, column
 *   expressions and per-source query settings.
 * @param dateRange - Time window passed to both CTEs.
 * @param traceId - When set, restricts the query to this trace and disables
 *   sampling.
 * @param metadata - Metadata instance forwarded to `renderChartConfig`.
 * @param samplingFactor - Keep roughly 1 out of every `samplingFactor` traces.
 *   Values that are not finite or are below 1 are treated as 1 (no sampling);
 *   fractional values are rounded down.
 * @returns The parameterised query produced by the `chSql` template.
 */
async function getServiceMapQuery({
  source,
  dateRange,
  traceId,
  metadata,
  samplingFactor,
}: {
  source: TSource;
  dateRange: [Date, Date];
  traceId?: string;
  metadata: Metadata;
  samplingFactor: number;
}) {
  // The sampling level is used both as a modulo divisor and as an Int64 query
  // parameter, so it has to be a positive integer.
  const normalizedSamplingFactor =
    Number.isFinite(samplingFactor) && samplingFactor >= 1
      ? Math.floor(samplingFactor)
      : 1;

  // Don't sample if we're looking for a specific trace
  const effectiveSamplingLevel = traceId ? 1 : normalizedSamplingFactor;

  // Resolve the column expressions once so that the filters and the SELECT
  // list always agree on the same fallback.
  const traceIdExpression = source.traceIdExpression ?? 'TraceId';
  const spanKindExpression = source.spanKindExpression ?? 'SpanKind';

  const baseCTEConfig = {
    from: source.from,
    connection: source.connection,
    dateRange,
    timestampValueExpression: source.timestampValueExpression,
    filters: [
      // Sample a subset of traces, for performance in the following join
      {
        type: 'sql' as const,
        condition: `cityHash64(${traceIdExpression}) % ${effectiveSamplingLevel} = 0`,
      },
      // Optionally filter for a specific trace ID
      ...(traceId
        ? [
            {
              type: 'sql' as const,
              // The trace ID expression is raw SQL (it is used unquoted
              // everywhere else), so only the user-supplied value is escaped.
              condition: `${traceIdExpression} = ${SqlString.escape(traceId)}`,
            },
          ]
        : []),
    ],
    select: [
      {
        valueExpression: traceIdExpression,
        alias: 'traceId',
      },
      {
        valueExpression: source.spanIdExpression ?? 'SpanId',
        alias: 'spanId',
      },
      {
        valueExpression: source.serviceNameExpression ?? 'ServiceName',
        alias: 'serviceName',
      },
      {
        valueExpression: source.parentSpanIdExpression ?? 'ParentSpanId',
        alias: 'parentSpanId',
      },
      {
        valueExpression: source.statusCodeExpression ?? 'StatusCode',
        alias: 'statusCode',
      },
    ],
  };

  // Renders the shared CTE config restricted to the given span kinds.
  // `spanKindList` is a trusted, pre-quoted SQL list of literals.
  const renderSpansCTE = (spanKindList: string) =>
    renderChartConfig(
      {
        ...baseCTEConfig,
        filters: [
          ...baseCTEConfig.filters,
          {
            type: 'sql' as const,
            condition: `${spanKindExpression} IN (${spanKindList})`,
          },
        ],
        where: '',
      },
      metadata,
      source.querySettings,
    );

  // Both the short and the `SPAN_KIND_*` spellings of each kind are accepted.
  const [serverCTE, clientCTE] = await Promise.all([
    renderSpansCTE(
      `'Server', 'Consumer', 'SPAN_KIND_SERVER', 'SPAN_KIND_CONSUMER'`,
    ),
    renderSpansCTE(
      `'Client', 'Producer', 'SPAN_KIND_CLIENT', 'SPAN_KIND_PRODUCER'`,
    ),
  ]);

  // Left join to support services which receive requests from clients that are not instrumented.
  // Ordering helps ensure stable graph layout.
  return chSql`
    WITH
      ServerSpans AS (${serverCTE}),
      ClientSpans AS (${clientCTE})
    SELECT
      ServerSpans.serviceName AS serverServiceName,
      ServerSpans.statusCode AS serverStatusCode,
      ClientSpans.serviceName AS clientServiceName,
      count(*) * ${{ Int64: effectiveSamplingLevel }} as requestCount
    FROM ServerSpans
    LEFT JOIN ClientSpans
      ON ServerSpans.traceId = ClientSpans.traceId
      AND ServerSpans.parentSpanId = ClientSpans.spanId
    WHERE (ClientSpans.serviceName IS NULL OR ServerSpans.serviceName != ClientSpans.serviceName)
    GROUP BY serverServiceName, serverStatusCode, clientServiceName
    ORDER BY serverServiceName, serverStatusCode, clientServiceName
  `;
}
|
|
|
|
/** Request totals for traffic arriving at a service, optionally from one client. */
type IncomingRequestStats = {
  /** Sum of the request counts across all status codes. */
  totalRequests: number;
  /** Request count keyed by the server span's status code. */
  requestCountByStatus: Map<string, number>;
  /** Share of requests with an error status, in the range 0-100. */
  errorPercentage: number;
};
|
|
|
|
/** Aggregated incoming traffic for a single service (a node in the service map). */
export type ServiceAggregation = {
  /** Name of the service these statistics belong to. */
  serviceName: string;
  /** Statistics over all incoming requests, regardless of caller. */
  incomingRequests: IncomingRequestStats;
  /** Statistics broken down by calling service name. */
  incomingRequestsByClient: Map<string, IncomingRequestStats>;
};
|
|
|
|
/**
 * Folds flat (server, status, client) rows into per-service statistics.
 *
 * Every server service gets an entry with its total incoming requests and a
 * per-client breakdown. Every client service also gets an entry (possibly with
 * zero incoming requests), so a service that only makes calls is still present
 * in the result.
 *
 * @param data - Rows as returned by the service map query.
 * @returns Map from service name to its aggregation, in first-seen order
 *   (server before client within a row).
 */
export function aggregateServiceMapData(
  data: SpanAggregationRow[],
): Map<string, ServiceAggregation> {
  // Status codes counted as errors. 'STATUS_CODE_ERROR' is accepted alongside
  // 'Error' for the same reason the service map query in this file accepts
  // both 'Server' and 'SPAN_KIND_SERVER' as span kinds.
  // NOTE(review): assumes status codes follow the same short/long naming
  // split as span kinds; confirm against the ingested data.
  const errorStatusCodes = ['Error', 'STATUS_CODE_ERROR'];

  const services = new Map<string, ServiceAggregation>();

  const createEmptyStats = (): IncomingRequestStats => ({
    totalRequests: 0,
    requestCountByStatus: new Map(),
    errorPercentage: 0,
  });

  // Returns the aggregation for `serviceName`, registering it on first use.
  const getOrCreateService = (serviceName: string): ServiceAggregation => {
    let service = services.get(serviceName);
    if (service === undefined) {
      service = {
        serviceName,
        incomingRequests: createEmptyStats(),
        incomingRequestsByClient: new Map(),
      };
      services.set(serviceName, service);
    }
    return service;
  };

  // Returns the per-client stats of `service`, registering them on first use.
  const getOrCreateClientStats = (
    service: ServiceAggregation,
    clientServiceName: string,
  ): IncomingRequestStats => {
    let stats = service.incomingRequestsByClient.get(clientServiceName);
    if (stats === undefined) {
      stats = createEmptyStats();
      service.incomingRequestsByClient.set(clientServiceName, stats);
    }
    return stats;
  };

  // Adds `requestCount` to both the total and the per-status counter.
  const addRequests = (
    stats: IncomingRequestStats,
    statusCode: string,
    requestCount: number,
  ): void => {
    stats.totalRequests += requestCount;
    stats.requestCountByStatus.set(
      statusCode,
      (stats.requestCountByStatus.get(statusCode) ?? 0) + requestCount,
    );
  };

  // Derives `errorPercentage` from the accumulated counters.
  const updateErrorPercentage = (stats: IncomingRequestStats): void => {
    const errorCount = errorStatusCodes.reduce(
      (sum, code) => sum + (stats.requestCountByStatus.get(code) ?? 0),
      0,
    );
    stats.errorPercentage =
      stats.totalRequests > 0 ? (errorCount / stats.totalRequests) * 100 : 0;
  };

  // Aggregate data by service
  for (const row of data) {
    const {
      serverServiceName,
      serverStatusCode,
      clientServiceName,
      requestCount,
    } = row;

    const service = getOrCreateService(serverServiceName);
    addRequests(service.incomingRequests, serverStatusCode, requestCount);

    // A missing or empty client name means the caller is unknown, so there is
    // nothing to attribute per client.
    if (clientServiceName) {
      addRequests(
        getOrCreateClientStats(service, clientServiceName),
        serverStatusCode,
        requestCount,
      );

      // Make sure the calling service has its own entry even if nothing calls it.
      getOrCreateService(clientServiceName);
    }
  }

  // Calculate error percentages for all services and their client stats
  for (const service of services.values()) {
    updateErrorPercentage(service.incomingRequests);
    for (const clientStats of service.incomingRequestsByClient.values()) {
      updateErrorPercentage(clientStats);
    }
  }

  return services;
}
|
|
|
|
/**
 * React Query hook that loads and aggregates the data for the service map.
 *
 * Builds the query with `getServiceMapQuery`, runs it through the ClickHouse
 * client against the source's connection, converts the JSON rows into
 * `SpanAggregationRow`s and returns the result of `aggregateServiceMapData`.
 *
 * @param source - Trace source to query.
 * @param dateRange - Time window to query.
 * @param traceId - When set, limits the map to a single trace.
 * @param samplingFactor - Sampling factor forwarded to the query builder.
 * @returns The `useQuery` result whose data is a map of service name to
 *   aggregation.
 */
export default function useServiceMap({
  source,
  dateRange,
  traceId,
  samplingFactor,
}: {
  source: TSource;
  dateRange: [Date, Date];
  traceId?: string;
  samplingFactor: number;
}) {
  const client = useClickhouseClient();
  const metadata = useMetadataWithSettings();

  return useQuery({
    queryKey: ['serviceMapData', traceId, source, dateRange, samplingFactor],
    queryFn: async ({ signal }) => {
      const query = await getServiceMapQuery({
        source,
        dateRange,
        traceId,
        metadata,
        samplingFactor,
      });

      const response = await client.query({
        query: query.sql,
        query_params: query.params,
        connectionId: source.connection,
        format: 'JSON',
        abort_signal: signal,
        clickhouse_settings: {
          max_execution_time: 60,
          join_algorithm: 'auto',
        },
      });
      const result = await response.json<Record<string, string>>();

      const rows: SpanAggregationRow[] = result.data.map(
        (row: Record<string, string>) => {
          // Values are read as strings here, so the count is parsed explicitly.
          // An unparsable count is treated as 0 because a NaN would propagate
          // into every total it is added to.
          const requestCount = Number.parseInt(row.requestCount, 10);
          return {
            serverServiceName: row.serverServiceName,
            serverStatusCode: row.serverStatusCode,
            clientServiceName: row.clientServiceName,
            requestCount: Number.isNaN(requestCount) ? 0 : requestCount,
          };
        },
      );

      return aggregateServiceMapData(rows);
    },
    // Prevent refetching and updating the map layout
    staleTime: Infinity,
    refetchOnWindowFocus: false,
    retry: 1,
  });
}
|