2025-11-04 20:20:26 +00:00
|
|
|
import SqlString from 'sqlstring';
|
|
|
|
|
import { chSql } from '@hyperdx/common-utils/dist/clickhouse';
|
|
|
|
|
import { Metadata } from '@hyperdx/common-utils/dist/core/metadata';
|
|
|
|
|
import { renderChartConfig } from '@hyperdx/common-utils/dist/core/renderChartConfig';
|
|
|
|
|
import { TSource } from '@hyperdx/common-utils/dist/types';
|
|
|
|
|
import { useQuery } from '@tanstack/react-query';
|
|
|
|
|
|
|
|
|
|
import { useClickhouseClient } from '@/clickhouse';
|
|
|
|
|
|
|
|
|
|
import { useMetadataWithSettings } from './useMetadata';
|
|
|
|
|
|
|
|
|
|
/**
 * One row of the service map query result: a request count for a single
 * (server service, server status code, client service) combination.
 */
export type SpanAggregationRow = {
  /** Service name taken from the server-side span. */
  serverServiceName: string;
  /** Status code taken from the server-side span. */
  serverStatusCode: string;
  /** Number of matching requests, already scaled by the sampling level. */
  requestCount: number;
  /**
   * Service name of the client-side span that is the parent of the server
   * span. Not set when no client span was joined to the server span.
   */
  clientServiceName?: string;
};
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the ClickHouse query used to populate the service map.
 *
 * Two CTEs are rendered from the same base chart config: one selecting
 * server/consumer spans and one selecting client/producer spans. The final
 * query left-joins server spans to the client span that is their parent and
 * counts requests per (server service, server status code, client service).
 *
 * @param source - Trace source providing the table, connection and column
 *   expressions. Missing column expressions fall back to default column
 *   names (`TraceId`, `SpanId`, `ServiceName`, `ParentSpanId`, `StatusCode`,
 *   `SpanKind`).
 * @param dateRange - Start and end of the time window to query.
 * @param traceId - When set, restricts the query to this trace and disables
 *   sampling.
 * @param metadata - Metadata client forwarded to `renderChartConfig`.
 * @param samplingFactor - Keep roughly one in every `samplingFactor` traces.
 *   Values that are not finite, or are below 1, are treated as 1 (no
 *   sampling); fractional values are rounded down.
 * @returns The parameterized query produced by the `chSql` template tag.
 */
async function getServiceMapQuery({
  source,
  dateRange,
  traceId,
  metadata,
  samplingFactor,
}: {
  source: TSource;
  dateRange: [Date, Date];
  traceId?: string;
  metadata: Metadata;
  samplingFactor: number;
}) {
  // Don't sample if we're looking for a specific trace.
  // Otherwise normalise to an integer >= 1: the value is used both as a
  // modulo divisor (0 would be a division by zero) and as an Int64 query
  // parameter (which cannot carry a fractional or non-finite number).
  const effectiveSamplingLevel =
    traceId || !Number.isFinite(samplingFactor)
      ? 1
      : Math.max(1, Math.floor(samplingFactor));

  // Resolve column expressions once so the filters and the select list agree,
  // and so an unset expression never gets interpolated as "undefined".
  const traceIdExpression = source.traceIdExpression ?? 'TraceId';
  // NOTE(review): 'SpanKind' is assumed as the default column name, in line
  // with the other defaults used below — confirm against the source schema.
  const spanKindExpression = source.spanKindExpression ?? 'SpanKind';

  const baseCTEConfig = {
    from: source.from,
    connection: source.connection,
    dateRange,
    timestampValueExpression: source.timestampValueExpression,
    filters: [
      // Sample a subset of traces, for performance in the following join
      {
        type: 'sql' as const,
        condition: `cityHash64(${traceIdExpression}) % ${effectiveSamplingLevel} = 0`,
      },
      // Optionally filter for a specific trace ID
      ...(traceId
        ? [
            {
              type: 'sql' as const,
              condition: SqlString.format('?? = ?', [
                traceIdExpression,
                traceId,
              ]),
            },
          ]
        : []),
    ],
    select: [
      {
        valueExpression: traceIdExpression,
        alias: 'traceId',
      },
      {
        valueExpression: source.spanIdExpression ?? 'SpanId',
        alias: 'spanId',
      },
      {
        valueExpression: source.serviceNameExpression ?? 'ServiceName',
        alias: 'serviceName',
      },
      {
        valueExpression: source.parentSpanIdExpression ?? 'ParentSpanId',
        alias: 'parentSpanId',
      },
      {
        valueExpression: source.statusCodeExpression ?? 'StatusCode',
        alias: 'statusCode',
      },
    ],
  };

  // Renders the base config restricted to the given (pre-quoted) span kinds.
  const renderSpanCTE = (quotedSpanKinds: string) =>
    renderChartConfig(
      {
        ...baseCTEConfig,
        filters: [
          ...baseCTEConfig.filters,
          {
            type: 'sql',
            condition: `${spanKindExpression} IN (${quotedSpanKinds})`,
          },
        ],
        where: '',
      },
      metadata,
      source.querySettings,
    );

  // Both the short and the SPAN_KIND_-prefixed spellings are matched.
  const [serverCTE, clientCTE] = await Promise.all([
    renderSpanCTE(
      `'Server', 'Consumer', 'SPAN_KIND_SERVER', 'SPAN_KIND_CONSUMER'`,
    ),
    renderSpanCTE(
      `'Client', 'Producer', 'SPAN_KIND_CLIENT', 'SPAN_KIND_PRODUCER'`,
    ),
  ]);

  // Left join to support services which receive requests from clients that are not instrumented.
  // Ordering helps ensure stable graph layout.
  // The count is multiplied by the sampling level to estimate the unsampled total.
  return chSql`
    WITH
      ServerSpans AS (${serverCTE}),
      ClientSpans AS (${clientCTE})
    SELECT
      ServerSpans.serviceName AS serverServiceName,
      ServerSpans.statusCode AS serverStatusCode,
      ClientSpans.serviceName AS clientServiceName,
      count(*) * ${{ Int64: effectiveSamplingLevel }} as requestCount
    FROM ServerSpans
    LEFT JOIN ClientSpans
      ON ServerSpans.traceId = ClientSpans.traceId
      AND ServerSpans.parentSpanId = ClientSpans.spanId
    WHERE (ClientSpans.serviceName IS NULL OR ServerSpans.serviceName != ClientSpans.serviceName)
    GROUP BY serverServiceName, serverStatusCode, clientServiceName
    ORDER BY serverServiceName, serverStatusCode, clientServiceName
  `;
}
|
|
|
|
|
|
|
|
|
|
/** Request statistics accumulated for one service or one client/service pair. */
type IncomingRequestStats = {
  /** Sum of the request counts of every contributing row. */
  totalRequests: number;
  /** Request counts keyed by the server span's status code. */
  requestCountByStatus: Map<string, number>;
  /**
   * Share of requests whose status is `'Error'`, as a percentage (0-100).
   * Zero when there are no requests.
   */
  errorPercentage: number;
};
|
|
|
|
|
|
|
|
|
|
/** Aggregated view of the requests received by a single service. */
export type ServiceAggregation = {
  /** Name of the service this aggregation describes. */
  serviceName: string;
  /** Statistics over all requests received by the service. */
  incomingRequests: IncomingRequestStats;
  /** The same statistics broken down by calling (client) service name. */
  incomingRequestsByClient: Map<string, IncomingRequestStats>;
};
|
|
|
|
|
|
|
|
|
|
/**
 * Folds flat query rows into per-service aggregations.
 *
 * Every server service gets an entry holding its total incoming requests,
 * the counts per status code, and the same breakdown per client service.
 * Client services are also given an entry (with empty statistics unless they
 * appear as a server elsewhere) so that they show up in the result.
 *
 * @param data - Rows as returned by the service map query.
 * @returns Aggregations keyed by service name, in first-seen order.
 */
export function aggregateServiceMapData(data: SpanAggregationRow[]) {
  const aggregations = new Map<string, ServiceAggregation>();

  // Fresh, zeroed statistics bucket.
  const newStats = () => ({
    totalRequests: 0,
    requestCountByStatus: new Map<string, number>(),
    errorPercentage: 0,
  });
  type Stats = ReturnType<typeof newStats>;

  // Returns the entry for `name`, registering an empty one on first sight.
  const ensureService = (name: string) => {
    let entry = aggregations.get(name);
    if (entry === undefined) {
      entry = {
        serviceName: name,
        incomingRequests: newStats(),
        incomingRequestsByClient: new Map<string, Stats>(),
      };
      aggregations.set(name, entry);
    }
    return entry;
  };

  // Adds `count` requests with the given status to a statistics bucket.
  const record = (stats: Stats, status: string, count: number) => {
    stats.totalRequests += count;
    const previous = stats.requestCountByStatus.get(status) || 0;
    stats.requestCountByStatus.set(status, previous + count);
  };

  // Derives the error percentage from the accumulated counts.
  const finalize = (stats: Stats) => {
    const errors = stats.requestCountByStatus.get('Error') || 0;
    stats.errorPercentage =
      stats.totalRequests > 0 ? (errors / stats.totalRequests) * 100 : 0;
  };

  for (const row of data) {
    const serverEntry = ensureService(row.serverServiceName);
    record(serverEntry.incomingRequests, row.serverStatusCode, row.requestCount);

    // Rows without a client only contribute to the server's totals.
    const clientName = row.clientServiceName;
    if (!clientName) {
      continue;
    }

    let clientStats = serverEntry.incomingRequestsByClient.get(clientName);
    if (clientStats === undefined) {
      clientStats = newStats();
      serverEntry.incomingRequestsByClient.set(clientName, clientStats);
    }
    record(clientStats, row.serverStatusCode, row.requestCount);

    // Make sure the calling service is present in the result as well.
    ensureService(clientName);
  }

  // Percentages can only be computed once all rows have been counted.
  for (const entry of aggregations.values()) {
    finalize(entry.incomingRequests);
    for (const clientStats of entry.incomingRequestsByClient.values()) {
      finalize(clientStats);
    }
  }

  return aggregations;
}
|
|
|
|
|
|
|
|
|
|
/**
 * React Query hook that loads and aggregates the service map for a source.
 *
 * Builds the service map query, runs it against the source's ClickHouse
 * connection and folds the resulting rows with `aggregateServiceMapData`.
 *
 * @param source - Trace source to query.
 * @param dateRange - Start and end of the time window to query.
 * @param traceId - Optional trace to restrict the map to.
 * @param samplingFactor - Sampling factor forwarded to the query builder.
 * @returns The `useQuery` result whose data is the aggregated service map.
 */
export default function useServiceMap({
  source,
  dateRange,
  traceId,
  samplingFactor,
}: {
  source: TSource;
  dateRange: [Date, Date];
  traceId?: string;
  samplingFactor: number;
}) {
  const clickhouseClient = useClickhouseClient();
  const metadata = useMetadataWithSettings();

  return useQuery({
    queryKey: ['serviceMapData', traceId, source, dateRange, samplingFactor],
    queryFn: async ({ signal }) => {
      const { sql, params } = await getServiceMapQuery({
        source,
        dateRange,
        traceId,
        metadata,
        samplingFactor,
      });

      const response = await clickhouseClient.query({
        query: sql,
        query_params: params,
        connectionId: source.connection,
        format: 'JSON',
        // Lets React Query cancel the in-flight request.
        abort_signal: signal,
        clickhouse_settings: {
          max_execution_time: 60,
          join_algorithm: 'auto',
        },
      });
      const result = await response.json<Record<string, string>>();

      // Rows are read as string maps here, so the count is parsed into a number.
      const rows = result.data.map((row: Record<string, string>) => ({
        serverServiceName: row.serverServiceName,
        serverStatusCode: row.serverStatusCode,
        clientServiceName: row.clientServiceName,
        requestCount: Number.parseInt(row.requestCount),
      }));

      return aggregateServiceMapData(rows);
    },
    // Prevent refetching and updating the map layout
    staleTime: Infinity,
    refetchOnWindowFocus: false,
    retry: 1,
  });
}
|