Merge branch 'main' into event-deltas/field-filtering

This commit is contained in:
Alex Fedotyev 2026-03-03 15:44:39 -08:00 committed by GitHub
commit 4e8b4d6e9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 306 additions and 12 deletions

View file

@ -0,0 +1,6 @@
---
"@hyperdx/api": patch
"@hyperdx/app": patch
---
fix: tile alerts with groupBy now correctly track and display group names

View file

@ -30,6 +30,7 @@ import { ITeam } from '@/models/team';
import Webhook, { IWebhook } from '@/models/webhook';
import * as checkAlert from '@/tasks/checkAlerts';
import {
alertHasGroupBy,
doesExceedThreshold,
getPreviousAlertHistories,
processAlert,
@ -122,6 +123,90 @@ describe('checkAlerts', () => {
});
});
describe('alertHasGroupBy', () => {
  // Knobs a test can tweak when building an AlertDetails fixture.
  type DetailsOverrides = Partial<{
    alertGroupBy: string;
    taskType: AlertTaskType;
    tileGroupBy: string;
  }>;

  // Build a minimal AlertDetails; defaults to a saved-search alert unless
  // overrides.taskType asks for a tile alert.
  const makeDetails = (overrides: DetailsOverrides = {}): AlertDetails => {
    const common = {
      alert: { groupBy: overrides.alertGroupBy } as any,
      source: {} as any,
      previousMap: new Map(),
    };
    return overrides.taskType === AlertTaskType.TILE
      ? {
          ...common,
          taskType: AlertTaskType.TILE,
          tile: {
            config: { groupBy: overrides.tileGroupBy ?? '' },
          } as any,
          dashboard: {} as any,
        }
      : {
          ...common,
          taskType: AlertTaskType.SAVED_SEARCH,
          savedSearch: {} as any,
        };
  };

  it('should return false for saved search alert without groupBy', () => {
    expect(alertHasGroupBy(makeDetails())).toBe(false);
  });

  it('should return false for saved search alert with empty groupBy', () => {
    expect(alertHasGroupBy(makeDetails({ alertGroupBy: '' }))).toBe(false);
  });

  it('should return true for saved search alert with groupBy', () => {
    expect(
      alertHasGroupBy(makeDetails({ alertGroupBy: 'ServiceName' })),
    ).toBe(true);
  });

  it('should return false for tile alert without groupBy', () => {
    expect(
      alertHasGroupBy(makeDetails({ taskType: AlertTaskType.TILE })),
    ).toBe(false);
  });

  it('should return false for tile alert with empty tile groupBy', () => {
    expect(
      alertHasGroupBy(
        makeDetails({ taskType: AlertTaskType.TILE, tileGroupBy: '' }),
      ),
    ).toBe(false);
  });

  it('should return true for tile alert with tile config groupBy', () => {
    expect(
      alertHasGroupBy(
        makeDetails({
          taskType: AlertTaskType.TILE,
          tileGroupBy: 'ServiceName',
        }),
      ),
    ).toBe(true);
  });

  it('should return true for tile alert when alert.groupBy is set (even if tile groupBy is empty)', () => {
    expect(
      alertHasGroupBy(
        makeDetails({
          taskType: AlertTaskType.TILE,
          alertGroupBy: 'ServiceName',
          tileGroupBy: '',
        }),
      ),
    ).toBe(true);
  });
});
describe('Alert Templates', () => {
// Create a mock metadata object with the necessary methods
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
@ -194,6 +279,7 @@ describe('checkAlerts', () => {
attributes: {},
granularity: '1m',
group: 'http',
isGroupedAlert: false,
startTime: new Date('2023-03-17T22:13:03.103Z'),
endTime: new Date('2023-03-17T22:13:59.103Z'),
value: 10,
@ -224,6 +310,7 @@ describe('checkAlerts', () => {
endTime: new Date('2023-03-17T22:13:59.103Z'),
attributes: {},
granularity: '5 minute',
isGroupedAlert: false,
value: 5,
};
@ -2398,6 +2485,186 @@ describe('checkAlerts', () => {
);
});
// Integration test: a TILE (dashboard chart) alert with a groupBy must open
// one AlertHistory per group and send one webhook notification per group.
it('TILE alert (metrics) with groupBy - should track per-group alerts', async () => {
  const { team, webhook, connection, teamWebhooksById, clickhouseClient } =
    await setupSavedSearchAlertTest();
  const now = new Date('2023-11-16T22:12:00.000Z');
  // Alert window is [22:05, 22:10), place data within that range
  const eventMs = now.getTime() - ms('7m'); // 22:05
  // Insert gauge metrics for two different services
  // Note: ResourceAttributes must differ per service so that
  // AttributesHash (cityHash64 of mapConcat(ScopeAttributes, ResourceAttributes, Attributes))
  // produces distinct hashes. Otherwise, the Bucketed CTE collapses all rows into one group.
  const gaugePoints = [
    // service-a: high CPU values (should trigger alert)
    {
      MetricName: 'test.cpu',
      ServiceName: 'service-a',
      Value: 50,
      TimeUnix: new Date(eventMs),
      ResourceAttributes: { 'service.name': 'service-a', host: 'host1' },
    },
    {
      MetricName: 'test.cpu',
      ServiceName: 'service-a',
      Value: 40,
      TimeUnix: new Date(eventMs + ms('1m')),
      ResourceAttributes: { 'service.name': 'service-a', host: 'host1' },
    },
    // service-b: high CPU values (should also trigger alert)
    {
      MetricName: 'test.cpu',
      ServiceName: 'service-b',
      Value: 30,
      TimeUnix: new Date(eventMs),
      ResourceAttributes: { 'service.name': 'service-b', host: 'host1' },
    },
    {
      MetricName: 'test.cpu',
      ServiceName: 'service-b',
      Value: 20,
      TimeUnix: new Date(eventMs + ms('1m')),
      ResourceAttributes: { 'service.name': 'service-b', host: 'host1' },
    },
  ];
  await bulkInsertMetricsGauge(gaugePoints);
  // Metric source pointing at the default gauge/histogram/sum tables.
  const source = await Source.create({
    kind: 'metric',
    team: team._id,
    from: {
      databaseName: DEFAULT_DATABASE,
      tableName: '',
    },
    metricTables: {
      gauge: DEFAULT_METRICS_TABLE.GAUGE,
      histogram: DEFAULT_METRICS_TABLE.HISTOGRAM,
      sum: DEFAULT_METRICS_TABLE.SUM,
    },
    timestampValueExpression: 'TimeUnix',
    connection: connection.id,
    name: 'Metrics',
  });
  // The groupBy under test lives on tile.config, not on the alert itself.
  const dashboard = await new Dashboard({
    name: 'My Dashboard',
    team: team._id,
    tiles: [
      {
        id: '17quud',
        x: 0,
        y: 0,
        w: 6,
        h: 4,
        config: {
          name: 'CPU by Service',
          select: [
            {
              aggFn: 'max',
              valueExpression: 'Value',
              metricType: 'gauge',
              metricName: 'test.cpu',
            },
          ],
          where: '',
          displayType: 'line',
          source: source.id,
          groupBy: 'ServiceName',
        },
      },
    ],
  }).save();
  const tile = dashboard.tiles?.find((t: any) => t.id === '17quud');
  if (!tile) throw new Error('tile not found');
  // Threshold 1 with max values 20-50 means both groups exceed it.
  const details = await createAlertDetails(
    team,
    source,
    {
      source: AlertSource.TILE,
      channel: {
        type: 'webhook',
        webhookId: webhook._id.toString(),
      },
      interval: '5m',
      thresholdType: AlertThresholdType.ABOVE,
      threshold: 1,
      dashboardId: dashboard.id,
      tileId: '17quud',
    },
    {
      taskType: AlertTaskType.TILE,
      tile,
      dashboard,
    },
  );
  // First run: should trigger alerts for both services
  await processAlertAtTime(
    now,
    details,
    clickhouseClient,
    connection.id,
    alertProvider,
    teamWebhooksById,
  );
  expect((await Alert.findById(details.alert.id))!.state).toBe('ALERT');
  // Check that we have 2 alert histories (one per group)
  const alertHistories = await AlertHistory.find({
    alert: details.alert.id,
  }).sort({ createdAt: 1, group: 1 });
  expect(alertHistories.length).toBe(2);
  // Both groups should be in ALERT state with non-empty group names
  const groups = alertHistories.map(h => h.group);
  expect(groups.some(g => g?.includes('service-a'))).toBe(true);
  expect(groups.some(g => g?.includes('service-b'))).toBe(true);
  expect(alertHistories[0].state).toBe('ALERT');
  expect(alertHistories[1].state).toBe('ALERT');
  // Webhook should be called twice (once per group)
  expect(slack.postMessageToWebhook).toHaveBeenCalledTimes(2);
  // Validate webhook messages contain correct group names
  const calls = (slack.postMessageToWebhook as jest.Mock).mock.calls;
  const messages = calls.map((call: any) => ({
    url: call[0],
    text: call[1].text,
    body: call[1].blocks[0].text.text,
  }));
  // Both calls should target the correct webhook URL
  expect(messages[0].url).toBe('https://hooks.slack.com/services/123');
  expect(messages[1].url).toBe('https://hooks.slack.com/services/123');
  // Title should reference the chart name and dashboard
  for (const msg of messages) {
    expect(msg.text).toContain('CPU by Service');
    expect(msg.text).toContain('My Dashboard');
    expect(msg.text).toContain('exceeds 1');
  }
  // Body should contain Group: "ServiceName:service-a" or "ServiceName:service-b"
  const bodies = messages.map((m: any) => m.body);
  expect(
    bodies.some(
      (b: string) => b.includes('Group:') && b.includes('service-a'),
    ),
  ).toBe(true);
  expect(
    bodies.some(
      (b: string) => b.includes('Group:') && b.includes('service-b'),
    ),
  ).toBe(true);
});
// TODO: revisit this once the auto-resolve feature is implemented
it('should check 3 time buckets [1 error, 3 errors, 1 error] with threshold 2 and maintain ALERT state with 3 lastValues entries', async () => {
const {

View file

@ -50,6 +50,26 @@ import {
} from '@/tasks/util';
import logger from '@/utils/logger';
/**
 * Determine if an alert has group-by behavior.
 * For saved search alerts, groupBy is on alert.groupBy.
 * For tile alerts, groupBy is on tile.config.groupBy.
 *
 * A whitespace-only string counts as "no grouping" — this matches the
 * previous isGroupedAlert computation in renderAlertTemplate
 * (`alert.groupBy.trim() !== ''`) that this helper replaces, so padded
 * groupBy values do not silently flip an alert into grouped mode.
 */
export const alertHasGroupBy = (details: AlertDetails): boolean => {
  // groupBy is normally a string; for any other length-bearing value fall
  // back to the original truthy + length check.
  const isNonEmpty = (groupBy: unknown): boolean =>
    typeof groupBy === 'string'
      ? groupBy.trim().length > 0
      : Boolean(groupBy && (groupBy as { length?: number }).length);

  if (isNonEmpty(details.alert.groupBy)) {
    return true;
  }
  // Only tile alerts carry a tile; short-circuit keeps the access safe.
  return (
    details.taskType === AlertTaskType.TILE &&
    isNonEmpty(details.tile.config.groupBy)
  );
};
export const doesExceedThreshold = (
thresholdType: AlertThresholdType,
threshold: number,
@ -72,6 +92,7 @@ const fireChannelEvent = async ({
dashboard,
endTime,
group,
isGroupedAlert,
metadata,
savedSearch,
source,
@ -88,6 +109,7 @@ const fireChannelEvent = async ({
dashboard?: IDashboard | null;
endTime: Date;
group?: string;
isGroupedAlert: boolean;
metadata: Metadata;
savedSearch?: ISavedSearch | null;
source?: ISource | null;
@ -140,6 +162,7 @@ const fireChannelEvent = async ({
endTime,
granularity: `${windowSizeInMins} minute`,
group,
isGroupedAlert,
savedSearch,
source,
startTime,
@ -390,10 +413,10 @@ export const processAlert = async (
try {
const windowSizeInMins = ms(alert.interval) / 60000;
const nowInMinsRoundDown = roundDownToXMinutes(windowSizeInMins)(now);
const hasGroupBy = alert.groupBy && alert.groupBy.length > 0;
const hasGroupBy = alertHasGroupBy(details);
// Check if we should skip this alert check based on last evaluation time
if (shouldSkipAlertCheck(details, !!hasGroupBy, nowInMinsRoundDown)) {
if (shouldSkipAlertCheck(details, hasGroupBy, nowInMinsRoundDown)) {
logger.info(
{
windowSizeInMins,
@ -409,7 +432,7 @@ export const processAlert = async (
const dateRange = getAlertEvaluationDateRange(
details,
!!hasGroupBy,
hasGroupBy,
nowInMinsRoundDown,
windowSizeInMins,
);
@ -515,6 +538,7 @@ export const processAlert = async (
startTime,
endTime: fns.addMinutes(startTime, windowSizeInMins),
group,
isGroupedAlert: hasGroupBy,
metadata,
savedSearch: (details as any).savedSearch,
source,

View file

@ -67,6 +67,7 @@ export type AlertMessageTemplateDefaultView = {
endTime: Date;
granularity: string;
group?: string;
isGroupedAlert: boolean;
savedSearch?: ISavedSearch | null;
source?: ISource | null;
startTime: Date;
@ -527,11 +528,6 @@ export const renderAlertTemplate = async ({
const startTime = view.startTime.getTime();
const endTime = view.endTime.getTime();
// Generate eventId with explicit distinction between grouped and non-grouped alerts
// to prevent collisions between empty group names and non-grouped alerts
const isGroupedAlert = Boolean(
alert.groupBy && alert.groupBy.trim() !== '',
);
const eventId = objectHash({
alertId: alert.id,
channel: {
@ -539,9 +535,8 @@ export const renderAlertTemplate = async ({
id: channel.channel._id.toString(),
},
// Explicitly track if this is a grouped alert
isGrouped: isGroupedAlert,
// Only include groupId if this is actually a grouped alert with a non-empty group value
...(isGroupedAlert && group ? { groupId: group } : {}),
isGrouped: view.isGroupedAlert,
...(view.isGroupedAlert && group ? { groupId: group } : {}),
});
await notifyChannel({

View file

@ -14,6 +14,8 @@ import {
ChartAlertBaseSchema,
} from '@hyperdx/common-utils/dist/types';
import { IS_DEV } from '@/config';
export function intervalToGranularity(interval: AlertInterval) {
if (interval === '1m') return Granularity.OneMinute;
if (interval === '5m') return Granularity.FiveMinute;
@ -94,7 +96,7 @@ export const ALERT_INTERVAL_OPTIONS: Record<AlertInterval, string> = {
};
export const TILE_ALERT_INTERVAL_OPTIONS = _.pick(ALERT_INTERVAL_OPTIONS, [
// 1m is excluded in production builds, but enabled in dev for faster iteration
...(IS_DEV ? (['1m'] as const) : []),
'5m',
'15m',
'30m',