feat: Show alert execution errors in the UI

This commit is contained in:
Drew Davis 2026-04-17 09:28:55 -04:00
parent 7953c0281b
commit 700bab2b47
22 changed files with 1475 additions and 63 deletions

View file

@ -0,0 +1,7 @@
---
"@hyperdx/common-utils": patch
"@hyperdx/api": patch
"@hyperdx/app": patch
---
feat: Show alert execution errors in the UI

View file

@ -105,6 +105,43 @@
],
"description": "Channel type."
},
"AlertErrorType": {
"type": "string",
"enum": [
"QUERY_ERROR",
"WEBHOOK_ERROR",
"INVALID_ALERT",
"UNKNOWN"
],
"description": "Category of error recorded during alert execution."
},
"AlertExecutionError": {
"type": "object",
"description": "An error recorded during a recent alert execution.",
"required": [
"timestamp",
"type",
"message"
],
"properties": {
"timestamp": {
"type": "string",
"format": "date-time",
"description": "When the error occurred.",
"example": "2026-04-17T12:00:00.000Z"
},
"type": {
"$ref": "#/components/schemas/AlertErrorType",
"description": "Category of the error.",
"example": "QUERY_ERROR"
},
"message": {
"type": "string",
"description": "Human-readable error message.",
"example": "Query timed out after 30s"
}
}
},
"AlertSilenced": {
"type": "object",
"description": "Silencing metadata.",
@ -270,6 +307,14 @@
"description": "Silencing metadata.",
"nullable": true
},
"executionErrors": {
"type": "array",
"nullable": true,
"description": "Errors recorded during the most recent alert execution, if any.",
"items": {
"$ref": "#/components/schemas/AlertExecutionError"
}
},
"createdAt": {
"type": "string",
"nullable": true,

View file

@ -1,5 +1,6 @@
import {
ALERT_INTERVAL_TO_MINUTES,
AlertErrorType,
AlertThresholdType,
} from '@hyperdx/common-utils/dist/types';
export { AlertThresholdType } from '@hyperdx/common-utils/dist/types';
@ -15,6 +16,12 @@ export enum AlertState {
OK = 'OK',
}
/**
 * A single error captured during an alert execution run.
 * Stored on the Alert document's `executionErrors` array (see AlertSchema)
 * and surfaced through the API so users can see why an alert run failed.
 */
export interface IAlertError {
  // When the error occurred during execution.
  timestamp: Date;
  // Category of failure (QUERY_ERROR, WEBHOOK_ERROR, INVALID_ALERT, UNKNOWN).
  type: AlertErrorType;
  // Human-readable description of what went wrong.
  message: string;
}
// follow 'ms' pkg formats
export type AlertInterval =
| '1m'
@ -73,6 +80,9 @@ export interface IAlert {
at: Date;
until: Date;
};
// Errors recorded during the most recent execution
executionErrors?: IAlertError[];
createdAt: Date;
updatedAt: Date;
}
@ -192,6 +202,22 @@ const AlertSchema = new Schema<IAlert>(
required: false,
},
},
executionErrors: {
type: [
{
_id: false,
timestamp: { type: Date, required: true },
type: {
type: String,
enum: AlertErrorType,
required: true,
},
message: { type: String, required: true },
},
],
required: false,
default: undefined,
},
},
{
timestamps: true,

View file

@ -1,4 +1,5 @@
import {
AlertErrorType,
AlertThresholdType,
DisplayType,
} from '@hyperdx/common-utils/dist/types';
@ -812,4 +813,95 @@ describe('alerts router', () => {
expect(res.body.data.history[1].counts).toBe(0);
});
});
// Verifies that executionErrors persisted on an Alert document are exposed
// through both the single-alert and list API responses.
describe('errors propagation', () => {
  it('returns the errors field on a single alert response', async () => {
    // Create a dashboard + tile alert that the error will be attached to.
    const dashboard = await agent
      .post('/dashboards')
      .send(MOCK_DASHBOARD)
      .expect(200);
    const alert = await agent
      .post('/alerts')
      .send(
        makeAlertInput({
          dashboardId: dashboard.body.id,
          tileId: dashboard.body.tiles[0].id,
          webhookId: webhook._id.toString(),
        }),
      )
      .expect(200);
    // Simulate a recorded execution error by writing directly to Mongo.
    const errorTimestamp = new Date('2026-04-17T12:00:00.000Z');
    await Alert.updateOne(
      { _id: alert.body.data._id },
      {
        $set: {
          executionErrors: [
            {
              timestamp: errorTimestamp,
              type: AlertErrorType.QUERY_ERROR,
              message: 'ClickHouse returned 500',
            },
          ],
        },
      },
    );
    const res = await agent.get(`/alerts/${alert.body.data._id}`).expect(200);
    expect(res.body.data.executionErrors).toHaveLength(1);
    expect(res.body.data.executionErrors[0].type).toBe(
      AlertErrorType.QUERY_ERROR,
    );
    expect(res.body.data.executionErrors[0].message).toBe(
      'ClickHouse returned 500',
    );
    // The timestamp should round-trip intact through JSON serialization.
    expect(
      new Date(res.body.data.executionErrors[0].timestamp).toISOString(),
    ).toBe(errorTimestamp.toISOString());
  });
  it('returns the errors field on the alerts list response', async () => {
    const dashboard = await agent
      .post('/dashboards')
      .send(MOCK_DASHBOARD)
      .expect(200);
    const alert = await agent
      .post('/alerts')
      .send(
        makeAlertInput({
          dashboardId: dashboard.body.id,
          tileId: dashboard.body.tiles[0].id,
          webhookId: webhook._id.toString(),
        }),
      )
      .expect(200);
    // Seed an execution error directly in Mongo, then read it back via the list.
    await Alert.updateOne(
      { _id: alert.body.data._id },
      {
        $set: {
          executionErrors: [
            {
              timestamp: new Date('2026-04-17T12:00:00.000Z'),
              type: AlertErrorType.WEBHOOK_ERROR,
              message: 'webhook delivery failed',
            },
          ],
        },
      },
    );
    const list = await agent.get('/alerts').expect(200);
    expect(list.body.data).toHaveLength(1);
    expect(list.body.data[0].executionErrors).toHaveLength(1);
    expect(list.body.data[0].executionErrors[0].type).toBe(
      AlertErrorType.WEBHOOK_ERROR,
    );
    expect(list.body.data[0].executionErrors[0].message).toBe(
      'webhook delivery failed',
    );
  });
});
});

View file

@ -82,6 +82,7 @@ const formatAlertResponse = (
'tileId',
'createdAt',
'updatedAt',
'executionErrors',
]),
};
};

View file

@ -1,3 +1,4 @@
import { AlertErrorType } from '@hyperdx/common-utils/dist/types';
import _ from 'lodash';
import { ObjectId } from 'mongodb';
import request from 'supertest';
@ -1167,4 +1168,67 @@ describe('External API Alerts', () => {
.expect(401);
});
});
// External API coverage: executionErrors stored on an Alert must be returned
// (with ISO-8601 timestamps) from both the GET-by-id and list endpoints.
describe('Errors field', () => {
  it('returns recorded execution errors on GET by id', async () => {
    const { alert } = await createTestAlert();
    // Write an error directly to Mongo to simulate a failed execution.
    const errorTimestamp = new Date('2026-04-17T12:00:00.000Z');
    await Alert.updateOne(
      { _id: alert.id },
      {
        $set: {
          executionErrors: [
            {
              timestamp: errorTimestamp,
              type: AlertErrorType.QUERY_ERROR,
              message: 'ClickHouse returned 500',
            },
          ],
        },
      },
    );
    const res = await authRequest(
      'get',
      `${ALERTS_BASE_URL}/${alert.id}`,
    ).expect(200);
    expect(res.body.data.executionErrors).toHaveLength(1);
    expect(res.body.data.executionErrors[0].type).toBe(
      AlertErrorType.QUERY_ERROR,
    );
    expect(res.body.data.executionErrors[0].message).toBe(
      'ClickHouse returned 500',
    );
    // External API serializes timestamps as exact ISO strings.
    expect(res.body.data.executionErrors[0].timestamp).toBe(
      errorTimestamp.toISOString(),
    );
  });
  it('returns recorded execution errors on the list endpoint', async () => {
    const { alert } = await createTestAlert();
    await Alert.updateOne(
      { _id: alert.id },
      {
        $set: {
          executionErrors: [
            {
              timestamp: new Date('2026-04-17T12:00:00.000Z'),
              type: AlertErrorType.WEBHOOK_ERROR,
              message: 'webhook delivery failed',
            },
          ],
        },
      },
    );
    const res = await authRequest('get', ALERTS_BASE_URL).expect(200);
    // The list may contain other alerts; find ours by id.
    const match = res.body.data.find((a: any) => a.id === alert.id);
    expect(match).toBeDefined();
    expect(match.executionErrors).toHaveLength(1);
    expect(match.executionErrors[0].type).toBe(AlertErrorType.WEBHOOK_ERROR);
    expect(match.executionErrors[0].message).toBe('webhook delivery failed');
  });
});
});

View file

@ -48,6 +48,31 @@ import { alertSchema, objectIdSchema } from '@/utils/zod';
* type: string
* enum: [webhook]
* description: Channel type.
* AlertErrorType:
* type: string
* enum: [QUERY_ERROR, WEBHOOK_ERROR, INVALID_ALERT, UNKNOWN]
* description: Category of error recorded during alert execution.
* AlertExecutionError:
* type: object
* description: An error recorded during a recent alert execution.
* required:
* - timestamp
* - type
* - message
* properties:
* timestamp:
* type: string
* format: date-time
* description: When the error occurred.
* example: "2026-04-17T12:00:00.000Z"
* type:
* $ref: '#/components/schemas/AlertErrorType'
* description: Category of the error.
* example: "QUERY_ERROR"
* message:
* type: string
* description: Human-readable error message.
* example: "Query timed out after 30s"
* AlertSilenced:
* type: object
* description: Silencing metadata.
@ -176,6 +201,12 @@ import { alertSchema, objectIdSchema } from '@/utils/zod';
* $ref: '#/components/schemas/AlertSilenced'
* description: Silencing metadata.
* nullable: true
* executionErrors:
* type: array
* nullable: true
* description: Errors recorded during the most recent alert execution, if any.
* items:
* $ref: '#/components/schemas/AlertExecutionError'
* createdAt:
* type: string
* nullable: true

View file

@ -1,5 +1,6 @@
import { ClickhouseClient } from '@hyperdx/common-utils/dist/clickhouse/node';
import {
AlertErrorType,
AlertState,
AlertThresholdType,
SourceKind,
@ -2539,14 +2540,662 @@ describe('checkAlerts', () => {
);
// Alert should remain in its default OK state and no history/webhooks should be emitted
expect((await Alert.findById(details.alert.id))!.state).toBe('OK');
const updated = await Alert.findById(details.alert.id);
expect(updated!.state).toBe('OK');
expect(
await AlertHistory.countDocuments({ alert: details.alert.id }),
).toBe(0);
expect(slack.postMessageToWebhook).not.toHaveBeenCalled();
// The invalid alert configuration should be recorded on the Alert
expect(updated!.executionErrors).toBeDefined();
expect(updated!.executionErrors!.length).toBe(1);
expect(updated!.executionErrors![0].type).toBe(
AlertErrorType.INVALID_ALERT,
);
expect(updated!.executionErrors![0].message).toMatch(
/thresholdMax is required/,
);
},
);
// End-to-end coverage of how alert execution failures are persisted on the
// Alert document's executionErrors field: query failures, webhook failures
// (single, resolving, grouped, missing webhook), and clearing on success.
describe('execution error recording', () => {
  // Builds a dashboard with one count tile so TILE alerts can be exercised;
  // reuses the saved-search fixture for team/source/webhook setup.
  const setupTileAlertForErrors = async ({
    webhookSettings,
  }: Partial<{
    webhookSettings: Partial<IWebhook>;
  }> = {}) => {
    const fixture = await setupSavedSearchAlertTest({
      webhookSettings: webhookSettings as IWebhook,
    });
    const dashboard = await new Dashboard({
      name: 'Errors Dashboard',
      team: fixture.team._id,
      tiles: [
        {
          id: 'tile-err',
          x: 0,
          y: 0,
          w: 6,
          h: 4,
          config: {
            name: 'Logs Count',
            select: [
              {
                aggFn: 'count',
                aggCondition: 'ServiceName:api',
                valueExpression: '',
                aggConditionLanguage: 'lucene',
              },
            ],
            where: '',
            displayType: 'line',
            granularity: 'auto',
            source: fixture.source.id,
            groupBy: '',
          },
        },
      ],
    }).save();
    const tile = dashboard.tiles?.find((t: any) => t.id === 'tile-err');
    if (!tile) throw new Error('tile not found');
    return { ...fixture, dashboard, tile };
  };
  it('records a QUERY_ERROR and does not touch state/history when the ClickHouse query fails', async () => {
    const {
      team,
      webhook,
      connection,
      source,
      savedSearch,
      teamWebhooksById,
      clickhouseClient,
    } = await setupSavedSearchAlertTest();
    const details = await createAlertDetails(
      team,
      source,
      {
        source: AlertSource.SAVED_SEARCH,
        channel: {
          type: 'webhook',
          webhookId: webhook._id.toString(),
        },
        interval: '5m',
        thresholdType: AlertThresholdType.ABOVE,
        threshold: 1,
        savedSearchId: savedSearch.id,
      },
      {
        taskType: AlertTaskType.SAVED_SEARCH,
        savedSearch,
      },
    );
    // Seed the alert document with an existing ALERT state to prove the
    // query-failure branch does NOT modify state.
    await Alert.updateOne(
      { _id: details.alert.id },
      { $set: { state: AlertState.ALERT } },
    );
    // Force the ClickHouse query to reject for this one execution.
    jest
      .spyOn(clickhouseClient, 'queryChartConfig')
      .mockRejectedValueOnce(new Error('clickhouse kaput'));
    await processAlertAtTime(
      new Date('2023-11-16T22:12:00.000Z'),
      details,
      clickhouseClient,
      connection.id,
      alertProvider,
      teamWebhooksById,
    );
    const updated = await Alert.findById(details.alert.id);
    // State must be untouched — still ALERT
    expect(updated!.state).toBe(AlertState.ALERT);
    // No AlertHistory created
    expect(
      await AlertHistory.countDocuments({ alert: details.alert.id }),
    ).toBe(0);
    // No webhook fired
    expect(slack.postMessageToWebhook).not.toHaveBeenCalled();
    // Error recorded
    expect(updated!.executionErrors).toBeDefined();
    expect(updated!.executionErrors!.length).toBe(1);
    expect(updated!.executionErrors![0].type).toBe(
      AlertErrorType.QUERY_ERROR,
    );
    expect(updated!.executionErrors![0].message).toContain(
      'clickhouse kaput',
    );
  });
  it('leaves OK state untouched when the ClickHouse query fails', async () => {
    const {
      team,
      webhook,
      connection,
      source,
      savedSearch,
      teamWebhooksById,
      clickhouseClient,
    } = await setupSavedSearchAlertTest();
    const details = await createAlertDetails(
      team,
      source,
      {
        source: AlertSource.SAVED_SEARCH,
        channel: {
          type: 'webhook',
          webhookId: webhook._id.toString(),
        },
        interval: '5m',
        thresholdType: AlertThresholdType.ABOVE,
        threshold: 1,
        savedSearchId: savedSearch.id,
      },
      {
        taskType: AlertTaskType.SAVED_SEARCH,
        savedSearch,
      },
    );
    // This time the alert starts in its default OK state.
    jest
      .spyOn(clickhouseClient, 'queryChartConfig')
      .mockRejectedValueOnce(new Error('boom'));
    await processAlertAtTime(
      new Date('2023-11-16T22:12:00.000Z'),
      details,
      clickhouseClient,
      connection.id,
      alertProvider,
      teamWebhooksById,
    );
    const updated = await Alert.findById(details.alert.id);
    // Default state is OK — must stay OK (not flipped to ALERT or anything else)
    expect(updated!.state).toBe(AlertState.OK);
    expect(
      await AlertHistory.countDocuments({ alert: details.alert.id }),
    ).toBe(0);
    expect(updated!.executionErrors![0].type).toBe(
      AlertErrorType.QUERY_ERROR,
    );
  });
  it('sets state to ALERT and records a WEBHOOK_ERROR when the query succeeds but the generic webhook fails', async () => {
    // Generic webhooks go through global.fetch; make every call return 500.
    global.fetch = jest.fn().mockResolvedValue({
      ok: false,
      status: 500,
      text: jest.fn().mockResolvedValue('webhook exploded'),
    }) as any;
    const {
      team,
      webhook,
      connection,
      source,
      teamWebhooksById,
      clickhouseClient,
      dashboard,
    } = await setupTileAlertForErrors({
      webhookSettings: {
        service: WebhookService.Generic,
        url: 'https://webhook.site/fail',
        name: 'Generic Webhook',
        description: 'generic webhook',
        body: JSON.stringify({ text: '{{title}}' }),
      },
    });
    const now = new Date('2023-11-16T22:12:00.000Z');
    const eventMs = now.getTime() - ms('5m');
    // Two matching events in the window so the count exceeds threshold 1.
    await bulkInsertLogs([
      {
        ServiceName: 'api',
        Timestamp: new Date(eventMs),
        SeverityText: 'error',
        Body: 'oh no',
      },
      {
        ServiceName: 'api',
        Timestamp: new Date(eventMs),
        SeverityText: 'error',
        Body: 'oh no',
      },
    ]);
    const tile = dashboard.tiles?.find((t: any) => t.id === 'tile-err');
    const details = await createAlertDetails(
      team,
      source,
      {
        source: AlertSource.TILE,
        channel: {
          type: 'webhook',
          webhookId: webhook._id.toString(),
        },
        interval: '5m',
        thresholdType: AlertThresholdType.ABOVE,
        threshold: 1,
        dashboardId: dashboard.id,
        tileId: 'tile-err',
      },
      {
        taskType: AlertTaskType.TILE,
        tile: tile!,
        dashboard,
      },
    );
    await processAlertAtTime(
      now,
      details,
      clickhouseClient,
      connection.id,
      alertProvider,
      teamWebhooksById,
    );
    const updated = await Alert.findById(details.alert.id);
    expect(updated!.state).toBe(AlertState.ALERT);
    // Query succeeded, so AlertHistory should have been written
    expect(
      await AlertHistory.countDocuments({ alert: details.alert.id }),
    ).toBe(1);
    expect(updated!.executionErrors).toBeDefined();
    expect(updated!.executionErrors!.length).toBe(1);
    expect(updated!.executionErrors![0].type).toBe(
      AlertErrorType.WEBHOOK_ERROR,
    );
    expect(updated!.executionErrors![0].message).toContain(
      'webhook exploded',
    );
  });
  it('sets state to OK and records a WEBHOOK_ERROR when a resolving webhook send fails', async () => {
    // First fetch (alert firing) succeeds; second fetch (resolve message) fails.
    const fetchMock = jest.fn();
    fetchMock
      .mockResolvedValueOnce({
        ok: true,
        status: 200,
        text: jest.fn().mockResolvedValue(''),
      })
      .mockResolvedValueOnce({
        ok: false,
        status: 500,
        text: jest.fn().mockResolvedValue('resolve send failed'),
      });
    global.fetch = fetchMock as any;
    const {
      team,
      webhook,
      connection,
      source,
      teamWebhooksById,
      clickhouseClient,
      dashboard,
    } = await setupTileAlertForErrors({
      webhookSettings: {
        service: WebhookService.Generic,
        url: 'https://webhook.site/ok',
        name: 'Generic Webhook',
        description: 'generic webhook',
        body: JSON.stringify({ text: '{{title}}' }),
      },
    });
    const firstWindowEnd = new Date('2023-11-16T22:10:00.000Z');
    const alertingNow = new Date('2023-11-16T22:12:00.000Z');
    const resolvingNow = new Date('2023-11-16T22:17:00.000Z');
    // Events only in the first window, so the second window resolves.
    await bulkInsertLogs([
      {
        ServiceName: 'api',
        Timestamp: new Date(firstWindowEnd.getTime() - ms('3m')),
        SeverityText: 'error',
        Body: 'oh no',
      },
      {
        ServiceName: 'api',
        Timestamp: new Date(firstWindowEnd.getTime() - ms('3m')),
        SeverityText: 'error',
        Body: 'oh no',
      },
    ]);
    const tile = dashboard.tiles?.find((t: any) => t.id === 'tile-err');
    const details = await createAlertDetails(
      team,
      source,
      {
        source: AlertSource.TILE,
        channel: {
          type: 'webhook',
          webhookId: webhook._id.toString(),
        },
        interval: '5m',
        thresholdType: AlertThresholdType.ABOVE,
        threshold: 1,
        dashboardId: dashboard.id,
        tileId: 'tile-err',
      },
      {
        taskType: AlertTaskType.TILE,
        tile: tile!,
        dashboard,
      },
    );
    // First window — alert fires (first fetch succeeds)
    await processAlertAtTime(
      alertingNow,
      details,
      clickhouseClient,
      connection.id,
      alertProvider,
      teamWebhooksById,
    );
    expect((await Alert.findById(details.alert.id))!.state).toBe(
      AlertState.ALERT,
    );
    // Next window — no data, should resolve; but the webhook send fails
    await processAlertAtTime(
      resolvingNow,
      details,
      clickhouseClient,
      connection.id,
      alertProvider,
      teamWebhooksById,
    );
    const updated = await Alert.findById(details.alert.id);
    expect(updated!.state).toBe(AlertState.OK);
    expect(updated!.executionErrors).toBeDefined();
    expect(updated!.executionErrors!.length).toBe(1);
    expect(updated!.executionErrors![0].type).toBe(
      AlertErrorType.WEBHOOK_ERROR,
    );
  });
  it('clears errors after a successful execution', async () => {
    const {
      team,
      webhook,
      connection,
      source,
      teamWebhooksById,
      clickhouseClient,
      dashboard,
    } = await setupTileAlertForErrors();
    const tile = dashboard.tiles?.find((t: any) => t.id === 'tile-err');
    const details = await createAlertDetails(
      team,
      source,
      {
        source: AlertSource.TILE,
        channel: {
          type: 'webhook',
          webhookId: webhook._id.toString(),
        },
        interval: '5m',
        thresholdType: AlertThresholdType.ABOVE,
        threshold: 1,
        dashboardId: dashboard.id,
        tileId: 'tile-err',
      },
      {
        taskType: AlertTaskType.TILE,
        tile: tile!,
        dashboard,
      },
    );
    // Seed a stale error so we can verify it gets cleared
    await Alert.updateOne(
      { _id: details.alert.id },
      {
        $set: {
          executionErrors: [
            {
              timestamp: new Date('2023-11-16T22:00:00.000Z'),
              type: AlertErrorType.QUERY_ERROR,
              message: 'old error',
            },
          ],
        },
      },
    );
    const now = new Date('2023-11-16T22:12:00.000Z');
    await bulkInsertLogs([
      {
        ServiceName: 'api',
        Timestamp: new Date(now.getTime() - ms('5m')),
        SeverityText: 'error',
        Body: 'hi',
      },
    ]);
    await processAlertAtTime(
      now,
      details,
      clickhouseClient,
      connection.id,
      alertProvider,
      teamWebhooksById,
    );
    const updated = await Alert.findById(details.alert.id);
    // Slack webhook (default) succeeded (mocked) → errors should be cleared
    expect((updated!.executionErrors ?? []).length).toBe(0);
  });
  it('records one WEBHOOK_ERROR per failing group for a grouped alert', async () => {
    // Every generic-webhook fetch fails. With two alerting groups in a
    // single execution, the alert should end up with two WEBHOOK_ERRORs.
    const fetchMock = jest.fn().mockResolvedValue({
      ok: false,
      status: 500,
      text: jest.fn().mockResolvedValue('group webhook failed'),
    });
    global.fetch = fetchMock as any;
    const {
      team,
      webhook,
      connection,
      source,
      savedSearch,
      teamWebhooksById,
      clickhouseClient,
    } = await setupSavedSearchAlertTest({
      webhookSettings: {
        service: WebhookService.Generic,
        url: 'https://webhook.site/group-fail',
        name: 'Generic Webhook',
        description: 'generic webhook',
        body: JSON.stringify({ text: '{{title}}' }),
      } as IWebhook,
    });
    const now = new Date('2023-11-16T22:12:00.000Z');
    const eventMs = new Date('2023-11-16T22:05:00.000Z');
    // Two services, each with enough events to exceed the threshold →
    // two alerting groups.
    await bulkInsertLogs([
      {
        ServiceName: 'service-a',
        Timestamp: eventMs,
        SeverityText: 'error',
        Body: 'Error from service-a',
      },
      {
        ServiceName: 'service-a',
        Timestamp: eventMs,
        SeverityText: 'error',
        Body: 'Error from service-a',
      },
      {
        ServiceName: 'service-b',
        Timestamp: eventMs,
        SeverityText: 'error',
        Body: 'Error from service-b',
      },
      {
        ServiceName: 'service-b',
        Timestamp: eventMs,
        SeverityText: 'error',
        Body: 'Error from service-b',
      },
    ]);
    const details = await createAlertDetails(
      team,
      source,
      {
        source: AlertSource.SAVED_SEARCH,
        channel: {
          type: 'webhook',
          webhookId: webhook._id.toString(),
        },
        interval: '5m',
        thresholdType: AlertThresholdType.ABOVE,
        threshold: 1,
        savedSearchId: savedSearch.id,
        groupBy: 'ServiceName',
      },
      {
        taskType: AlertTaskType.SAVED_SEARCH,
        savedSearch,
      },
    );
    await processAlertAtTime(
      now,
      details,
      clickhouseClient,
      connection.id,
      alertProvider,
      teamWebhooksById,
    );
    const updated = await Alert.findById(details.alert.id);
    // Query succeeded → alert state should reflect the query result (ALERT,
    // since both groups exceeded the threshold) and per-group histories
    // should have been written.
    expect(updated!.state).toBe(AlertState.ALERT);
    const histories = await AlertHistory.find({
      alert: details.alert.id,
    });
    expect(histories.length).toBe(2);
    expect(histories.every(h => h.state === AlertState.ALERT)).toBe(true);
    // Each group attempted to send a webhook and each one failed, so there
    // should be exactly one WEBHOOK_ERROR per group (two total).
    expect(fetchMock).toHaveBeenCalledTimes(2);
    expect(updated!.executionErrors).toBeDefined();
    expect(updated!.executionErrors!.length).toBe(2);
    expect(
      updated!.executionErrors!.every(
        e => e.type === AlertErrorType.WEBHOOK_ERROR,
      ),
    ).toBe(true);
    expect(
      updated!.executionErrors!.every(e =>
        e.message.includes('group webhook failed'),
      ),
    ).toBe(true);
  });
  it('records a WEBHOOK_ERROR when the referenced webhook is not found', async () => {
    // Don't pre-create a webhook — we'll reference one that doesn't exist.
    const { team, connection, source, savedSearch, clickhouseClient } =
      await setupSavedSearchAlertTest();
    // Fresh map with no webhooks in it, mimicking a deleted webhook.
    const emptyWebhooksById = new Map<string, IWebhook>();
    const missingWebhookId = new mongoose.Types.ObjectId().toString();
    const now = new Date('2023-11-16T22:12:00.000Z');
    const eventMs = new Date('2023-11-16T22:05:00.000Z');
    await bulkInsertLogs([
      {
        ServiceName: 'api',
        Timestamp: eventMs,
        SeverityText: 'error',
        Body: 'oh no',
      },
      {
        ServiceName: 'api',
        Timestamp: eventMs,
        SeverityText: 'error',
        Body: 'oh no',
      },
    ]);
    const details = await createAlertDetails(
      team,
      source,
      {
        source: AlertSource.SAVED_SEARCH,
        channel: {
          type: 'webhook',
          webhookId: missingWebhookId,
        },
        interval: '5m',
        thresholdType: AlertThresholdType.ABOVE,
        threshold: 1,
        savedSearchId: savedSearch.id,
      },
      {
        taskType: AlertTaskType.SAVED_SEARCH,
        savedSearch,
      },
    );
    await processAlertAtTime(
      now,
      details,
      clickhouseClient,
      connection.id,
      alertProvider,
      emptyWebhooksById,
    );
    const updated = await Alert.findById(details.alert.id);
    // Query succeeded, state should flip to ALERT, history written
    expect(updated!.state).toBe(AlertState.ALERT);
    expect(
      await AlertHistory.countDocuments({ alert: details.alert.id }),
    ).toBe(1);
    // A descriptive WEBHOOK_ERROR should be recorded so the user can debug
    expect(updated!.executionErrors).toBeDefined();
    expect(updated!.executionErrors!.length).toBe(1);
    expect(updated!.executionErrors![0].type).toBe(
      AlertErrorType.WEBHOOK_ERROR,
    );
    expect(updated!.executionErrors![0].message).toContain(
      'Webhook not found',
    );
    // Hint the user on what to do about it
    expect(updated!.executionErrors![0].message).toMatch(/deleted|update/);
    // No actual network call should have been attempted
    expect(slack.postMessageToWebhook).not.toHaveBeenCalled();
  });
});
it('TILE alert (events) - generic webhook', async () => {
const fetchMock = jest.fn().mockResolvedValue({
ok: true,

View file

@ -36,6 +36,7 @@ describe('CheckAlertTask', () => {
getAlertTasks: jest.fn(),
getWebhooks: jest.fn(),
updateAlertState: jest.fn(),
recordAlertErrors: jest.fn(),
asyncDispose: jest.fn(),
buildChartLink: jest.fn(),
buildLogSearchLink: jest.fn(),

View file

@ -27,6 +27,7 @@ import {
isRawSqlSavedChartConfig,
} from '@hyperdx/common-utils/dist/guards';
import {
AlertErrorType,
AlertThresholdType,
BuilderChartConfigWithOptDateRange,
ChartConfigWithOptDateRange,
@ -43,7 +44,7 @@ import ms from 'ms';
import { serializeError } from 'serialize-error';
import { ALERT_HISTORY_QUERY_CONCURRENCY } from '@/controllers/alertHistory';
import { AlertState, IAlert } from '@/models/alert';
import { AlertState, IAlert, IAlertError } from '@/models/alert';
import AlertHistory, { IAlertHistory } from '@/models/alertHistory';
import { IDashboard } from '@/models/dashboard';
import { ISavedSearch } from '@/models/savedSearch';
@ -137,6 +138,29 @@ export async function computeAliasWithClauses(
return aliasMapToWithClauses(aliasMap);
}
/**
 * Thrown when an alert's configuration itself is invalid (e.g. a
 * BETWEEN/NOT_BETWEEN threshold without `thresholdMax`). processAlert maps
 * this error class to AlertErrorType.INVALID_ALERT when recording
 * execution errors.
 */
export class InvalidAlertError extends Error {
  constructor(message: string) {
    super(message);
    // Override the default "Error" name so logs identify the failure class.
    this.name = 'InvalidAlertError';
  }
}
// Cap stored error messages so a pathological error (e.g. a huge query dump)
// cannot bloat the Alert document in Mongo.
const MAX_ALERT_ERROR_MESSAGE_LENGTH = 10000;

/**
 * Build an IAlertError entry for the current execution, stamped with the
 * current time and with the message truncated to a safe length.
 *
 * @param type - Category of the failure (query, webhook, invalid, unknown).
 * @param message - Human-readable description; truncated before storage.
 */
const makeAlertError = (
  type: AlertErrorType,
  message: string,
): IAlertError => ({
  timestamp: new Date(),
  type,
  message: message.slice(0, MAX_ALERT_ERROR_MESSAGE_LENGTH),
});
/**
 * Extract a human-readable message from an unknown thrown value.
 *
 * Handles Error instances and plain strings directly; for arbitrary objects
 * it falls back to JSON so we never persist an unhelpful "[object Object]"
 * in an alert's executionErrors.
 *
 * @param e - The caught value (typed unknown per strict catch semantics).
 * @returns A best-effort message string; never throws.
 */
const getErrorMessage = (e: unknown): string => {
  if (e instanceof Error) {
    return e.message;
  }
  if (typeof e === 'string') {
    return e;
  }
  try {
    // JSON.stringify returns undefined for undefined/functions/symbols and
    // throws on circular structures — fall back to String() in both cases.
    return JSON.stringify(e) ?? String(e);
  } catch {
    return String(e);
  }
};
export const doesExceedThreshold = (
{
threshold,
@ -161,7 +185,7 @@ export const doesExceedThreshold = (
case AlertThresholdType.BETWEEN:
case AlertThresholdType.NOT_BETWEEN:
if (thresholdMax == null) {
throw new Error(
throw new InvalidAlertError(
`thresholdMax is required for threshold type "${thresholdType}"`,
);
}
@ -687,6 +711,9 @@ export const processAlert = async (
) => {
const { alert, previousMap } = details;
const source = 'source' in details ? details.source : undefined;
// Errors collected during this execution. Webhook errors accumulate here; query
// and validation errors are recorded via recordAlertErrors before returning.
const executionErrors: IAlertError[] = [];
try {
const windowSizeInMins = ms(alert.interval) / 60000;
const scheduleStartAt = normalizeScheduleStartAt({
@ -841,13 +868,29 @@ export const processAlert = async (
? { readonly: '2' }
: {};
// Query for alert data
const checksData = await clickhouseClient.queryChartConfig({
config: optimizedChartConfig,
metadata,
opts: { clickhouse_settings: clickHouseSettings },
querySettings: source?.querySettings,
});
// Query for alert data. If the query fails, record the error and exit
// without touching alert state or creating an AlertHistory.
let checksData;
try {
checksData = await clickhouseClient.queryChartConfig({
config: optimizedChartConfig,
metadata,
opts: { clickhouse_settings: clickHouseSettings },
querySettings: source?.querySettings,
});
} catch (e) {
logger.error(
{
alertId: alert.id,
error: serializeError(e),
},
'Alert query failed, skipping state/history update',
);
await alertProvider.recordAlertErrors(alert.id, [
makeAlertError(AlertErrorType.QUERY_ERROR, getErrorMessage(e)),
]);
return;
}
logger.info(
{
@ -926,6 +969,9 @@ export const processAlert = async (
{ alertId: alert.id, group, error: serializeError(e) },
'Failed to fire channel event',
);
executionErrors.push(
makeAlertError(AlertErrorType.WEBHOOK_ERROR, getErrorMessage(e)),
);
}
};
@ -986,7 +1032,11 @@ export const processAlert = async (
await sendNotificationIfResolved(previous, history, '');
const historyRecords = Array.from(histories.values());
await alertProvider.updateAlertState(alert.id, historyRecords);
await alertProvider.updateAlertState(
alert.id,
historyRecords,
executionErrors,
);
return;
}
@ -1121,7 +1171,11 @@ export const processAlert = async (
// Save all history records and update alert state
const historyRecords = Array.from(histories.values());
await alertProvider.updateAlertState(alert.id, historyRecords);
await alertProvider.updateAlertState(
alert.id,
historyRecords,
executionErrors,
);
} catch (e) {
// Uncomment this for better error messages locally
// console.error(e);
@ -1132,6 +1186,25 @@ export const processAlert = async (
},
'Failed to process alert',
);
// Record error without touching state/history.
const message = getErrorMessage(e);
const type =
e instanceof InvalidAlertError
? AlertErrorType.INVALID_ALERT
: AlertErrorType.UNKNOWN;
try {
await alertProvider.recordAlertErrors(alert.id, [
makeAlertError(type, message),
]);
} catch (recordErr) {
logger.error(
{
alertId: alert.id,
error: serializeError(recordErr),
},
'Failed to persist alert execution error',
);
}
}
};

View file

@ -9,7 +9,12 @@ import { URLSearchParams } from 'url';
import * as config from '@/config';
import { LOCAL_APP_TEAM } from '@/controllers/team';
import { connectDB, mongooseConnection, ObjectId } from '@/models';
import Alert, { AlertSource, AlertState, type IAlert } from '@/models/alert';
import Alert, {
AlertSource,
AlertState,
type IAlert,
type IAlertError,
} from '@/models/alert';
import AlertHistory, { IAlertHistory } from '@/models/alertHistory';
import Connection, { IConnection } from '@/models/connection';
import Dashboard from '@/models/dashboard';
@ -332,7 +337,11 @@ export default class DefaultAlertProvider implements AlertProvider {
return url.toString();
}
async updateAlertState(alertId: string, histories: IAlertHistory[]) {
async updateAlertState(
alertId: string,
histories: IAlertHistory[],
errors: IAlertError[],
) {
// Save history records first (in parallel), then update alert state
// Use Promise.allSettled to handle partial failures gracefully
const historyResults = await Promise.allSettled(
@ -368,10 +377,17 @@ export default class DefaultAlertProvider implements AlertProvider {
? AlertState.ALERT
: AlertState.OK;
// Update alert state based on successfully saved histories
// Update alert state + errors based on this execution
await Alert.updateOne(
{ _id: new mongoose.Types.ObjectId(alertId) },
{ $set: { state: finalState } },
{ $set: { state: finalState, executionErrors: errors } },
);
}
/**
 * Replace the alert's `executionErrors` array without touching its state
 * or creating any AlertHistory records. Used when an error (e.g. a query
 * failure) prevents the normal state/history update from running.
 */
async recordAlertErrors(alertId: string, errors: IAlertError[]) {
  const alertObjectId = new mongoose.Types.ObjectId(alertId);
  await Alert.updateOne(
    { _id: alertObjectId },
    { $set: { executionErrors: errors } },
  );
}

View file

@ -3,7 +3,7 @@ import { Tile } from '@hyperdx/common-utils/dist/types';
import _ from 'lodash';
import { ObjectId } from '@/models';
import { IAlert } from '@/models/alert';
import { IAlert, IAlertError } from '@/models/alert';
import { IAlertHistory } from '@/models/alertHistory';
import { IConnection } from '@/models/connection';
import { IDashboard } from '@/models/dashboard';
@ -79,8 +79,20 @@ export interface AlertProvider {
* Save the given AlertHistory records and update the associated alert's state.
* Uses Promise.allSettled to handle partial failures gracefully.
* The alert state is determined from successfully saved histories, or falls back to all histories if all saves fail.
* Also replaces the alert's `executionErrors` field with the provided errors from the current execution.
*/
updateAlertState(alertId: string, histories: IAlertHistory[]): Promise<void>;
updateAlertState(
alertId: string,
histories: IAlertHistory[],
errors: IAlertError[],
): Promise<void>;
/**
* Replace the alert's `executionErrors` field without changing state or creating history.
* Use this when an error prevents the normal state/history update from running
* (e.g. a ClickHouse query error).
*/
recordAlertErrors(alertId: string, errors: IAlertError[]): Promise<void>;
/** Fetch all webhooks for the given team, returning a map of webhook ID to webhook */
getWebhooks(teamId: string | ObjectId): Promise<Map<string, IWebhook>>;

View file

@ -332,7 +332,7 @@ export const handleSendGenericWebhook = async (
},
'Failed to compile generic webhook body',
);
return;
throw new Error('Failed to build webhook request body', { cause: e });
}
try {
@ -354,6 +354,8 @@ export const handleSendGenericWebhook = async (
},
'Failed to send generic webhook message',
);
// rethrow so that it can be recorded in alert errors
throw e;
}
};
@ -474,7 +476,7 @@ const getPopulatedChannel = (
channelType: AlertChannelType,
channelIdOrNamePrefix: string,
teamWebhooksById: Map<string, IWebhook>,
): PopulatedAlertChannel | undefined => {
): PopulatedAlertChannel => {
switch (channelType) {
case 'webhook': {
const webhook =
@ -488,13 +490,15 @@ const getPopulatedChannel = (
},
'webhook not found',
);
return undefined;
throw new Error(
`Webhook not found. The webhook may have been deleted — update the alert's notification channel.`,
);
}
return { type: 'webhook', channel: webhook };
}
default: {
logger.error({ channelType }, 'Unsupported alert channel type');
return undefined;
throw new Error('Unsupported alert destination');
}
}
};

View file

@ -1,4 +1,5 @@
import {
AlertErrorType,
AlertThresholdType,
BuilderSavedChartConfig,
DashboardFilter,
@ -6,13 +7,14 @@ import {
SavedChartConfig,
} from '@hyperdx/common-utils/dist/types';
import { omit } from 'lodash';
import { FlattenMaps, LeanDocument } from 'mongoose';
import type { ObjectId } from '@/models';
import {
AlertChannel,
AlertDocument,
AlertInterval,
AlertState,
IAlert,
} from '@/models/alert';
import type { DashboardDocument } from '@/models/dashboard';
import { SeriesTile } from '@/routers/external-api/v2/utils/dashboards';
@ -246,13 +248,16 @@ export type ExternalAlert = {
at: string;
until: string;
};
executionErrors?: {
timestamp: string;
type: AlertErrorType;
message: string;
}[];
createdAt?: string;
updatedAt?: string;
};
type AlertDocumentObject =
| AlertDocument
| FlattenMaps<LeanDocument<AlertDocument>>;
// Plain-object alert shape used by the translators below: the raw IAlert
// fields plus the Mongo `_id`.
type AlertDocumentObject = IAlert & { _id: ObjectId };
function hasCreatedAt(
alert: AlertDocumentObject,
@ -296,6 +301,19 @@ function transformSilencedToExternalSilenced(
: undefined;
}
/**
 * Map the internal `executionErrors` array onto the external API shape,
 * serializing `Date` timestamps to ISO-8601. String timestamps are passed
 * through unchanged. Returns `undefined` when no errors are recorded.
 */
function transformErrorsToExternalErrors(
  errors: AlertDocumentObject['executionErrors'],
): ExternalAlert['executionErrors'] {
  if (!errors) {
    return undefined;
  }
  return errors.map(({ timestamp, type, message }) => {
    const serializedTimestamp =
      timestamp instanceof Date ? timestamp.toISOString() : String(timestamp);
    return { timestamp: serializedTimestamp, type, message };
  });
}
export function translateAlertDocumentToExternalAlert(
alert: AlertDocument,
): ExternalAlert {
@ -326,6 +344,7 @@ export function translateAlertDocumentToExternalAlert(
savedSearchId: alertObj.savedSearch?.toString(),
groupBy: alertObj.groupBy,
silenced: transformSilencedToExternalSilenced(alertObj.silenced),
executionErrors: transformErrorsToExternalErrors(alertObj.executionErrors),
createdAt: hasCreatedAt(alertObj)
? alertObj.createdAt.toISOString()
: undefined,

View file

@ -161,7 +161,7 @@ function AlertDetails({ alert }: { alert: AlertsPageItem }) {
</Group>
<Group>
<AlertHistoryCardList history={alert.history} alertUrl={alertUrl} />
<AlertHistoryCardList alert={alert} alertUrl={alertUrl} />
<AckAlert alert={alert} />
</Group>
</div>

View file

@ -331,9 +331,7 @@ const AlertForm = ({
)}
{alert && (
<Group>
{alert.history.length > 0 && (
<AlertHistoryCardList history={alert.history} />
)}
<AlertHistoryCardList alert={alert} />
<AckAlert alert={alert} />
</Group>
)}

View file

@ -125,9 +125,7 @@ export function TileAlertEditor({
</Group>
</UnstyledButton>
<Group gap="xs">
{alertItem && alertItem.history.length > 0 && (
<AlertHistoryCardList history={alertItem.history} />
)}
{alertItem && <AlertHistoryCardList alert={alertItem} />}
{alertItem && <AckAlert alert={alertItem} />}
<Tooltip label="Remove alert">
<ActionIcon

View file

@ -1,8 +1,27 @@
import * as React from 'react';
import cx from 'classnames';
import { formatRelative } from 'date-fns';
import { AlertHistory, AlertState } from '@hyperdx/common-utils/dist/types';
import { Tooltip } from '@mantine/core';
import {
AlertError,
AlertErrorType,
AlertHistory,
AlertState,
} from '@hyperdx/common-utils/dist/types';
import {
Badge,
Code,
Group,
Modal,
Stack,
Text,
Tooltip,
UnstyledButton,
} from '@mantine/core';
import { useDisclosure } from '@mantine/hooks';
import { IconExclamationCircle } from '@tabler/icons-react';
import type { AlertsPageItem } from '@/types';
import { FormatTime } from '@/useFormatTime';
import styles from '../../../styles/AlertsPage.module.scss';
@ -63,13 +82,103 @@ function AlertHistoryCard({
);
}
// Human-readable label for each alert execution error category; used both for
// the indicator tooltip and for the per-error headings in the details modal.
const ALERT_ERROR_TYPE_LABELS: Record<AlertErrorType, string> = {
  [AlertErrorType.INVALID_ALERT]: 'Invalid Configuration',
  [AlertErrorType.QUERY_ERROR]: 'Query Error',
  [AlertErrorType.WEBHOOK_ERROR]: 'Webhook Error',
  [AlertErrorType.UNKNOWN]: 'Unknown Error',
};
/**
 * Red warning icon shown next to an alert's history when execution errors are
 * recorded on it. Clicking the icon opens a modal listing each distinct
 * (type, message) error with its most recent timestamp. Renders nothing when
 * the alert has no recorded errors.
 */
function AlertErrorsIndicator({ alert }: { alert: AlertsPageItem }) {
  const [opened, { open, close }] = useDisclosure(false);

  // Deduplicate by (type, message), keeping only the latest occurrence of
  // each, and collect the distinct error types for the tooltip label.
  const { uniqueErrors, uniqueTypes } = React.useMemo(() => {
    const latestByKey = new Map<string, AlertError>();
    (alert.executionErrors ?? []).forEach(error => {
      const key = `${error.type}||${error.message}`;
      const prev = latestByKey.get(key);
      const isNewer =
        !prev ||
        new Date(error.timestamp).getTime() >
          new Date(prev.timestamp).getTime();
      if (isNewer) {
        latestByKey.set(key, error);
      }
    });
    const deduped = [...latestByKey.values()];
    return {
      uniqueErrors: deduped,
      uniqueTypes: [...new Set(deduped.map(error => error.type))],
    };
  }, [alert.executionErrors]);

  if (uniqueErrors.length === 0) {
    return null;
  }

  const errorType =
    uniqueTypes.length === 1
      ? ALERT_ERROR_TYPE_LABELS[uniqueTypes[0]]
      : 'Multiple Errors';

  return (
    <>
      <Tooltip
        label={`${errorType} (Click for details)`}
        multiline
        maw={400}
        withArrow
        color="dark"
      >
        <UnstyledButton
          data-testid={`alert-error-icon-${alert._id}`}
          onClick={open}
          style={{
            display: 'inline-flex',
            color: 'var(--mantine-color-red-6)',
            cursor: 'pointer',
          }}
          aria-label="View alert execution errors"
        >
          <IconExclamationCircle size={18} />
        </UnstyledButton>
      </Tooltip>
      <Modal
        opened={opened}
        onClose={close}
        size="lg"
        title="Alert Execution Errors"
        data-testid={`alert-error-modal-${alert._id}`}
      >
        <Stack gap="md">
          {uniqueErrors.map((error, idx) => (
            <Stack key={idx} gap={4}>
              <Text size="sm">
                {ALERT_ERROR_TYPE_LABELS[error.type]} at{' '}
                <FormatTime value={error.timestamp} />
              </Text>
              <Code flex={1} block style={{ whiteSpace: 'pre-wrap' }}>
                {error.message}
              </Code>
            </Stack>
          ))}
        </Stack>
      </Modal>
    </>
  );
}
export function AlertHistoryCardList({
history,
alert,
alertUrl,
}: {
history: AlertHistory[];
alert: AlertsPageItem;
alertUrl?: string;
}) {
const { history } = alert;
const items = React.useMemo(() => {
if (history.length < HISTORY_ITEMS) {
return history;
@ -85,18 +194,27 @@ export function AlertHistoryCardList({
}, [history]);
return (
<div className={styles.historyCardWrapper}>
{paddingItems.map((_, index) => (
<Tooltip label="No data" withArrow key={index}>
<div className={styles.historyCard} />
</Tooltip>
))}
{items
.slice()
.reverse()
.map((history, index) => (
<AlertHistoryCard key={index} history={history} alertUrl={alertUrl} />
))}
</div>
<Group gap="xs" wrap="nowrap">
<AlertErrorsIndicator alert={alert} />
{items.length > 0 && (
<div className={styles.historyCardWrapper}>
{paddingItems.map((_, index) => (
<Tooltip label="No data" withArrow key={index}>
<div className={styles.historyCard} />
</Tooltip>
))}
{items
.slice()
.reverse()
.map((history, index) => (
<AlertHistoryCard
key={index}
history={history}
alertUrl={alertUrl}
/>
))}
</div>
)}
</Group>
);
}

View file

@ -1,5 +1,6 @@
import { DisplayType } from '@hyperdx/common-utils/dist/types';
import { SEEDED_ERROR_ALERT } from '../global-setup-fullstack';
import { AlertsPage } from '../page-objects/AlertsPage';
import { DashboardPage } from '../page-objects/DashboardPage';
import { SearchPage } from '../page-objects/SearchPage';
@ -445,3 +446,43 @@ test.describe('Alert Creation', { tag: ['@alerts', '@full-stack'] }, () => {
},
);
});
// Exercises the error-indicator icon and details modal on /alerts. Relies on
// the alert seeded with `executionErrors` in global-setup-fullstack
// (SEEDED_ERROR_ALERT) rather than running the check-alerts background job.
test.describe(
  'Alert Execution Errors',
  { tag: ['@alerts', '@full-stack'] },
  () => {
    let alertsPage: AlertsPage;

    test.beforeEach(async ({ page }) => {
      alertsPage = new AlertsPage(page);
      await alertsPage.goto();
      await expect(alertsPage.pageContainer).toBeVisible();
    });

    test('shows alert errors with the correct type and message', async () => {
      // The seeded alert card is looked up by its saved-search name, which is
      // the stable identifier shared with the seeder.
      const seededCard = alertsPage.getAlertCardByName(
        SEEDED_ERROR_ALERT.savedSearchName,
      );
      await expect(seededCard).toBeVisible({ timeout: 10000 });

      const errorIcon = alertsPage.getErrorIconForAlertCard(seededCard);
      await expect(errorIcon).toBeVisible();

      // Modal is hidden before the click
      await expect(alertsPage.errorModal).toBeHidden();
      await alertsPage.openErrorModalForAlertCard(seededCard);
      await expect(alertsPage.errorModal).toBeVisible();

      // QUERY_ERROR renders with the "Query Error" type label in the modal
      await expect(
        alertsPage.errorModal.getByText(/Query Error/),
      ).toBeVisible();

      // The <code> block contains the full seeded error message (not truncated)
      await expect(alertsPage.errorModalMessage).toContainText(
        SEEDED_ERROR_ALERT.errorMessage,
      );
    });
  },
);

View file

@ -46,6 +46,51 @@ const AUTH_FILE = path.join(__dirname, '.auth/user.json');
const MONGO_URI =
process.env.MONGO_URI || `mongodb://localhost:${MONGO_PORT}/hyperdx-e2e`;
/**
* Seeded test data with predictable identifiers so E2E tests can look it up.
* Exported so tests can reference the same constants instead of hard-coding.
*/
export const SEEDED_ERROR_ALERT = {
  savedSearchName: 'E2E Errored Alert Search',
  webhookName: 'E2E Error Webhook',
  // URL gets appended with a unique suffix inside the seeder to stay idempotent
  // if the user record already exists (409 path).
  webhookUrlBase: 'https://example.com/e2e-error-webhook',
  errorType: 'QUERY_ERROR',
  errorMessage:
    'ClickHouse returned 500: DB::Exception: Timeout exceeded: elapsed 30s, maximum: 30s while executing query.',
  // `as const` keeps every field as a readonly string literal, so e.g.
  // `errorType` type-checks against literal-typed expectations in tests and
  // the object cannot be mutated from a test file by accident.
} as const;
/**
* Run a mongosh script against the e2e MongoDB container by piping the script
* through stdin. Using stdin (rather than `--eval "<...>"`) avoids having to
* escape quotes in the script body, so callers can pass multi-line JavaScript
* with string literals verbatim.
*
* Throws if the docker-compose file can't be found (meaning we're not running
* in the expected Docker-backed e2e environment).
*/
/**
 * Pipe a mongosh script into the e2e MongoDB container over stdin. Using
 * stdin (rather than `--eval "<...>"`) sidesteps shell quoting, so callers
 * can pass multi-line JavaScript with string literals verbatim.
 *
 * Throws when the docker-compose file is missing, i.e. the Docker-backed
 * e2e environment isn't available.
 */
function runMongoshScript(script: string): string {
  const composePath = path.join(__dirname, 'docker-compose.yml');
  if (!fs.existsSync(composePath)) {
    throw new Error(
      `docker-compose.yml not found at ${composePath} — e2e Docker stack unavailable`,
    );
  }
  // Each parallel e2e slot runs its own compose project (`e2e-<slot>`).
  const slot = process.env.HDX_E2E_SLOT || '0';
  const command = `docker compose -p e2e-${slot} -f "${composePath}" exec -T db mongosh --quiet`;
  return execSync(command, {
    encoding: 'utf-8',
    stdio: ['pipe', 'pipe', 'pipe'],
    input: script,
  });
}
/**
* Clears the MongoDB database to ensure a clean slate for tests
*/
@ -53,19 +98,8 @@ function clearDatabase() {
console.log('Clearing MongoDB database for fresh test run...');
try {
const dockerComposeFile = path.join(__dirname, 'docker-compose.yml');
const e2eSlot = process.env.HDX_E2E_SLOT || '0';
const e2eProject = `e2e-${e2eSlot}`;
if (fs.existsSync(dockerComposeFile)) {
execSync(
`docker compose -p ${e2eProject} -f "${dockerComposeFile}" exec -T db mongosh --quiet --eval "use hyperdx-e2e; db.dropDatabase()" 2>&1`,
{ encoding: 'utf-8', stdio: 'pipe' },
);
console.log(' ✓ Database cleared successfully (via Docker)');
return;
}
throw new Error('Could not connect to MongoDB');
runMongoshScript("use('hyperdx-e2e'); db.dropDatabase();");
console.log(' ✓ Database cleared successfully (via Docker)');
} catch (error) {
console.warn(' ⚠ Warning: Could not clear database');
console.warn(` ${error instanceof Error ? error.message : String(error)}`);
@ -271,6 +305,11 @@ async function globalSetup(_config: FullConfig) {
await context.storageState({ path: AUTH_FILE });
console.log(` Auth state saved to ${AUTH_FILE}`);
// Seed an alert that has execution errors recorded so tests can exercise
// the /alerts error-icon + modal UI without having to run the check-alerts
// background job.
await seedAlertWithErrors(page, API_URL, sources);
console.log('Full-stack E2E setup complete');
console.log(
' Using local ClickHouse with seeded test data for logs, traces, metrics, and K8s',
@ -284,4 +323,120 @@ async function globalSetup(_config: FullConfig) {
}
}
/**
* Seeds an alert with a recorded execution error. The alert is created via the
 * API (so all referenced documents — saved search, webhook — exist and the
* alerts list endpoint populates correctly), then the `errors` array is
* patched in directly via mongosh since it's only ever set by the check-alerts
* background job in normal operation.
*/
async function seedAlertWithErrors(
  page: import('@playwright/test').Page,
  apiUrl: string,
  sources: Array<{ _id: string; kind: string }>,
) {
  console.log('Seeding alert with errors for UI tests');

  // The alert must point at a log source; bail (with a warning, not a failure)
  // if ClickHouse source setup didn't produce one.
  const logSource = sources.find(s => s.kind === 'log');
  if (!logSource) {
    console.warn(' ⚠ No log source available — skipping alert seed');
    return;
  }

  // 1) Saved search for the alert to reference. The router is mounted at
  // `/saved-search` (see api-app.ts) — not `/savedSearches`.
  // NOTE(review): page.request presumably carries the logged-in session's
  // cookies from the auth step earlier in global setup — confirm if this
  // seeder is ever moved before authentication.
  const savedSearchRes = await page.request.post(`${apiUrl}/saved-search`, {
    data: {
      name: SEEDED_ERROR_ALERT.savedSearchName,
      select: '',
      where: 'SeverityText: "error"',
      whereLanguage: 'lucene',
      source: logSource._id,
      tags: [],
    },
  });
  if (!savedSearchRes.ok()) {
    console.warn(
      ` ⚠ Could not create saved search (${savedSearchRes.status()}): ${await savedSearchRes.text()}`,
    );
    return;
  }
  const savedSearch = await savedSearchRes.json();

  // 2) Webhook for the alert's notification channel. Use a timestamped URL so
  // a stale team (e.g. if clearDatabase silently failed) doesn't collide with
  // the webhook uniqueness constraint on (team, service, url).
  const uniqueUrl = `${SEEDED_ERROR_ALERT.webhookUrlBase}-${Date.now()}`;
  const webhookRes = await page.request.post(`${apiUrl}/webhooks`, {
    data: {
      name: SEEDED_ERROR_ALERT.webhookName,
      service: 'generic',
      url: uniqueUrl,
      body: JSON.stringify({ text: '{{title}}' }),
    },
  });
  if (!webhookRes.ok()) {
    console.warn(
      ` ⚠ Could not create webhook (${webhookRes.status()}): ${await webhookRes.text()}`,
    );
    return;
  }
  const webhook = (await webhookRes.json()).data;

  // 3) Alert — saved search source, referencing the webhook above.
  const alertRes = await page.request.post(`${apiUrl}/alerts`, {
    data: {
      source: 'saved_search',
      savedSearchId: savedSearch._id ?? savedSearch.id,
      channel: { type: 'webhook', webhookId: webhook._id ?? webhook.id },
      interval: '5m',
      threshold: 1,
      thresholdType: 'above',
      name: 'E2E Errored Alert',
    },
  });
  if (!alertRes.ok()) {
    console.warn(
      ` ⚠ Could not create alert (${alertRes.status()}): ${await alertRes.text()}`,
    );
    return;
  }
  const alert = (await alertRes.json()).data;
  // NOTE(review): endpoints here are inconsistent about `_id` vs `id` in
  // responses, hence the fallbacks above and below — verify against each
  // router's serializer.
  const alertId: string = alert._id ?? alert.id;

  // 4) Patch the `executionErrors` array directly via mongosh. The
  // check-alerts job is the only code that writes this field in normal
  // operation, so we write it here to avoid having to run that job during
  // setup.
  // JSON.stringify is used to safely embed each value as a quoted JS string
  // literal inside the mongosh script.
  const patchScript = `
    use('hyperdx-e2e');
    db.alerts.updateOne(
      { _id: ObjectId(${JSON.stringify(alertId)}) },
      {
        $set: {
          executionErrors: [
            {
              timestamp: new Date(),
              type: ${JSON.stringify(SEEDED_ERROR_ALERT.errorType)},
              message: ${JSON.stringify(SEEDED_ERROR_ALERT.errorMessage)}
            }
          ],
          state: 'OK'
        }
      }
    );
  `;
  try {
    runMongoshScript(patchScript);
    console.log(
      ` ✓ Seeded alert "${alert.name}" (${alertId}) with a ${SEEDED_ERROR_ALERT.errorType}`,
    );
  } catch (error) {
    // Non-fatal: tests that need the seeded errors will fail loudly on their
    // own; setup keeps going so unrelated suites still run.
    console.warn(
      ` ⚠ Could not patch alert errors: ${error instanceof Error ? error.message : String(error)}`,
    );
  }
}
export default globalSetup;

View file

@ -62,6 +62,52 @@ export class AlertsPage {
await this.alertsButton.click();
}
/**
* Get the alert card that contains a given name (saved search or dashboard/tile name).
* Scopes all further lookups to a single alert row so assertions aren't polluted
* by other tests' data.
*/
getAlertCardByName(name: string) {
return this.alertsPageContainer
.locator('[data-testid^="alert-card-"]')
.filter({ hasText: name });
}
/**
* Get the error-indicator icon button inside a given alert card.
* The icon is only rendered when the alert has recorded execution errors.
*/
  getErrorIconForAlertCard(alertCard: Locator) {
    // Prefix match on the testid: the full id embeds the alert's Mongo id,
    // which callers don't need to know.
    return alertCard.locator('[data-testid^="alert-error-icon-"]');
  }
/**
* Get the error details modal (rendered at the page level via Mantine portal).
* The modal is identified by its Mantine role and title rather than by the
* per-alert data-testid so callers don't need to know the alert id.
*/
  get errorModal() {
    // The modal is portaled to the document root, so resolve it from the page
    // by its accessible dialog name rather than from within an alert card.
    return this.page.getByRole('dialog', { name: 'Alert Execution Errors' });
  }
/**
* Get the full error message Code block inside the currently-open error modal.
* Uses the native <code> role so we don't leak styling-level implementation
* details (the Mantine Code component renders as <code>).
*/
  get errorModalMessage() {
    // NOTE(review): selector targets <pre> although the doc comment above
    // mentions <code> — presumably Mantine's block Code wraps its <code> in a
    // <pre>; confirm against the rendered DOM.
    return this.errorModal.locator('pre');
  }
/**
* Open the error modal for an alert card and wait for it to become visible.
*/
async openErrorModalForAlertCard(alertCard: Locator) {
const icon = this.getErrorIconForAlertCard(alertCard);
await icon.scrollIntoViewIfNeeded();
await icon.click();
}
// Getters for assertions
get pageContainer() {

View file

@ -297,6 +297,21 @@ export enum AlertState {
OK = 'OK',
}
/**
 * Category of an error recorded during alert execution.
 * Values mirror the string enum in the OpenAPI `AlertErrorType` schema.
 */
export enum AlertErrorType {
  QUERY_ERROR = 'QUERY_ERROR',
  WEBHOOK_ERROR = 'WEBHOOK_ERROR',
  INVALID_ALERT = 'INVALID_ALERT',
  UNKNOWN = 'UNKNOWN',
}
/**
 * One error recorded during an alert execution.
 * `timestamp` is a string/Date union because the value may be a Date
 * in-process but arrives as a serialized string over JSON transport.
 */
export const AlertErrorSchema = z.object({
  timestamp: z.union([z.string(), z.date()]),
  type: z.nativeEnum(AlertErrorType),
  message: z.string(),
});

export type AlertError = z.infer<typeof AlertErrorSchema>;
export enum AlertSource {
SAVED_SEARCH = 'saved_search',
TILE = 'tile',
@ -1368,6 +1383,7 @@ export const AlertsPageItemSchema = z.object({
until: z.string(),
})
.optional(),
executionErrors: z.array(AlertErrorSchema).optional(),
});
export type AlertsPageItem = z.infer<typeof AlertsPageItemSchema>;