mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
Add addSystemAlerts task (documentation update still needed)
This commit is contained in:
parent
ef89608c65
commit
9d1358526f
3 changed files with 174 additions and 2 deletions
|
|
@ -7,8 +7,8 @@ export type AlertType = 'presence' | 'absence';
|
|||
|
||||
/**
 * Names of the built-in (system) alerts.
 *
 * The string value of each member is used as the alert's `name` when system
 * alerts are created and when checking whether a team already has one.
 *
 * NOTE(review): the flattened diff listed both the pre-commit and post-commit
 * members, which declared `ANOMALOUS_REQUESTS` twice; only the post-commit
 * members are kept here.
 */
export enum SystemAlertName {
  ANOMALOUS_ERRORS = 'Anomalous HTTP Server Errors',
  ANOMALOUS_REQUESTS = 'Anomalous HTTP Successful Requests',
  ANOMALOUS_ERROR_EVENTS = 'Anomalous Error Events',
}
|
||||
|
||||
export enum AlertState {
|
||||
|
|
|
|||
168
packages/api/src/tasks/alerts/addSystemAlerts.ts
Normal file
168
packages/api/src/tasks/alerts/addSystemAlerts.ts
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
import { serializeError } from 'serialize-error';
|
||||
|
||||
import { AggFn } from '@/clickhouse';
|
||||
import Alert, {
|
||||
AlertChannel,
|
||||
AnomalyModel,
|
||||
CheckerType,
|
||||
SystemAlertName,
|
||||
} from '@/models/alert';
|
||||
import Team from '@/models/team';
|
||||
import logger from '@/utils/logger';
|
||||
|
||||
/** Static description of one built-in alert that the task can create for a team. */
interface SystemAlertConfig {
  /** Alert name; also the key used to detect whether a team already has it. */
  name: SystemAlertName;
  /** Search filter applied to the alert's single `logs` series. */
  where: string;
  /** Message stored on the alert (may contain `{{...}}` placeholders). */
  message: string;
  /** Anomaly models handed to the alert's anomaly checker. */
  models: AnomalyModel[];
  /** Alert interval, e.g. `'5m'`. */
  interval: string;
}
|
||||
|
||||
/** Interval shared by every built-in alert. */
const SYSTEM_ALERT_INTERVAL = '5m';

/** Joins the "Alert for <name>" title and a detail sentence with a blank line. */
const systemAlertMessage = (title: SystemAlertName, detail: string): string =>
  [`Alert for ${title}`, detail].join('\n\n');

/** Returns a fresh model list containing only the enabled 'zscore' model. */
const zscoreOnlyModels = (): AnomalyModel[] => [
  { name: 'zscore', enabled: true, params: { threshold: 10 } },
];

/** Built-in alerts that every team is expected to have. */
const SYSTEM_ALERT_CONFIGS: SystemAlertConfig[] = [
  {
    name: SystemAlertName.ANOMALOUS_ERRORS,
    where: 'level:"error" span.kind:"server"',
    message: systemAlertMessage(
      SystemAlertName.ANOMALOUS_ERRORS,
      'Observed {{value}} requests with errors returned in the past {{granularity}}(s).',
    ),
    models: zscoreOnlyModels(),
    interval: SYSTEM_ALERT_INTERVAL,
  },
  {
    name: SystemAlertName.ANOMALOUS_REQUESTS,
    where: 'level:"ok" span.kind:"server"',
    message: systemAlertMessage(
      SystemAlertName.ANOMALOUS_REQUESTS,
      'Observed {{value}} requests returned in the past {{granularity}}(s).',
    ),
    models: zscoreOnlyModels(),
    interval: SYSTEM_ALERT_INTERVAL,
  },
  {
    name: SystemAlertName.ANOMALOUS_ERROR_EVENTS,
    where: 'level:error',
    message: systemAlertMessage(
      SystemAlertName.ANOMALOUS_ERROR_EVENTS,
      'Observed {{value}} error logs returned in the past {{granularity}}(s).',
    ),
    models: zscoreOnlyModels(),
    interval: SYSTEM_ALERT_INTERVAL,
  },
];
|
||||
|
||||
/**
 * Creates one system alert for a team from a static config.
 *
 * Despite the name, this function performs no existence check itself; it
 * always attempts the insert. Failures are logged and swallowed, so the
 * returned promise never rejects because of `Alert.create`.
 *
 * @param teamId - Team that will own the alert.
 * @param channel - Notification channel stored on the alert.
 * @param config - Name, filter, message, models and interval of the alert.
 */
async function createAlertIfMissing(
  teamId: string,
  channel: AlertChannel,
  config: SystemAlertConfig,
): Promise<void> {
  try {
    await Alert.create({
      team: teamId,
      isSystem: true,
      name: config.name,
      interval: config.interval,
      threshold: 1,
      type: 'presence',
      cron: '* * * * *',
      timezone: 'UTC',
      source: 'CUSTOM',
      channel,
      checker: {
        type: CheckerType.Anomaly,
        config: { mode: 'any', models: config.models },
      },
      customConfig: {
        // A single ungrouped count over the logs table, filtered by the config.
        series: [
          {
            table: 'logs',
            type: 'table',
            where: config.where,
            aggFn: AggFn.Count,
            groupBy: [],
          },
        ],
      },
      // NOTE(review): presumably minutes (i.e. 24 h) — confirm against the Alert model.
      historyWindow: 1440,
      message: config.message,
    });
  } catch (err) {
    logger.error({
      message: 'error creating system alert',
      teamId,
      config,
      error: serializeError(err),
    });
  }
}
|
||||
|
||||
/**
 * Task entry point: makes sure every team has each alert listed in
 * `SYSTEM_ALERT_CONFIGS`.
 *
 * Teams are processed one after another. For each team the configs are
 * checked sequentially, the missing alerts are created concurrently, and the
 * task waits for that team's creations before moving on to the next team.
 *
 * Errors from `Team.find` or `Alert.findOne` propagate to the caller;
 * creation errors are logged inside `createAlertIfMissing`.
 */
export default async () => {
  const teams = await Team.find({});
  logger.info(`Checking system alerts for ${teams.length} teams`);

  for (const team of teams) {
    const teamId = team.id;
    logger.info(`Processing team ${teamId}`);

    // Scoped to this team: the previous shared array kept growing, so every
    // iteration re-awaited the already settled promises of earlier teams.
    const pendingCreations: Promise<void>[] = [];

    for (const systemAlertConfig of SYSTEM_ALERT_CONFIGS) {
      const existingAlert = await Alert.findOne({
        team: teamId,
        isSystem: true,
        source: 'CUSTOM',
        name: systemAlertConfig.name,
      });

      if (existingAlert) {
        continue;
      }

      logger.info(
        `Team ${teamId} is missing ${systemAlertConfig.name}, creating one`,
      );

      // NOTE(review): 'YOUR_WEBHOOK_ID' is a placeholder, so alerts created
      // here point at a webhook that presumably does not exist — confirm how
      // the real channel is meant to be supplied before relying on delivery.
      const defaultChannel: AlertChannel = {
        type: 'webhook',
        webhookId: 'YOUR_WEBHOOK_ID',
      };

      pendingCreations.push(
        createAlertIfMissing(teamId, defaultChannel, systemAlertConfig),
      );
    }

    await Promise.all(pendingCreations);
  }
};
|
||||
|
|
@ -5,6 +5,7 @@ import { serializeError } from 'serialize-error';
|
|||
|
||||
import { IS_DEV } from '@/config';
|
||||
import { connectDB, mongooseConnection } from '@/models';
|
||||
import addSystemAlerts from '@/tasks/alerts/addSystemAlerts';
|
||||
import checkAnomalyAlerts from '@/tasks/alerts/checkAnomalyAlerts';
|
||||
import checkUserAlerts from '@/tasks/alerts/checkUserAlerts';
|
||||
import refreshPropertyTypeMappings from '@/tasks/refreshPropertyTypeMappings';
|
||||
|
|
@ -21,6 +22,9 @@ const main = async () => {
|
|||
const t0 = performance.now();
|
||||
logger.info(`Task [${taskName}] started at ${new Date()}`);
|
||||
switch (taskName) {
|
||||
case 'add-system-alerts':
|
||||
await addSystemAlerts();
|
||||
break;
|
||||
// TODO: rename to check-users-alerts
|
||||
case 'check-alerts':
|
||||
await checkUserAlerts();
|
||||
|
|
|
|||
Loading…
Reference in a new issue