mirror of
https://github.com/fleetdm/fleet
synced 2026-05-24 01:18:42 +00:00
Terraform monitoring alert thresholds update (#18790)
This allows customizing the alert thresholds for HTTP ELB and target 5xx errors, so that a single 5xx no longer has to trigger an alert if we decide the thresholds should be higher. If nothing is passed in, the defaults are used: a 120-second period with a threshold of 0 (no tolerance). Alternatives can be specified per alert with a threshold and a period. Closes #18776. Co-authored-by: Luke Heath <luke@fleetdm.com>
This commit is contained in:
parent
434239e5f9
commit
ef0414d1ec
3 changed files with 28 additions and 5 deletions
|
|
@ -138,7 +138,7 @@ No modules.
|
|||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|------|---------|:--------:|
|
||||
| <a name="input_acm_certificate_arn"></a> [acm\_certificate\_arn](#input\_acm\_certificate\_arn) | n/a | `string` | `null` | no |
|
||||
| <a name="input_albs"></a> [albs](#input\_albs) | n/a | <pre>list(object({<br> name = string<br> arn_suffix = string<br> target_group_name = string<br> target_group_arn_suffix = string<br> min_containers = optional(string, 1)<br> ecs_service_name = string<br> }))</pre> | `[]` | no |
|
||||
| <a name="input_albs"></a> [albs](#input\_albs) | n/a | <pre>list(object({<br> name = string<br> arn_suffix = string<br> target_group_name = string<br> target_group_arn_suffix = string<br> min_containers = optional(string, 1)<br> ecs_service_name = string<br> alert_thresholds = optional(<br> object({<br> HTTPCode_ELB_5XX_Count = object({<br> period = number<br> threshold = number<br> })<br> HTTPCode_Target_5XX_Count = object({<br> period = number<br> threshold = number<br> })<br> }),<br> {<br> HTTPCode_ELB_5XX_Count = {<br> period = 120<br> threshold = 0<br> },<br> HTTPCode_Target_5XX_Count = {<br> period = 120<br> threshold = 0<br> }<br> }<br> )<br> }))</pre> | `[]` | no |
|
||||
| <a name="input_cron_monitoring"></a> [cron\_monitoring](#input\_cron\_monitoring) | n/a | <pre>object({<br> mysql_host = string<br> mysql_database = string<br> mysql_user = string<br> mysql_password_secret_name = string<br> vpc_id = string<br> subnet_ids = list(string)<br> rds_security_group_id = string<br> delay_tolerance = string<br> run_interval = string<br> log_retention_in_days = optional(number, 7)<br> })</pre> | `null` | no |
|
||||
| <a name="input_customer_prefix"></a> [customer\_prefix](#input\_customer\_prefix) | n/a | `string` | `"fleet"` | no |
|
||||
| <a name="input_default_sns_topic_arns"></a> [default\_sns\_topic\_arns](#input\_default\_sns\_topic\_arns) | n/a | `list(string)` | `[]` | no |
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ resource "aws_db_event_subscription" "default" {
|
|||
}
|
||||
|
||||
locals {
|
||||
alb_map = {for k, v in var.albs: k => v}
|
||||
alb_map = { for k, v in var.albs : k => v }
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -102,7 +102,7 @@ resource "aws_cloudwatch_metric_alarm" "target_response_time" {
|
|||
locals {
|
||||
http_5xx_alert_names = ["HTTPCode_ELB_5XX_Count", "HTTPCode_Target_5XX_Count"]
|
||||
http_5xx_alerts_list = flatten([for alert in local.http_5xx_alert_names : [for alb in var.albs : merge(alb, { "alert" : alert })]])
|
||||
http_5xx_alerts = {for k, v in local.http_5xx_alerts_list : k => v}
|
||||
http_5xx_alerts = { for k, v in local.http_5xx_alerts_list : k => v }
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -113,9 +113,9 @@ resource "aws_cloudwatch_metric_alarm" "lb" {
|
|||
evaluation_periods = "1"
|
||||
metric_name = each.value.alert
|
||||
namespace = "AWS/ApplicationELB"
|
||||
period = "120"
|
||||
period = each.value.alert_thresholds[each.value.alert].period
|
||||
statistic = "Sum"
|
||||
threshold = "0"
|
||||
threshold = each.value.alert_thresholds[each.value.alert].threshold
|
||||
alarm_description = "This alarm indicates there are an abnormal amount of 5XX responses. Either the lb cannot talk with the Fleet backend target or Fleet is returning an error."
|
||||
alarm_actions = lookup(var.sns_topic_arns_map, "alb_httpcode_5xx", var.default_sns_topic_arns)
|
||||
ok_actions = lookup(var.sns_topic_arns_map, "alb_httpcode_5xx", var.default_sns_topic_arns)
|
||||
|
|
|
|||
|
|
@ -16,10 +16,33 @@ variable "albs" {
|
|||
target_group_arn_suffix = string
|
||||
min_containers = optional(string, 1)
|
||||
ecs_service_name = string
|
||||
alert_thresholds = optional(
|
||||
object({
|
||||
HTTPCode_ELB_5XX_Count = object({
|
||||
period = number
|
||||
threshold = number
|
||||
})
|
||||
HTTPCode_Target_5XX_Count = object({
|
||||
period = number
|
||||
threshold = number
|
||||
})
|
||||
}),
|
||||
{
|
||||
HTTPCode_ELB_5XX_Count = {
|
||||
period = 120
|
||||
threshold = 0
|
||||
},
|
||||
HTTPCode_Target_5XX_Count = {
|
||||
period = 120
|
||||
threshold = 0
|
||||
}
|
||||
}
|
||||
)
|
||||
}))
|
||||
default = []
|
||||
}
|
||||
|
||||
|
||||
variable "default_sns_topic_arns" {
|
||||
type = list(string)
|
||||
default = []
|
||||
|
|
|
|||
Loading…
Reference in a new issue