From ef0414d1eca66a462fe9169080ba170e5616ea99 Mon Sep 17 00:00:00 2001 From: Robert Fairburn <8029478+rfairburn@users.noreply.github.com> Date: Tue, 7 May 2024 11:17:01 -0500 Subject: [PATCH] Terraform monitoring alert thresholds update (#18790) This allows customizing the alert thresholds for the HTTP ELB and target 5xx alarms (`HTTPCode_ELB_5XX_Count` and `HTTPCode_Target_5XX_Count`). This will prevent a single 5xx from triggering an alert if we decide the thresholds should be higher. The default of a 120-second period with a threshold of 0 will be used if nothing is passed in, but alternatives can be specified per ALB through the new optional `alert_thresholds` attribute, which takes a `period` and a `threshold` for each of the two alarms. closes #18776 --------- Co-authored-by: Luke Heath --- terraform/addons/monitoring/README.md | 2 +- terraform/addons/monitoring/main.tf | 8 ++++---- terraform/addons/monitoring/variables.tf | 23 +++++++++++++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/terraform/addons/monitoring/README.md b/terraform/addons/monitoring/README.md index b35a415e1d..5ee696f9ca 100644 --- a/terraform/addons/monitoring/README.md +++ b/terraform/addons/monitoring/README.md @@ -138,7 +138,7 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [acm\_certificate\_arn](#input\_acm\_certificate\_arn) | n/a | `string` | `null` | no | -| [albs](#input\_albs) | n/a |
list(object({
name = string
arn_suffix = string
target_group_name = string
target_group_arn_suffix = string
min_containers = optional(string, 1)
ecs_service_name = string
}))
| `[]` | no | +| [albs](#input\_albs) | n/a |
list(object({
name = string
arn_suffix = string
target_group_name = string
target_group_arn_suffix = string
min_containers = optional(string, 1)
ecs_service_name = string
alert_thresholds = optional(
object({
HTTPCode_ELB_5XX_Count = object({
period = number
threshold = number
})
HTTPCode_Target_5XX_Count = object({
period = number
threshold = number
})
}),
{
HTTPCode_ELB_5XX_Count = {
period = 120
threshold = 0
},
HTTPCode_Target_5XX_Count = {
period = 120
threshold = 0
}
}
)
}))
| `[]` | no | | [cron\_monitoring](#input\_cron\_monitoring) | n/a |
object({
mysql_host = string
mysql_database = string
mysql_user = string
mysql_password_secret_name = string
vpc_id = string
subnet_ids = list(string)
rds_security_group_id = string
delay_tolerance = string
run_interval = string
log_retention_in_days = optional(number, 7)
})
| `null` | no | | [customer\_prefix](#input\_customer\_prefix) | n/a | `string` | `"fleet"` | no | | [default\_sns\_topic\_arns](#input\_default\_sns\_topic\_arns) | n/a | `list(string)` | `[]` | no | diff --git a/terraform/addons/monitoring/main.tf b/terraform/addons/monitoring/main.tf index 4af90e5ca2..09dff06488 100644 --- a/terraform/addons/monitoring/main.tf +++ b/terraform/addons/monitoring/main.tf @@ -37,7 +37,7 @@ resource "aws_db_event_subscription" "default" { } locals { - alb_map = {for k, v in var.albs: k => v} + alb_map = { for k, v in var.albs : k => v } } @@ -102,7 +102,7 @@ resource "aws_cloudwatch_metric_alarm" "target_response_time" { locals { http_5xx_alert_names = ["HTTPCode_ELB_5XX_Count", "HTTPCode_Target_5XX_Count"] http_5xx_alerts_list = flatten([for alert in local.http_5xx_alert_names : [for alb in var.albs : merge(alb, { "alert" : alert })]]) - http_5xx_alerts = {for k, v in local.http_5xx_alerts_list : k => v} + http_5xx_alerts = { for k, v in local.http_5xx_alerts_list : k => v } } @@ -113,9 +113,9 @@ resource "aws_cloudwatch_metric_alarm" "lb" { evaluation_periods = "1" metric_name = each.value.alert namespace = "AWS/ApplicationELB" - period = "120" + period = each.value.alert_thresholds[each.value.alert].period statistic = "Sum" - threshold = "0" + threshold = each.value.alert_thresholds[each.value.alert].threshold alarm_description = "This alarm indicates there are an abnormal amount of 5XX responses. Either the lb cannot talk with the Fleet backend target or Fleet is returning an error." 
alarm_actions = lookup(var.sns_topic_arns_map, "alb_httpcode_5xx", var.default_sns_topic_arns) ok_actions = lookup(var.sns_topic_arns_map, "alb_httpcode_5xx", var.default_sns_topic_arns) diff --git a/terraform/addons/monitoring/variables.tf b/terraform/addons/monitoring/variables.tf index 365455beec..4bd494e870 100644 --- a/terraform/addons/monitoring/variables.tf +++ b/terraform/addons/monitoring/variables.tf @@ -16,10 +16,33 @@ variable "albs" { target_group_arn_suffix = string min_containers = optional(string, 1) ecs_service_name = string + alert_thresholds = optional( + object({ + HTTPCode_ELB_5XX_Count = object({ + period = number + threshold = number + }) + HTTPCode_Target_5XX_Count = object({ + period = number + threshold = number + }) + }), + { + HTTPCode_ELB_5XX_Count = { + period = 120 + threshold = 0 + }, + HTTPCode_Target_5XX_Count = { + period = 120 + threshold = 0 + } + } + ) })) default = [] } + variable "default_sns_topic_arns" { type = list(string) default = []