From 9ba0dafa9435a2be73c597fb486577bfd7e58924 Mon Sep 17 00:00:00 2001 From: Benjamin Edwards Date: Fri, 9 Feb 2024 13:58:28 -0500 Subject: [PATCH] vuln processing as distinct service (#16544) create a distinct service to manage vulnerability processing --- .../addons/external-vuln-scans/.header.md | 10 +- .../addons/external-vuln-scans/README.md | 28 ++- terraform/addons/external-vuln-scans/main.tf | 169 ++++++++++-------- .../addons/external-vuln-scans/outputs.tf | 2 +- .../addons/external-vuln-scans/variables.tf | 58 +++++- terraform/byo-vpc/byo-db/byo-ecs/outputs.tf | 4 + 6 files changed, 181 insertions(+), 90 deletions(-) diff --git a/terraform/addons/external-vuln-scans/.header.md b/terraform/addons/external-vuln-scans/.header.md index 8a01da3747..29d4b5775f 100644 --- a/terraform/addons/external-vuln-scans/.header.md +++ b/terraform/addons/external-vuln-scans/.header.md @@ -1,2 +1,10 @@ # External Vulnerability Scans addon -This addon moves vulnerability scans off of serving nodes and onto a scheduled task in AWS Eventbridge. +This addon creates an additional ECS service that only runs a single task, responsible for vuln processing. It receives +no web traffic. We utilize [current instance checks](https://fleetdm.com/docs/configuration/fleet-server-configuration#current-instance-checks) to make this happen. The advantages of this mechanism: + +1. dedicating processing power to vuln processing + 2. ensures task responsible for vuln processing isn't also trying to serve web traffic +2. 
caching of vulnerability artifacts/dependencies + + +Usage is simplified by using the output from the fleet byo-ecs module (../terraform/byo-vpc/byo-db/byo-ecs/README.md) diff --git a/terraform/addons/external-vuln-scans/README.md b/terraform/addons/external-vuln-scans/README.md index 01bbe17d89..5c0e755359 100644 --- a/terraform/addons/external-vuln-scans/README.md +++ b/terraform/addons/external-vuln-scans/README.md @@ -1,5 +1,12 @@ # External Vulnerability Scans addon -This addon moves vulnerability scans off of serving nodes and onto a scheduled task in AWS Eventbridge. +This addon creates an additional ECS service that only runs a single task, responsible for vuln processing. It receives +no web traffic. We utilize [current instance checks](https://fleetdm.com/docs/configuration/fleet-server-configuration#current-instance-checks) to make this happen. The advantages of this mechanism: + +1. dedicating processing power to vuln processing + 2. ensures task responsible for vuln processing isn't also trying to serve web traffic +2. caching of vulnerability artifacts/dependencies + +Usage is simplified by using the output from the fleet byo-ecs module (../terraform/byo-vpc/byo-db/byo-ecs/README.md) ## Requirements @@ -19,21 +26,24 @@ No modules. 
| Name | Type | |------|------| -| [aws_cloudwatch_event_rule.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource | -| [aws_cloudwatch_event_target.ecs_scheduled_task](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource | -| [aws_iam_role.ecs_events](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | -| [aws_iam_role_policy.ecs_events_run_task_with_any_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | -| [aws_iam_policy_document.assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | -| [aws_iam_policy_document.ecs_events_run_task_with_any_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_ecs_service.fleet](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ecs_service) | resource | +| [aws_ecs_task_definition.vuln-processing](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ecs_task_definition) | resource | | [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source | ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [awslogs\_config](#input\_awslogs\_config) | n/a |
object({
group = string
region = string
prefix = string
})
| n/a | yes | +| [customer\_prefix](#input\_customer\_prefix) | n/a | `string` | `"fleet"` | no | | [ecs\_cluster](#input\_ecs\_cluster) | The ecs cluster module that is created by the byo-db module | `any` | n/a | yes | -| [ecs\_service](#input\_ecs\_service) | The ecs service resource that is created by the byo-ecs module | `any` | n/a | yes | -| [task\_definition](#input\_task\_definition) | The task definition resource that is created by the byo-ecs module | `any` | n/a | yes | +| [execution\_iam\_role\_arn](#input\_execution\_iam\_role\_arn) | The ARN of the fleet execution role, this is necessary to pass role from ecs events | `any` | n/a | yes | +| [fleet\_config](#input\_fleet\_config) | The root Fleet config object | `any` | n/a | yes | +| [security\_groups](#input\_security\_groups) | n/a | `list(string)` | n/a | yes | +| [subnets](#input\_subnets) | n/a | `list(string)` | n/a | yes | +| [task\_role\_arn](#input\_task\_role\_arn) | The ARN of the fleet task role, this is necessary to pass role from ecs events | `any` | n/a | yes | +| [vuln\_processing\_cpu](#input\_vuln\_processing\_cpu) | The amount of CPU to dedicate to the vuln processing command | `number` | `1024` | no | +| [vuln\_processing\_memory](#input\_vuln\_processing\_memory) | The amount of memory to dedicate to the vuln processing command | `number` | `4096` | no | ## Outputs diff --git a/terraform/addons/external-vuln-scans/main.tf b/terraform/addons/external-vuln-scans/main.tf index 2f90af7ba6..e030a697e4 100644 --- a/terraform/addons/external-vuln-scans/main.tf +++ b/terraform/addons/external-vuln-scans/main.tf @@ -1,82 +1,109 @@ data "aws_region" "current" {} -resource "aws_cloudwatch_event_rule" "main" { - schedule_expression = "rate(1 hour)" -} - -data "aws_iam_policy_document" "assume_role" { - statement { - effect = "Allow" - - principals { - type = "Service" - identifiers = ["events.amazonaws.com", "ecs-tasks.amazonaws.com"] +locals { + environment = [ + // specifically overriding 
disable schedule here because the output of this module sets this to true + // and then we pull in the output of fleet ecs module + for k, v in merge( + var.fleet_config.extra_environment_variables, + { FLEET_VULNERABILITIES_DISABLE_SCHEDULE = "false"} + ) : { + name = k + value = v } - - actions = ["sts:AssumeRole"] - } -} - -resource "aws_iam_role" "ecs_events" { - assume_role_policy = data.aws_iam_policy_document.assume_role.json -} - -data "aws_iam_policy_document" "ecs_events_run_task_with_any_role" { - statement { - effect = "Allow" - actions = ["iam:PassRole"] - resources = ["*"] - } - - statement { - effect = "Allow" - actions = ["ecs:RunTask"] - resources = [replace(var.task_definition.arn, "/:\\d+$/", ":*")] - condition { - test = "ArnEquals" - values = [var.ecs_cluster.cluster_arn] - variable = "ecs:cluster" + ] + secrets = [ + for k, v in var.fleet_config.extra_secrets : { + name = k + valueFrom = v } - } -} -resource "aws_iam_role_policy" "ecs_events_run_task_with_any_role" { - role = aws_iam_role.ecs_events.id - policy = data.aws_iam_policy_document.ecs_events_run_task_with_any_role.json + ] } -resource "aws_cloudwatch_event_target" "ecs_scheduled_task" { - arn = var.ecs_cluster.cluster_arn - rule = aws_cloudwatch_event_rule.main.name - role_arn = aws_iam_role.ecs_events.arn +resource "aws_ecs_service" "fleet" { + name = "${var.fleet_config.service.name}-vuln-processing" + launch_type = "FARGATE" + cluster = var.ecs_cluster + task_definition = aws_ecs_task_definition.vuln-processing.arn + desired_count = 1 + deployment_minimum_healthy_percent = 100 + deployment_maximum_percent = 200 - ecs_target { - task_count = 1 - task_definition_arn = var.task_definition.arn - launch_type = "FARGATE" - network_configuration { - subnets = var.ecs_service.network_configuration[0].subnets - security_groups = var.ecs_service.network_configuration[0].security_groups - } + lifecycle { + ignore_changes = [desired_count] } - input = jsonencode({ - containerOverrides = [ - 
- { - name = "fleet", - command = ["fleet", "vuln_processing"] - }, - { - resourceRequirements = [ - { - type = "VCPU", - value = "1" - }, - { - type = "MEMORY", - value = "4096" - } - ] + network_configuration { + subnets = var.subnets + security_groups = var.security_groups + } +} + +resource "aws_ecs_task_definition" "vuln-processing" { + family = "${var.fleet_config.family}-vuln-processing" + cpu = var.vuln_processing_cpu + memory = var.vuln_processing_memory + execution_role_arn = var.execution_iam_role_arn + task_role_arn = var.task_role_arn + network_mode = "awsvpc" + requires_compatibilities = ["FARGATE"] + + container_definitions = jsonencode(concat([ + { + name = "fleet-vuln-processing" + image = var.fleet_config.image + essential = true + networkMode = "awsvpc" + secrets = concat([ + { + name = "FLEET_MYSQL_PASSWORD" + valueFrom = var.fleet_config.database.password_secret_arn + } + ], local.secrets) + ulimits = [ + { + name = "nofile" + softLimit = 999999 + hardLimit = 999999 + } + ], + environment = concat([ + { + name = "FLEET_MYSQL_USERNAME" + value = var.fleet_config.database.user + }, + { + name = "FLEET_MYSQL_DATABASE" + value = var.fleet_config.database.database + }, + { + name = "FLEET_MYSQL_ADDRESS" + value = var.fleet_config.database.address + }, + { + name = "FLEET_REDIS_ADDRESS" + value = var.fleet_config.redis.address + }, + { + name = "FLEET_REDIS_USE_TLS" + value = tostring(var.fleet_config.redis.use_tls) + }, + { + name = "FLEET_SERVER_TLS" + value = "false" + }, + ], local.environment), + logConfiguration = { + logDriver = "awslogs" + options = { + awslogs-group = var.awslogs_config.group + awslogs-region = var.awslogs_config.region == null ? 
data.aws_region.current.name : var.awslogs_config.region + awslogs-stream-prefix = "${var.awslogs_config.prefix}-vuln-processing" + } } - ] - }) + }] + , var.fleet_config.sidecars)) } + + + diff --git a/terraform/addons/external-vuln-scans/outputs.tf b/terraform/addons/external-vuln-scans/outputs.tf index be1bc6b3f6..b62397c95d 100644 --- a/terraform/addons/external-vuln-scans/outputs.tf +++ b/terraform/addons/external-vuln-scans/outputs.tf @@ -1,5 +1,5 @@ output "extra_environment_variables" { value = { - FLEET_VULNERABILITIES_DISABLE_SCHEDULE = "true" + FLEET_VULNERABILITIES_DISABLE_SCHEDULE = "true" } } diff --git a/terraform/addons/external-vuln-scans/variables.tf b/terraform/addons/external-vuln-scans/variables.tf index b82855f5eb..e667143442 100644 --- a/terraform/addons/external-vuln-scans/variables.tf +++ b/terraform/addons/external-vuln-scans/variables.tf @@ -1,11 +1,53 @@ -variable "task_definition" { - description = "The task definition resource that is created by the byo-ecs module" -} - -variable "ecs_service" { - description = "The ecs service resource that is created by the byo-ecs module" -} - variable "ecs_cluster" { description = "The ecs cluster module that is created by the byo-db module" } + +variable "fleet_config" { + description = "The root Fleet config object" + type = any +} + +variable "awslogs_config" { + type = object({ + group = string + region = string + prefix = string + }) +} + +variable "subnets" { + type = list(string) + nullable = false +} + +variable "security_groups" { + type = list(string) + nullable = false +} + + +variable "customer_prefix" { + type = string + default = "fleet" +} + +variable "execution_iam_role_arn" { + description = "The ARN of the fleet execution role, this is necessary to pass role from ecs events" +} + +variable "task_role_arn" { + description = "The ARN of the fleet task role, this is necessary to pass role from ecs events" +} + +variable "vuln_processing_memory" { + // note must conform to FARGATE 
breakpoints https://docs.aws.amazon.com/AmazonECS/latest/userguide/fargate-task-defs.html + default = 4096 + description = "The amount of memory to dedicate to the vuln processing command" +} + +variable "vuln_processing_cpu" { + // note must conform to FARGATE breakpoints https://docs.aws.amazon.com/AmazonECS/latest/userguide/fargate-task-defs.html + default = 1024 + description = "The amount of CPU to dedicate to the vuln processing command" +} + diff --git a/terraform/byo-vpc/byo-db/byo-ecs/outputs.tf b/terraform/byo-vpc/byo-db/byo-ecs/outputs.tf index f0f3de397e..cb02461f13 100644 --- a/terraform/byo-vpc/byo-db/byo-ecs/outputs.tf +++ b/terraform/byo-vpc/byo-db/byo-ecs/outputs.tf @@ -34,3 +34,7 @@ output "non_circular" { "subnets" = var.fleet_config.networking.subnets, } } + +output "fleet_config" { + value = var.fleet_config +}