Bumping signoz resources for 100K hosts loadtest. (#41961)

This commit is contained in:
Victor Lyuboslavsky 2026-03-19 12:49:36 -05:00 committed by GitHub
parent 2a0d0c3804
commit ecee908157
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 23 additions and 23 deletions

View file

@ -137,7 +137,7 @@ module "eks" {
min_size = 1
max_size = 1
desired_size = 1
instance_types = ["t3.2xlarge"]
instance_types = ["m6i.8xlarge"]
}
}
@ -255,7 +255,7 @@ resource "helm_release" "signoz" {
# Clickhouse storage configuration
set {
name = "clickhouse.persistence.size"
value = "200Gi"
value = "600Gi"
}
set {
@ -279,49 +279,48 @@ resource "helm_release" "signoz" {
# Default 100m CPU and 200Mi memory are way too low for high-volume telemetry
set {
name = "clickhouse.resources.requests.cpu"
value = "2000m"
value = "6000m"
}
set {
name = "clickhouse.resources.requests.memory"
value = "4Gi"
value = "12Gi"
}
set {
name = "clickhouse.resources.limits.cpu"
value = "4000m"
value = "12000m"
}
set {
name = "clickhouse.resources.limits.memory"
value = "8Gi"
value = "24Gi"
}
# OTEL Collector resource configuration for loadtest
set {
name = "otelCollector.resources.requests.memory"
value = "8Gi"
value = "24Gi"
}
set {
name = "otelCollector.resources.limits.memory"
value = "12Gi"
value = "36Gi"
}
set {
name = "otelCollector.resources.requests.cpu"
value = "1000m"
value = "3000m"
}
set {
name = "otelCollector.resources.limits.cpu"
value = "4000m"
value = "12000m"
}
# Originally 1 replica because there is 1 LoadBalancer endpoint; raised to 3 for the 100K hosts loadtest
set {
name = "otelCollector.replicaCount"
value = "1"
value = "3"
}
depends_on = [

View file

@ -31,16 +31,17 @@ otelCollector:
processors:
batch:
send_batch_size: 2000
send_batch_size: 6000
send_batch_max_size: 7500
timeout: 5s
batch/meter:
send_batch_max_size: 25000
send_batch_size: 20000
send_batch_max_size: 75000
send_batch_size: 60000
timeout: 1s
memory_limiter:
check_interval: 1s
limit_mib: 11264
spike_limit_mib: 512
limit_mib: 33792
spike_limit_mib: 1536
signozspanmetrics/delta:
metrics_exporter: signozclickhousemetrics
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
@ -72,13 +73,13 @@ otelCollector:
max_elapsed_time: 300s
sending_queue:
enabled: true
num_consumers: 10
queue_size: 10000
num_consumers: 30
queue_size: 15000
signozclickhousemetrics:
timeout: 45s
# Traces exporter must handle spans after tail_sampling
clickhousetraces:
timeout: 120s
timeout: 60s
retry_on_failure:
enabled: true
initial_interval: 5s
@ -86,8 +87,8 @@ otelCollector:
max_elapsed_time: 300s
sending_queue:
enabled: true
num_consumers: 10
queue_size: 10000
num_consumers: 30
queue_size: 15000
# Metadata exporter with increased timeout and retry support
metadataexporter:
@ -114,7 +115,7 @@ otelCollector:
exporters: [clickhousetraces, metadataexporter, signozmeter]
metrics:
receivers: [otlp]
processors: [batch]
processors: [memory_limiter, batch]
exporters: [metadataexporter, signozclickhousemetrics, signozmeter]
logs:
receivers: [otlp]