hyperdx/docker/otel-collector/config.yaml
Warren 31e22dcff4
feat: introduce clickhouse db init script (#843)
Ref: HDX-1777

This shouldn't have any impact on users
2025-06-09 16:45:23 +00:00

168 lines
5.7 KiB
YAML

receivers:
# Troubleshooting
prometheus:
config:
scrape_configs:
- job_name: 'otelcol'
scrape_interval: 30s
static_configs:
- targets:
- '0.0.0.0:8888'
- ${env:CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT}
# Data sources: logs
fluentforward:
endpoint: '0.0.0.0:24225'
# Configured via OpAMP w/ authentication
# Data sources: traces, metrics, logs
# otlp/hyperdx:
# protocols:
# grpc:
# include_metadata: true
# endpoint: '0.0.0.0:4317'
# http:
# cors:
# allowed_origins: ['*']
# allowed_headers: ['*']
# include_metadata: true
# endpoint: '0.0.0.0:4318'
processors:
transform:
log_statements:
- context: log
error_mode: ignore
statements:
# JSON parsing: Extends log attributes with the fields from structured log body content, either as an OTEL map or
# as a string containing JSON content.
- set(log.cache, ExtractPatterns(log.body, "(?P<0>(\\{.*\\}))")) where
IsString(log.body)
- merge_maps(log.attributes, ParseJSON(log.cache["0"]), "upsert")
where IsMap(log.cache)
- flatten(log.attributes) where IsMap(log.cache)
- merge_maps(log.attributes, log.body, "upsert") where IsMap(log.body)
- context: log
error_mode: ignore
conditions:
- severity_number == 0 and severity_text == ""
statements:
# Infer: extract the first log level keyword from the first 256 characters of the body
- set(log.cache["substr"], log.body.string) where Len(log.body.string)
< 256
- set(log.cache["substr"], Substring(log.body.string, 0, 256)) where
Len(log.body.string) >= 256
- set(log.cache, ExtractPatterns(log.cache["substr"],
"(?i)(?P<0>(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))"))
# Infer: detect FATAL
- set(log.severity_number, SEVERITY_NUMBER_FATAL) where
IsMatch(log.cache["0"], "(?i)(alert|crit|emerg|fatal)")
- set(log.severity_text, "fatal") where log.severity_number ==
SEVERITY_NUMBER_FATAL
# Infer: detect ERROR
- set(log.severity_number, SEVERITY_NUMBER_ERROR) where
IsMatch(log.cache["0"], "(?i)(error|err)")
- set(log.severity_text, "error") where log.severity_number ==
SEVERITY_NUMBER_ERROR
# Infer: detect WARN
- set(log.severity_number, SEVERITY_NUMBER_WARN) where
IsMatch(log.cache["0"], "(?i)(warn|notice)")
- set(log.severity_text, "warn") where log.severity_number ==
SEVERITY_NUMBER_WARN
# Infer: detect DEBUG
- set(log.severity_number, SEVERITY_NUMBER_DEBUG) where
IsMatch(log.cache["0"], "(?i)(debug|dbug)")
- set(log.severity_text, "debug") where log.severity_number ==
SEVERITY_NUMBER_DEBUG
# Infer: detect TRACE
- set(log.severity_number, SEVERITY_NUMBER_TRACE) where
IsMatch(log.cache["0"], "(?i)(trace)")
- set(log.severity_text, "trace") where log.severity_number ==
SEVERITY_NUMBER_TRACE
# Infer: else
- set(log.severity_text, "info") where log.severity_number == 0
- set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_number == 0
- context: log
error_mode: ignore
statements:
# Normalize the severity_text case
- set(log.severity_text, ConvertCase(log.severity_text, "lower"))
resourcedetection:
detectors:
- env
- system
- docker
timeout: 5s
override: false
batch:
memory_limiter:
# 80% of maximum memory up to 2G
limit_mib: 1500
# 25% of limit up to 2G
spike_limit_mib: 512
check_interval: 5s
connectors:
routing/logs:
default_pipelines: [logs/out-default]
error_mode: ignore
table:
- context: log
statement: route() where IsMatch(attributes["rr-web.event"], ".*")
pipelines: [logs/out-rrweb]
exporters:
debug:
verbosity: detailed
sampling_initial: 5
sampling_thereafter: 200
clickhouse/rrweb:
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
endpoint: ${env:CLICKHOUSE_ENDPOINT}
password: ${env:CLICKHOUSE_PASSWORD}
username: ${env:CLICKHOUSE_USER}
ttl: 720h
logs_table_name: hyperdx_sessions
timeout: 5s
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
max_elapsed_time: 300s
clickhouse:
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
endpoint: ${env:CLICKHOUSE_ENDPOINT}
password: ${env:CLICKHOUSE_PASSWORD}
username: ${env:CLICKHOUSE_USER}
ttl: 720h
timeout: 5s
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
max_elapsed_time: 300s
extensions:
health_check:
endpoint: :13133
service:
telemetry:
metrics:
address: ':8888'
logs:
level: ${HYPERDX_LOG_LEVEL}
extensions: [health_check]
pipelines:
traces:
# receivers: [otlp/hyperdx]
processors: [memory_limiter, batch]
exporters: [clickhouse]
metrics:
# receivers: [otlp/hyperdx, prometheus]
processors: [memory_limiter, batch]
exporters: [clickhouse]
logs/in:
# receivers: [otlp/hyperdx, fluentforward]
exporters: [routing/logs]
logs/out-default:
receivers: [routing/logs]
processors: [memory_limiter, transform, batch]
exporters: [clickhouse]
logs/out-rrweb:
receivers: [routing/logs]
processors: [memory_limiter, batch]
exporters: [clickhouse/rrweb]