feat: allow otel-collector to run without OpAMP server (#1672)

Today, users have to set up an OpAMP server to run with our clickstack OTel collector. Instead, we should allow users to disable OpAMP when they're using ClickHouse Cloud with the clickstack integration.

Standalone mode (no OpAMP) is selected when the user does not define `OPAMP_SERVER_URL`.

The end result is that a user can start the collector with just:

```
docker run \
  -e CLICKHOUSE_ENDPOINT=${CLICKHOUSE_ENDPOINT} \
  -e CLICKHOUSE_USER=default \
  -e CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD} \
  -p 8080:8080 -p 4317:4317 -p 4318:4318 \
  clickhouse/clickstack-otel-collector:latest
```

Ref: HDX-3300
This commit is contained in:
Warren Lee 2026-01-29 18:50:24 +01:00 committed by GitHub
parent 7ce9485e83
commit 43de467864
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 153 additions and 215 deletions

View file

@ -2,7 +2,7 @@
"$schema": "https://unpkg.com/@changesets/config@2.3.1/schema.json",
"changelog": "@changesets/cli/changelog",
"commit": false,
"fixed": [["@hyperdx/api", "@hyperdx/app"]],
"fixed": [["@hyperdx/api", "@hyperdx/app", "@hyperdx/otel-collector"]],
"linked": [],
"access": "restricted",
"baseBranch": "main",

View file

@ -0,0 +1,5 @@
---
"@hyperdx/otel-collector": patch
---
feat: allow otel-collector to run without OpAMP server

View file

@ -13,7 +13,8 @@ ARG USER_GID=10001
RUN apk add --no-cache ca-certificates && \
addgroup -S -g ${USER_GID} otel && \
adduser -S -u ${USER_UID} -G otel otel && \
install -d -m 0777 -o ${USER_UID} -g ${USER_GID} /etc/otel/supervisor-data
install -d -m 0777 -o ${USER_UID} -g ${USER_GID} /etc/otel/supervisor-data && \
install -d -m 0755 -o ${USER_UID} -g ${USER_GID} /etc/otelcol-contrib
# Copy gomplate binary from the gomplate image
COPY --from=gomplate /bin/gomplate /usr/local/bin/gomplate
@ -30,8 +31,9 @@ COPY --chmod=755 ./log-tailer.sh /log-tailer.sh
## dev ##############################################################################################
FROM base AS dev
COPY ./config.yaml /etc/otelcol-contrib/config.yaml
COPY ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl
COPY --chown=10001:10001 ./config.yaml /etc/otelcol-contrib/config.yaml
COPY --chown=10001:10001 ./config.standalone.yaml /etc/otelcol-contrib/standalone-config.yaml
COPY --chown=10001:10001 ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl
EXPOSE 4317 4318 13133
@ -40,8 +42,9 @@ ENTRYPOINT ["/entrypoint.sh", "/opampsupervisor"]
## prod #############################################################################################
FROM base AS prod
COPY ./config.yaml /etc/otelcol-contrib/config.yaml
COPY ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl
COPY --chown=10001:10001 ./config.yaml /etc/otelcol-contrib/config.yaml
COPY --chown=10001:10001 ./config.standalone.yaml /etc/otelcol-contrib/standalone-config.yaml
COPY --chown=10001:10001 ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl
EXPOSE 4317 4318 13133

View file

@ -1,174 +0,0 @@
# Base OpenTelemetry Collector configuration.
#
# The OTLP receiver and the `receivers:` entries of the traces, metrics and
# logs/in pipelines are intentionally left commented out: per the note in the
# receivers section they are configured via OpAMP, so this file is not a
# complete pipeline definition on its own and must be combined with another
# config source that supplies them.
receivers:
  # Troubleshooting: scrapes the collector's own metrics (served on port 8888
  # by service.telemetry.metrics below) plus the endpoint named by
  # CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT.
  prometheus:
    config:
      scrape_configs:
        - job_name: 'otelcol'
          scrape_interval: 30s
          static_configs:
            - targets:
                - '0.0.0.0:8888'
                - ${env:CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT}
  # Data sources: logs
  fluentforward:
    endpoint: '0.0.0.0:24225'
  # Configured via OpAMP w/ authentication
  # Data sources: traces, metrics, logs
  # otlp/hyperdx:
  #   protocols:
  #     grpc:
  #       include_metadata: true
  #       endpoint: '0.0.0.0:4317'
  #     http:
  #       cors:
  #         allowed_origins: ['*']
  #         allowed_headers: ['*']
  #       include_metadata: true
  #       endpoint: '0.0.0.0:4318'
processors:
  transform:
    log_statements:
      - context: log
        error_mode: ignore
        statements:
          # JSON parsing: Extends log attributes with the fields from structured log body content, either as an OTEL map or
          # as a string containing JSON content.
          - set(log.cache, ExtractPatterns(log.body, "(?P<0>(\\{.*\\}))")) where
            IsString(log.body)
          - merge_maps(log.attributes, ParseJSON(log.cache["0"]), "upsert")
            where IsMap(log.cache)
          - flatten(log.attributes) where IsMap(log.cache)
          - merge_maps(log.attributes, log.body, "upsert") where IsMap(log.body)
      # Severity inference only runs for records that carry no severity at all
      # (see `conditions` below).
      - context: log
        error_mode: ignore
        conditions:
          - severity_number == 0 and severity_text == ""
        statements:
          # Infer: extract the first log level keyword from the first 256 characters of the body
          - set(log.cache["substr"], log.body.string) where Len(log.body.string)
            < 256
          - set(log.cache["substr"], Substring(log.body.string, 0, 256)) where
            Len(log.body.string) >= 256
          - set(log.cache, ExtractPatterns(log.cache["substr"],
            "(?i)(?P<0>(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))"))
          # Infer: detect FATAL
          - set(log.severity_number, SEVERITY_NUMBER_FATAL) where
            IsMatch(log.cache["0"], "(?i)(alert|crit|emerg|fatal)")
          - set(log.severity_text, "fatal") where log.severity_number ==
            SEVERITY_NUMBER_FATAL
          # Infer: detect ERROR
          - set(log.severity_number, SEVERITY_NUMBER_ERROR) where
            IsMatch(log.cache["0"], "(?i)(error|err)")
          - set(log.severity_text, "error") where log.severity_number ==
            SEVERITY_NUMBER_ERROR
          # Infer: detect WARN
          - set(log.severity_number, SEVERITY_NUMBER_WARN) where
            IsMatch(log.cache["0"], "(?i)(warn|notice)")
          - set(log.severity_text, "warn") where log.severity_number ==
            SEVERITY_NUMBER_WARN
          # Infer: detect DEBUG
          - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where
            IsMatch(log.cache["0"], "(?i)(debug|dbug)")
          - set(log.severity_text, "debug") where log.severity_number ==
            SEVERITY_NUMBER_DEBUG
          # Infer: detect TRACE
          - set(log.severity_number, SEVERITY_NUMBER_TRACE) where
            IsMatch(log.cache["0"], "(?i)(trace)")
          - set(log.severity_text, "trace") where log.severity_number ==
            SEVERITY_NUMBER_TRACE
          # Infer: else
          - set(log.severity_text, "info") where log.severity_number == 0
          - set(log.severity_number, SEVERITY_NUMBER_INFO) where
            log.severity_number == 0
      - context: log
        error_mode: ignore
        statements:
          # Normalize the severity_text case
          - set(log.severity_text, ConvertCase(log.severity_text, "lower"))
  # NOTE(review): defined here but not referenced by any pipeline in this file.
  resourcedetection:
    detectors:
      - env
      - system
      - docker
    timeout: 5s
    override: false
  # Left empty on purpose: no batch settings are overridden in this file.
  batch:
  memory_limiter:
    # 80% of maximum memory up to 2G
    limit_mib: 1500
    # 25% of limit up to 2G
    spike_limit_mib: 512
    check_interval: 5s
connectors:
  # Splits the logs/in pipeline: records carrying an `rr-web.event` attribute
  # go to logs/out-rrweb, everything else to logs/out-default.
  routing/logs:
    default_pipelines: [logs/out-default]
    error_mode: ignore
    table:
      - context: log
        statement: route() where IsMatch(attributes["rr-web.event"], ".*")
        pipelines: [logs/out-rrweb]
exporters:
  # NOTE(review): defined here but not referenced by any pipeline in this file.
  debug:
    verbosity: detailed
    sampling_initial: 5
    sampling_thereafter: 200
  # Same connection settings as `clickhouse`, but logs are written to the
  # hyperdx_sessions table.
  clickhouse/rrweb:
    database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
    endpoint: ${env:CLICKHOUSE_ENDPOINT}
    password: ${env:CLICKHOUSE_PASSWORD}
    username: ${env:CLICKHOUSE_USER}
    ttl: 720h
    logs_table_name: hyperdx_sessions
    timeout: 5s
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 30s
      max_elapsed_time: 300s
  clickhouse:
    database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
    endpoint: ${env:CLICKHOUSE_ENDPOINT}
    password: ${env:CLICKHOUSE_PASSWORD}
    username: ${env:CLICKHOUSE_USER}
    ttl: 720h
    timeout: 5s
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 30s
      max_elapsed_time: 300s
extensions:
  health_check:
    endpoint: :13133
service:
  telemetry:
    metrics:
      readers:
        - pull:
            exporter:
              prometheus:
                host: '0.0.0.0'
                port: 8888
    logs:
      # Uses the explicit env: provider, like every other substitution in this file.
      level: ${env:HYPERDX_LOG_LEVEL}
  extensions: [health_check]
  pipelines:
    traces:
      # receivers: [otlp/hyperdx]
      processors: [memory_limiter, batch]
      exporters: [clickhouse]
    metrics:
      # receivers: [otlp/hyperdx, prometheus]
      processors: [memory_limiter, batch]
      exporters: [clickhouse]
    logs/in:
      # receivers: [otlp/hyperdx, fluentforward]
      exporters: [routing/logs]
    logs/out-default:
      receivers: [routing/logs]
      processors: [memory_limiter, transform, batch]
      exporters: [clickhouse]
    logs/out-rrweb:
      receivers: [routing/logs]
      processors: [memory_limiter, batch]
      exporters: [clickhouse/rrweb]

View file

@ -0,0 +1,73 @@
# Standalone-mode collector configuration.
#
# Mirrors the config built by buildOtelCollectorConfig() in
# packages/api/src/opamp/controllers/opampController.ts. Whenever either side
# changes, update the other so the two stay in sync.
#
# The processors named in the pipelines below (memory_limiter, batch,
# transform) are not defined in this file; presumably they come from the base
# config.yaml that is loaded together with it. Confirm before editing.
receivers:
  otlp/hyperdx:
    protocols:
      grpc:
        include_metadata: true
        endpoint: '0.0.0.0:4317'
      http:
        cors:
          allowed_origins:
            - '*'
          allowed_headers:
            - '*'
        include_metadata: true
        endpoint: '0.0.0.0:4318'

connectors:
  # Fans logs/in out to one of the two logs/out-* pipelines.
  routing/logs:
    default_pipelines:
      - logs/out-default
    error_mode: ignore
    table:
      - context: log
        statement: route() where IsMatch(attributes["rr-web.event"], ".*")
        pipelines:
          - logs/out-rrweb

exporters:
  # Destination for records routed to logs/out-rrweb.
  clickhouse/rrweb:
    database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
    endpoint: ${env:CLICKHOUSE_ENDPOINT}
    password: ${env:CLICKHOUSE_PASSWORD}
    username: ${env:CLICKHOUSE_USER}
    ttl: 720h
    logs_table_name: hyperdx_sessions
    timeout: 5s
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 30s
      max_elapsed_time: 300s
  # Destination for traces, metrics and all remaining logs.
  clickhouse:
    database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
    endpoint: ${env:CLICKHOUSE_ENDPOINT}
    password: ${env:CLICKHOUSE_PASSWORD}
    username: ${env:CLICKHOUSE_USER}
    ttl: 720h
    timeout: 5s
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 30s
      max_elapsed_time: 300s

service:
  pipelines:
    traces:
      receivers:
        - otlp/hyperdx
      processors:
        - memory_limiter
        - batch
      exporters:
        - clickhouse
    metrics:
      receivers:
        - otlp/hyperdx
      processors:
        - memory_limiter
        - batch
      exporters:
        - clickhouse
    # Entry pipeline for logs: hands every record to the routing connector.
    logs/in:
      receivers:
        - otlp/hyperdx
      exporters:
        - routing/logs
    logs/out-default:
      receivers:
        - routing/logs
      processors:
        - memory_limiter
        - transform
        - batch
      exporters:
        - clickhouse
    logs/out-rrweb:
      receivers:
        - routing/logs
      processors:
        - memory_limiter
        - batch
      exporters:
        - clickhouse/rrweb

View file

@ -1,34 +1,55 @@
#!/bin/sh
# Container entrypoint. When OPAMP_SERVER_URL is empty the collector binary is
# exec'd directly (standalone mode); otherwise the supervisor config template is
# rendered with gomplate and the command passed as "$1" is exec'd with it.
set -e
if [ "$OTEL_SUPERVISOR_LOGS" = "true" ]; then
# Start log tailer process in background for agent.log
# Arguments: log_file_path [check_interval_seconds]
/log-tailer.sh /etc/otel/supervisor-data/agent.log 1 &
# Check if OPAMP_SERVER_URL is defined to determine mode
if [ -z "$OPAMP_SERVER_URL" ]; then
# Standalone mode - run collector directly without supervisor
echo "Running in standalone mode (OPAMP_SERVER_URL not set)"
# Create an agent log file for the supervisor and collector child process. Normally
# this file would be created as a standard file but we just want a FIFO pipe that
# will pass data over to the tail process in the entrypoint script. This avoids
# the need for the supervisor to store and forward the logs in its memory while also
# eliminating the need for volume-based storage.
if [ ! -e /etc/otel/supervisor-data/agent.log ]; then
mkfifo /etc/otel/supervisor-data/agent.log || echo "Failed to create FIFO" >&2
# Build collector arguments with multiple config files
COLLECTOR_ARGS="--config /etc/otelcol-contrib/config.yaml --config /etc/otelcol-contrib/standalone-config.yaml"
# Add custom config file if specified
if [ -n "$CUSTOM_OTELCOL_CONFIG_FILE" ]; then
echo "Including custom config: $CUSTOM_OTELCOL_CONFIG_FILE"
COLLECTOR_ARGS="$COLLECTOR_ARGS --config $CUSTOM_OTELCOL_CONFIG_FILE"
fi
fi
# Render the supervisor config template using gomplate
# Write to supervisor-data directory which has proper permissions for otel user
gomplate -f /etc/otel/supervisor.yaml.tmpl -o /etc/otel/supervisor-data/supervisor-runtime.yaml
# Log the configuration being used
if [ -n "$CUSTOM_OTELCOL_CONFIG_FILE" ]; then
echo "Using custom OTEL config file: $CUSTOM_OTELCOL_CONFIG_FILE"
# Execute collector directly
# NOTE(review): $COLLECTOR_ARGS is left unquoted so that it word-splits into
# separate arguments; a CUSTOM_OTELCOL_CONFIG_FILE path containing whitespace
# would be split as well.
exec /otelcontribcol $COLLECTOR_ARGS
else
echo "CUSTOM_OTELCOL_CONFIG_FILE not set, using default configuration"
# Supervisor mode - run with OpAMP supervisor
echo "Running in supervisor mode (OPAMP_SERVER_URL: $OPAMP_SERVER_URL)"
if [ "$OTEL_SUPERVISOR_LOGS" = "true" ]; then
# Start log tailer process in background for agent.log
# Arguments: log_file_path [check_interval_seconds]
/log-tailer.sh /etc/otel/supervisor-data/agent.log 1 &
# Create an agent log file for the supervisor and collector child process. Normally
# this file would be created as a standard file but we just want a FIFO pipe that
# will pass data over to the tail process in the entrypoint script. This avoids
# the need for the supervisor to store and forward the logs in its memory while also
# eliminating the need for volume-based storage.
if [ ! -e /etc/otel/supervisor-data/agent.log ]; then
mkfifo /etc/otel/supervisor-data/agent.log || echo "Failed to create FIFO" >&2
fi
fi
# Render the supervisor config template using gomplate
# Write to supervisor-data directory which has proper permissions for otel user
gomplate -f /etc/otel/supervisor.yaml.tmpl -o /etc/otel/supervisor-data/supervisor-runtime.yaml
# Log the configuration being used
if [ -n "$CUSTOM_OTELCOL_CONFIG_FILE" ]; then
echo "Using custom OTEL config file: $CUSTOM_OTELCOL_CONFIG_FILE"
else
echo "CUSTOM_OTELCOL_CONFIG_FILE not set, using default configuration"
fi
# Update the command arguments to use the rendered config file
set -- "$1" --config /etc/otel/supervisor-data/supervisor-runtime.yaml
# Execute the supervisor with all passed arguments
exec "$@"
fi
# Update the command arguments to use the rendered config file
set -- "$1" --config /etc/otel/supervisor-data/supervisor-runtime.yaml
# Execute the supervisor with all passed arguments
exec "$@"

View file

@ -0,0 +1,7 @@
{
"name": "@hyperdx/otel-collector",
"description": "HyperDX OpenTelemetry Collector configuration and Docker image",
"version": "2.13.0",
"license": "MIT",
"private": true
}

View file

@ -19,20 +19,17 @@ services:
retries: 5
start_period: 10s
otel-collector:
image: otel/opentelemetry-collector-contrib:0.129.1
volumes:
- ../../docker/otel-collector/config.deprecated.yaml:/etc/otelcol-contrib/config.yaml
- ./receiver-config.yaml:/etc/otelcol-contrib/receiver-config.yaml
command:
[
'--config=/etc/otelcol-contrib/receiver-config.yaml',
'--config=/etc/otelcol-contrib/config.yaml',
]
build:
context: ../../docker/otel-collector
target: dev
environment:
- CLICKHOUSE_ENDPOINT=tcp://ch-server:9000?dial_timeout=10s
- CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT=ch-server:9363
- CLICKHOUSE_USER=default
- CLICKHOUSE_PASSWORD=
- HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE=default
- HYPERDX_LOG_LEVEL=info
# OPAMP_SERVER_URL is intentionally not set to run in standalone mode
ports:
- 4318:4318 # OTLP http receiver
- 13133:13133 # health check

View file

@ -4581,6 +4581,12 @@ __metadata:
languageName: node
linkType: hard
"@hyperdx/otel-collector@workspace:packages/otel-collector":
version: 0.0.0-use.local
resolution: "@hyperdx/otel-collector@workspace:packages/otel-collector"
languageName: unknown
linkType: soft
"@hyperdx/otel-web-session-recorder@npm:0.16.2":
version: 0.16.2
resolution: "@hyperdx/otel-web-session-recorder@npm:0.16.2"