From 43de46786488a8b8b760f6260fd784bf5628a94b Mon Sep 17 00:00:00 2001 From: Warren Lee <5959690+wrn14897@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:50:24 +0100 Subject: [PATCH] feat: allow otel-collector to run without OpAMP server (#1672) Today, users have to set up an OpAMP server to run with our clickstack OTel collector. Instead, we should allow users to disable OpAMP when they're using ClickHouse Cloud with the clickstack integration. This can be determined by `OPAMP_SERVER_URL` not being defined by the user. The end result is that a user can do ``` docker run \ -e CLICKHOUSE_ENDPOINT=${CLICKHOUSE_ENDPOINT} \ -e CLICKHOUSE_USER=default \ -e CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD} \ -p 8080:8080 -p 4317:4317 -p 4318:4318 \ clickhouse/clickstack-otel-collector:latest ``` Ref: HDX-3300 --- .changeset/config.json | 2 +- .changeset/smooth-bananas-hammer.md | 5 + docker/otel-collector/Dockerfile | 13 +- docker/otel-collector/config.deprecated.yaml | 174 ------------------ docker/otel-collector/config.standalone.yaml | 73 ++++++++ docker/otel-collector/entrypoint.sh | 73 +++++--- packages/otel-collector/package.json | 7 + .../otel-collector/docker-compose.yaml | 15 +- yarn.lock | 6 + 9 files changed, 153 insertions(+), 215 deletions(-) create mode 100644 .changeset/smooth-bananas-hammer.md delete mode 100644 docker/otel-collector/config.deprecated.yaml create mode 100644 docker/otel-collector/config.standalone.yaml create mode 100644 packages/otel-collector/package.json diff --git a/.changeset/config.json b/.changeset/config.json index cfef3e6f..3c2da191 100644 --- a/.changeset/config.json +++ b/.changeset/config.json @@ -2,7 +2,7 @@ "$schema": "https://unpkg.com/@changesets/config@2.3.1/schema.json", "changelog": "@changesets/cli/changelog", "commit": false, - "fixed": [["@hyperdx/api", "@hyperdx/app"]], + "fixed": [["@hyperdx/api", "@hyperdx/app", "@hyperdx/otel-collector"]], "linked": [], "access": "restricted", "baseBranch": "main", diff --git 
a/.changeset/smooth-bananas-hammer.md b/.changeset/smooth-bananas-hammer.md new file mode 100644 index 00000000..bea9279f --- /dev/null +++ b/.changeset/smooth-bananas-hammer.md @@ -0,0 +1,5 @@ +--- +"@hyperdx/otel-collector": patch +--- + +feat: allow otel-collector to run without OpAMP server diff --git a/docker/otel-collector/Dockerfile b/docker/otel-collector/Dockerfile index dbe3b316..5033a762 100644 --- a/docker/otel-collector/Dockerfile +++ b/docker/otel-collector/Dockerfile @@ -13,7 +13,8 @@ ARG USER_GID=10001 RUN apk add --no-cache ca-certificates && \ addgroup -S -g ${USER_GID} otel && \ adduser -S -u ${USER_UID} -G otel otel && \ - install -d -m 0777 -o ${USER_UID} -g ${USER_GID} /etc/otel/supervisor-data + install -d -m 0777 -o ${USER_UID} -g ${USER_GID} /etc/otel/supervisor-data && \ + install -d -m 0755 -o ${USER_UID} -g ${USER_GID} /etc/otelcol-contrib # Copy gomplate binary from the gomplate image COPY --from=gomplate /bin/gomplate /usr/local/bin/gomplate @@ -30,8 +31,9 @@ COPY --chmod=755 ./log-tailer.sh /log-tailer.sh ## dev ############################################################################################## FROM base AS dev -COPY ./config.yaml /etc/otelcol-contrib/config.yaml -COPY ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl +COPY --chown=10001:10001 ./config.yaml /etc/otelcol-contrib/config.yaml +COPY --chown=10001:10001 ./config.standalone.yaml /etc/otelcol-contrib/standalone-config.yaml +COPY --chown=10001:10001 ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl EXPOSE 4317 4318 13133 @@ -40,8 +42,9 @@ ENTRYPOINT ["/entrypoint.sh", "/opampsupervisor"] ## prod ############################################################################################# FROM base AS prod -COPY ./config.yaml /etc/otelcol-contrib/config.yaml -COPY ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl +COPY --chown=10001:10001 ./config.yaml /etc/otelcol-contrib/config.yaml +COPY --chown=10001:10001 
./config.standalone.yaml /etc/otelcol-contrib/standalone-config.yaml +COPY --chown=10001:10001 ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl EXPOSE 4317 4318 13133 diff --git a/docker/otel-collector/config.deprecated.yaml b/docker/otel-collector/config.deprecated.yaml deleted file mode 100644 index afe54dc4..00000000 --- a/docker/otel-collector/config.deprecated.yaml +++ /dev/null @@ -1,174 +0,0 @@ -receivers: - # Troubleshooting - prometheus: - config: - scrape_configs: - - job_name: 'otelcol' - scrape_interval: 30s - static_configs: - - targets: - - '0.0.0.0:8888' - - ${env:CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT} - # Data sources: logs - fluentforward: - endpoint: '0.0.0.0:24225' - # Configured via OpAMP w/ authentication - # Data sources: traces, metrics, logs - # otlp/hyperdx: - # protocols: - # grpc: - # include_metadata: true - # endpoint: '0.0.0.0:4317' - # http: - # cors: - # allowed_origins: ['*'] - # allowed_headers: ['*'] - # include_metadata: true - # endpoint: '0.0.0.0:4318' -processors: - transform: - log_statements: - - context: log - error_mode: ignore - statements: - # JSON parsing: Extends log attributes with the fields from structured log body content, either as an OTEL map or - # as a string containing JSON content. 
- - set(log.cache, ExtractPatterns(log.body, "(?P<0>(\\{.*\\}))")) where - IsString(log.body) - - merge_maps(log.attributes, ParseJSON(log.cache["0"]), "upsert") - where IsMap(log.cache) - - flatten(log.attributes) where IsMap(log.cache) - - merge_maps(log.attributes, log.body, "upsert") where IsMap(log.body) - - context: log - error_mode: ignore - conditions: - - severity_number == 0 and severity_text == "" - statements: - # Infer: extract the first log level keyword from the first 256 characters of the body - - set(log.cache["substr"], log.body.string) where Len(log.body.string) - < 256 - - set(log.cache["substr"], Substring(log.body.string, 0, 256)) where - Len(log.body.string) >= 256 - - set(log.cache, ExtractPatterns(log.cache["substr"], - "(?i)(?P<0>(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))")) - # Infer: detect FATAL - - set(log.severity_number, SEVERITY_NUMBER_FATAL) where - IsMatch(log.cache["0"], "(?i)(alert|crit|emerg|fatal)") - - set(log.severity_text, "fatal") where log.severity_number == - SEVERITY_NUMBER_FATAL - # Infer: detect ERROR - - set(log.severity_number, SEVERITY_NUMBER_ERROR) where - IsMatch(log.cache["0"], "(?i)(error|err)") - - set(log.severity_text, "error") where log.severity_number == - SEVERITY_NUMBER_ERROR - # Infer: detect WARN - - set(log.severity_number, SEVERITY_NUMBER_WARN) where - IsMatch(log.cache["0"], "(?i)(warn|notice)") - - set(log.severity_text, "warn") where log.severity_number == - SEVERITY_NUMBER_WARN - # Infer: detect DEBUG - - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where - IsMatch(log.cache["0"], "(?i)(debug|dbug)") - - set(log.severity_text, "debug") where log.severity_number == - SEVERITY_NUMBER_DEBUG - # Infer: detect TRACE - - set(log.severity_number, SEVERITY_NUMBER_TRACE) where - IsMatch(log.cache["0"], "(?i)(trace)") - - set(log.severity_text, "trace") where log.severity_number == - SEVERITY_NUMBER_TRACE - # Infer: else - - set(log.severity_text, "info") where log.severity_number 
== 0 - - set(log.severity_number, SEVERITY_NUMBER_INFO) where - log.severity_number == 0 - - context: log - error_mode: ignore - statements: - # Normalize the severity_text case - - set(log.severity_text, ConvertCase(log.severity_text, "lower")) - resourcedetection: - detectors: - - env - - system - - docker - timeout: 5s - override: false - batch: - memory_limiter: - # 80% of maximum memory up to 2G - limit_mib: 1500 - # 25% of limit up to 2G - spike_limit_mib: 512 - check_interval: 5s -connectors: - routing/logs: - default_pipelines: [logs/out-default] - error_mode: ignore - table: - - context: log - statement: route() where IsMatch(attributes["rr-web.event"], ".*") - pipelines: [logs/out-rrweb] -exporters: - debug: - verbosity: detailed - sampling_initial: 5 - sampling_thereafter: 200 - clickhouse/rrweb: - database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE} - endpoint: ${env:CLICKHOUSE_ENDPOINT} - password: ${env:CLICKHOUSE_PASSWORD} - username: ${env:CLICKHOUSE_USER} - ttl: 720h - logs_table_name: hyperdx_sessions - timeout: 5s - retry_on_failure: - enabled: true - initial_interval: 5s - max_interval: 30s - max_elapsed_time: 300s - clickhouse: - database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE} - endpoint: ${env:CLICKHOUSE_ENDPOINT} - password: ${env:CLICKHOUSE_PASSWORD} - username: ${env:CLICKHOUSE_USER} - ttl: 720h - timeout: 5s - retry_on_failure: - enabled: true - initial_interval: 5s - max_interval: 30s - max_elapsed_time: 300s -extensions: - health_check: - endpoint: :13133 -service: - telemetry: - metrics: - readers: - - pull: - exporter: - prometheus: - host: '0.0.0.0' - port: 8888 - logs: - level: ${HYPERDX_LOG_LEVEL} - extensions: [health_check] - pipelines: - traces: - # receivers: [otlp/hyperdx] - processors: [memory_limiter, batch] - exporters: [clickhouse] - metrics: - # receivers: [otlp/hyperdx, prometheus] - processors: [memory_limiter, batch] - exporters: [clickhouse] - logs/in: - # receivers: [otlp/hyperdx, fluentforward] - 
exporters: [routing/logs] - logs/out-default: - receivers: [routing/logs] - processors: [memory_limiter, transform, batch] - exporters: [clickhouse] - logs/out-rrweb: - receivers: [routing/logs] - processors: [memory_limiter, batch] - exporters: [clickhouse/rrweb] diff --git a/docker/otel-collector/config.standalone.yaml b/docker/otel-collector/config.standalone.yaml new file mode 100644 index 00000000..bf374623 --- /dev/null +++ b/docker/otel-collector/config.standalone.yaml @@ -0,0 +1,73 @@ +# This configuration is derived from packages/api/src/opamp/controllers/opampController.ts +# When updating this file, ensure it stays in sync with buildOtelCollectorConfig() + +receivers: + otlp/hyperdx: + protocols: + grpc: + include_metadata: true + endpoint: "0.0.0.0:4317" + http: + cors: + allowed_origins: ["*"] + allowed_headers: ["*"] + include_metadata: true + endpoint: "0.0.0.0:4318" + +connectors: + routing/logs: + default_pipelines: [logs/out-default] + error_mode: ignore + table: + - context: log + statement: route() where IsMatch(attributes["rr-web.event"], ".*") + pipelines: [logs/out-rrweb] + +exporters: + clickhouse/rrweb: + database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE} + endpoint: ${env:CLICKHOUSE_ENDPOINT} + password: ${env:CLICKHOUSE_PASSWORD} + username: ${env:CLICKHOUSE_USER} + ttl: 720h + logs_table_name: hyperdx_sessions + timeout: 5s + retry_on_failure: + enabled: true + initial_interval: 5s + max_interval: 30s + max_elapsed_time: 300s + clickhouse: + database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE} + endpoint: ${env:CLICKHOUSE_ENDPOINT} + password: ${env:CLICKHOUSE_PASSWORD} + username: ${env:CLICKHOUSE_USER} + ttl: 720h + timeout: 5s + retry_on_failure: + enabled: true + initial_interval: 5s + max_interval: 30s + max_elapsed_time: 300s + +service: + pipelines: + traces: + receivers: [otlp/hyperdx] + processors: [memory_limiter, batch] + exporters: [clickhouse] + metrics: + receivers: [otlp/hyperdx] + processors: 
[memory_limiter, batch] + exporters: [clickhouse] + logs/in: + receivers: [otlp/hyperdx] + exporters: [routing/logs] + logs/out-default: + receivers: [routing/logs] + processors: [memory_limiter, transform, batch] + exporters: [clickhouse] + logs/out-rrweb: + receivers: [routing/logs] + processors: [memory_limiter, batch] + exporters: [clickhouse/rrweb] diff --git a/docker/otel-collector/entrypoint.sh b/docker/otel-collector/entrypoint.sh index 2c425851..c327efe6 100644 --- a/docker/otel-collector/entrypoint.sh +++ b/docker/otel-collector/entrypoint.sh @@ -1,34 +1,55 @@ #!/bin/sh set -e -if [ "$OTEL_SUPERVISOR_LOGS" = "true" ]; then - # Start log tailer process in background for agent.log - # Arguments: log_file_path [check_interval_seconds] - /log-tailer.sh /etc/otel/supervisor-data/agent.log 1 & +# Check if OPAMP_SERVER_URL is defined to determine mode +if [ -z "$OPAMP_SERVER_URL" ]; then + # Standalone mode - run collector directly without supervisor + echo "Running in standalone mode (OPAMP_SERVER_URL not set)" - # Create a agent log file for the supervisor and collector child process. Normally - # this file would be created as a standard file but we just want a FIFO pipe that - # will pass data over to the tail process in the entrypoint script. This avoids - # the need to the supervisor to store and forward the logs in its memory while also - # eliminating the need for volume based storage. - if [ ! 
-e /etc/otel/supervisor-data/agent.log ]; then - mkfifo /etc/otel/supervisor-data/agent.log || echo "Failed to create FIFO" >&2 + # Build collector arguments as positional parameters (replacing the unused supervisor path) so each config path stays one word + set -- --config /etc/otelcol-contrib/config.yaml --config /etc/otelcol-contrib/standalone-config.yaml + + # Add custom config file if specified + if [ -n "$CUSTOM_OTELCOL_CONFIG_FILE" ]; then + echo "Including custom config: $CUSTOM_OTELCOL_CONFIG_FILE" + set -- "$@" --config "$CUSTOM_OTELCOL_CONFIG_FILE" fi -fi -# Render the supervisor config template using gomplate -# Write to supervisor-data directory which has proper permissions for otel user -gomplate -f /etc/otel/supervisor.yaml.tmpl -o /etc/otel/supervisor-data/supervisor-runtime.yaml - -# Log the configuration being used -if [ -n "$CUSTOM_OTELCOL_CONFIG_FILE" ]; then - echo "Using custom OTEL config file: $CUSTOM_OTELCOL_CONFIG_FILE" + # Execute collector directly; quoted "$@" prevents word splitting and globbing of config paths + exec /otelcontribcol "$@" else - echo "CUSTOM_OTELCOL_CONFIG_FILE not set, using default configuration" + # Supervisor mode - run with OpAMP supervisor + echo "Running in supervisor mode (OPAMP_SERVER_URL: $OPAMP_SERVER_URL)" + + if [ "$OTEL_SUPERVISOR_LOGS" = "true" ]; then + # Start log tailer process in background for agent.log + # Arguments: log_file_path [check_interval_seconds] + /log-tailer.sh /etc/otel/supervisor-data/agent.log 1 & + + # Create an agent log file for the supervisor and collector child process. Normally + # this file would be created as a standard file but we just want a FIFO pipe that + # will pass data over to the tail process in the entrypoint script. This avoids + # the need for the supervisor to store and forward the logs in its memory while also + # eliminating the need for volume based storage. + if [ !
-e /etc/otel/supervisor-data/agent.log ]; then + mkfifo /etc/otel/supervisor-data/agent.log || echo "Failed to create FIFO" >&2 + fi + fi + + # Render the supervisor config template using gomplate + # Write to supervisor-data directory which has proper permissions for otel user + gomplate -f /etc/otel/supervisor.yaml.tmpl -o /etc/otel/supervisor-data/supervisor-runtime.yaml + + # Log the configuration being used + if [ -n "$CUSTOM_OTELCOL_CONFIG_FILE" ]; then + echo "Using custom OTEL config file: $CUSTOM_OTELCOL_CONFIG_FILE" + else + echo "CUSTOM_OTELCOL_CONFIG_FILE not set, using default configuration" + fi + + # Update the command arguments to use the rendered config file + set -- "$1" --config /etc/otel/supervisor-data/supervisor-runtime.yaml + + # Execute the supervisor with all passed arguments + exec "$@" fi - -# Update the command arguments to use the rendered config file -set -- "$1" --config /etc/otel/supervisor-data/supervisor-runtime.yaml - -# Execute the supervisor with all passed arguments -exec "$@" diff --git a/packages/otel-collector/package.json b/packages/otel-collector/package.json new file mode 100644 index 00000000..77099131 --- /dev/null +++ b/packages/otel-collector/package.json @@ -0,0 +1,7 @@ +{ + "name": "@hyperdx/otel-collector", + "description": "HyperDX OpenTelemetry Collector configuration and Docker image", + "version": "2.13.0", + "license": "MIT", + "private": true +} diff --git a/smoke-tests/otel-collector/docker-compose.yaml b/smoke-tests/otel-collector/docker-compose.yaml index 1c3d6008..16171bfd 100644 --- a/smoke-tests/otel-collector/docker-compose.yaml +++ b/smoke-tests/otel-collector/docker-compose.yaml @@ -19,20 +19,17 @@ services: retries: 5 start_period: 10s otel-collector: - image: otel/opentelemetry-collector-contrib:0.129.1 - volumes: - - ../../docker/otel-collector/config.deprecated.yaml:/etc/otelcol-contrib/config.yaml - - ./receiver-config.yaml:/etc/otelcol-contrib/receiver-config.yaml - command: - [ - 
'--config=/etc/otelcol-contrib/receiver-config.yaml', - '--config=/etc/otelcol-contrib/config.yaml', - ] + build: + context: ../../docker/otel-collector + target: dev environment: - CLICKHOUSE_ENDPOINT=tcp://ch-server:9000?dial_timeout=10s - CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT=ch-server:9363 + - CLICKHOUSE_USER=default + - CLICKHOUSE_PASSWORD= - HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE=default - HYPERDX_LOG_LEVEL=info + # OPAMP_SERVER_URL is intentionally not set to run in standalone mode ports: - 4318:4318 # OTLP http receiver - 13133:13133 # health check diff --git a/yarn.lock b/yarn.lock index 2d0951c5..d3a57995 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4581,6 +4581,12 @@ __metadata: languageName: node linkType: hard +"@hyperdx/otel-collector@workspace:packages/otel-collector": + version: 0.0.0-use.local + resolution: "@hyperdx/otel-collector@workspace:packages/otel-collector" + languageName: unknown + linkType: soft + "@hyperdx/otel-web-session-recorder@npm:0.16.2": version: 0.16.2 resolution: "@hyperdx/otel-web-session-recorder@npm:0.16.2"