mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
feat: allow otel-collector to run without OpAMP server (#1672)
Today, users have to set up an OpAMP server to run our ClickStack OTel collector. Instead, we should allow users to run the collector without OpAMP when they're using ClickHouse Cloud with the ClickStack integration.
Standalone mode is selected when the user does not set `OPAMP_SERVER_URL`.
The end result is that a user can run:
```
docker run \
-e CLICKHOUSE_ENDPOINT=${CLICKHOUSE_ENDPOINT} \
-e CLICKHOUSE_USER=default \
-e CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD} \
-p 8080:8080 -p 4317:4317 -p 4318:4318 \
clickhouse/clickstack-otel-collector:latest
```
Ref: HDX-3300
This commit is contained in:
parent
7ce9485e83
commit
43de467864
9 changed files with 153 additions and 215 deletions
|
|
@ -2,7 +2,7 @@
|
|||
"$schema": "https://unpkg.com/@changesets/config@2.3.1/schema.json",
|
||||
"changelog": "@changesets/cli/changelog",
|
||||
"commit": false,
|
||||
"fixed": [["@hyperdx/api", "@hyperdx/app"]],
|
||||
"fixed": [["@hyperdx/api", "@hyperdx/app", "@hyperdx/otel-collector"]],
|
||||
"linked": [],
|
||||
"access": "restricted",
|
||||
"baseBranch": "main",
|
||||
|
|
|
|||
5
.changeset/smooth-bananas-hammer.md
Normal file
5
.changeset/smooth-bananas-hammer.md
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
"@hyperdx/otel-collector": patch
|
||||
---
|
||||
|
||||
feat: allow otel-collector to run without OpAMP server
|
||||
|
|
@ -13,7 +13,8 @@ ARG USER_GID=10001
|
|||
RUN apk add --no-cache ca-certificates && \
|
||||
addgroup -S -g ${USER_GID} otel && \
|
||||
adduser -S -u ${USER_UID} -G otel otel && \
|
||||
install -d -m 0777 -o ${USER_UID} -g ${USER_GID} /etc/otel/supervisor-data
|
||||
install -d -m 0777 -o ${USER_UID} -g ${USER_GID} /etc/otel/supervisor-data && \
|
||||
install -d -m 0755 -o ${USER_UID} -g ${USER_GID} /etc/otelcol-contrib
|
||||
|
||||
# Copy gomplate binary from the gomplate image
|
||||
COPY --from=gomplate /bin/gomplate /usr/local/bin/gomplate
|
||||
|
|
@ -30,8 +31,9 @@ COPY --chmod=755 ./log-tailer.sh /log-tailer.sh
|
|||
## dev ##############################################################################################
|
||||
FROM base AS dev
|
||||
|
||||
COPY ./config.yaml /etc/otelcol-contrib/config.yaml
|
||||
COPY ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl
|
||||
COPY --chown=10001:10001 ./config.yaml /etc/otelcol-contrib/config.yaml
|
||||
COPY --chown=10001:10001 ./config.standalone.yaml /etc/otelcol-contrib/standalone-config.yaml
|
||||
COPY --chown=10001:10001 ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl
|
||||
|
||||
EXPOSE 4317 4318 13133
|
||||
|
||||
|
|
@ -40,8 +42,9 @@ ENTRYPOINT ["/entrypoint.sh", "/opampsupervisor"]
|
|||
## prod #############################################################################################
|
||||
FROM base AS prod
|
||||
|
||||
COPY ./config.yaml /etc/otelcol-contrib/config.yaml
|
||||
COPY ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl
|
||||
COPY --chown=10001:10001 ./config.yaml /etc/otelcol-contrib/config.yaml
|
||||
COPY --chown=10001:10001 ./config.standalone.yaml /etc/otelcol-contrib/standalone-config.yaml
|
||||
COPY --chown=10001:10001 ./supervisor_docker.yaml.tmpl /etc/otel/supervisor.yaml.tmpl
|
||||
|
||||
EXPOSE 4317 4318 13133
|
||||
|
||||
|
|
|
|||
|
|
@ -1,174 +0,0 @@
|
|||
receivers:
|
||||
# Troubleshooting
|
||||
prometheus:
|
||||
config:
|
||||
scrape_configs:
|
||||
- job_name: 'otelcol'
|
||||
scrape_interval: 30s
|
||||
static_configs:
|
||||
- targets:
|
||||
- '0.0.0.0:8888'
|
||||
- ${env:CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT}
|
||||
# Data sources: logs
|
||||
fluentforward:
|
||||
endpoint: '0.0.0.0:24225'
|
||||
# Configured via OpAMP w/ authentication
|
||||
# Data sources: traces, metrics, logs
|
||||
# otlp/hyperdx:
|
||||
# protocols:
|
||||
# grpc:
|
||||
# include_metadata: true
|
||||
# endpoint: '0.0.0.0:4317'
|
||||
# http:
|
||||
# cors:
|
||||
# allowed_origins: ['*']
|
||||
# allowed_headers: ['*']
|
||||
# include_metadata: true
|
||||
# endpoint: '0.0.0.0:4318'
|
||||
processors:
|
||||
transform:
|
||||
log_statements:
|
||||
- context: log
|
||||
error_mode: ignore
|
||||
statements:
|
||||
# JSON parsing: Extends log attributes with the fields from structured log body content, either as an OTEL map or
|
||||
# as a string containing JSON content.
|
||||
- set(log.cache, ExtractPatterns(log.body, "(?P<0>(\\{.*\\}))")) where
|
||||
IsString(log.body)
|
||||
- merge_maps(log.attributes, ParseJSON(log.cache["0"]), "upsert")
|
||||
where IsMap(log.cache)
|
||||
- flatten(log.attributes) where IsMap(log.cache)
|
||||
- merge_maps(log.attributes, log.body, "upsert") where IsMap(log.body)
|
||||
- context: log
|
||||
error_mode: ignore
|
||||
conditions:
|
||||
- severity_number == 0 and severity_text == ""
|
||||
statements:
|
||||
# Infer: extract the first log level keyword from the first 256 characters of the body
|
||||
- set(log.cache["substr"], log.body.string) where Len(log.body.string)
|
||||
< 256
|
||||
- set(log.cache["substr"], Substring(log.body.string, 0, 256)) where
|
||||
Len(log.body.string) >= 256
|
||||
- set(log.cache, ExtractPatterns(log.cache["substr"],
|
||||
"(?i)(?P<0>(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))"))
|
||||
# Infer: detect FATAL
|
||||
- set(log.severity_number, SEVERITY_NUMBER_FATAL) where
|
||||
IsMatch(log.cache["0"], "(?i)(alert|crit|emerg|fatal)")
|
||||
- set(log.severity_text, "fatal") where log.severity_number ==
|
||||
SEVERITY_NUMBER_FATAL
|
||||
# Infer: detect ERROR
|
||||
- set(log.severity_number, SEVERITY_NUMBER_ERROR) where
|
||||
IsMatch(log.cache["0"], "(?i)(error|err)")
|
||||
- set(log.severity_text, "error") where log.severity_number ==
|
||||
SEVERITY_NUMBER_ERROR
|
||||
# Infer: detect WARN
|
||||
- set(log.severity_number, SEVERITY_NUMBER_WARN) where
|
||||
IsMatch(log.cache["0"], "(?i)(warn|notice)")
|
||||
- set(log.severity_text, "warn") where log.severity_number ==
|
||||
SEVERITY_NUMBER_WARN
|
||||
# Infer: detect DEBUG
|
||||
- set(log.severity_number, SEVERITY_NUMBER_DEBUG) where
|
||||
IsMatch(log.cache["0"], "(?i)(debug|dbug)")
|
||||
- set(log.severity_text, "debug") where log.severity_number ==
|
||||
SEVERITY_NUMBER_DEBUG
|
||||
# Infer: detect TRACE
|
||||
- set(log.severity_number, SEVERITY_NUMBER_TRACE) where
|
||||
IsMatch(log.cache["0"], "(?i)(trace)")
|
||||
- set(log.severity_text, "trace") where log.severity_number ==
|
||||
SEVERITY_NUMBER_TRACE
|
||||
# Infer: else
|
||||
- set(log.severity_text, "info") where log.severity_number == 0
|
||||
- set(log.severity_number, SEVERITY_NUMBER_INFO) where
|
||||
log.severity_number == 0
|
||||
- context: log
|
||||
error_mode: ignore
|
||||
statements:
|
||||
# Normalize the severity_text case
|
||||
- set(log.severity_text, ConvertCase(log.severity_text, "lower"))
|
||||
resourcedetection:
|
||||
detectors:
|
||||
- env
|
||||
- system
|
||||
- docker
|
||||
timeout: 5s
|
||||
override: false
|
||||
batch:
|
||||
memory_limiter:
|
||||
# 80% of maximum memory up to 2G
|
||||
limit_mib: 1500
|
||||
# 25% of limit up to 2G
|
||||
spike_limit_mib: 512
|
||||
check_interval: 5s
|
||||
connectors:
|
||||
routing/logs:
|
||||
default_pipelines: [logs/out-default]
|
||||
error_mode: ignore
|
||||
table:
|
||||
- context: log
|
||||
statement: route() where IsMatch(attributes["rr-web.event"], ".*")
|
||||
pipelines: [logs/out-rrweb]
|
||||
exporters:
|
||||
debug:
|
||||
verbosity: detailed
|
||||
sampling_initial: 5
|
||||
sampling_thereafter: 200
|
||||
clickhouse/rrweb:
|
||||
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
|
||||
endpoint: ${env:CLICKHOUSE_ENDPOINT}
|
||||
password: ${env:CLICKHOUSE_PASSWORD}
|
||||
username: ${env:CLICKHOUSE_USER}
|
||||
ttl: 720h
|
||||
logs_table_name: hyperdx_sessions
|
||||
timeout: 5s
|
||||
retry_on_failure:
|
||||
enabled: true
|
||||
initial_interval: 5s
|
||||
max_interval: 30s
|
||||
max_elapsed_time: 300s
|
||||
clickhouse:
|
||||
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
|
||||
endpoint: ${env:CLICKHOUSE_ENDPOINT}
|
||||
password: ${env:CLICKHOUSE_PASSWORD}
|
||||
username: ${env:CLICKHOUSE_USER}
|
||||
ttl: 720h
|
||||
timeout: 5s
|
||||
retry_on_failure:
|
||||
enabled: true
|
||||
initial_interval: 5s
|
||||
max_interval: 30s
|
||||
max_elapsed_time: 300s
|
||||
extensions:
|
||||
health_check:
|
||||
endpoint: :13133
|
||||
service:
|
||||
telemetry:
|
||||
metrics:
|
||||
readers:
|
||||
- pull:
|
||||
exporter:
|
||||
prometheus:
|
||||
host: '0.0.0.0'
|
||||
port: 8888
|
||||
logs:
|
||||
level: ${HYPERDX_LOG_LEVEL}
|
||||
extensions: [health_check]
|
||||
pipelines:
|
||||
traces:
|
||||
# receivers: [otlp/hyperdx]
|
||||
processors: [memory_limiter, batch]
|
||||
exporters: [clickhouse]
|
||||
metrics:
|
||||
# receivers: [otlp/hyperdx, prometheus]
|
||||
processors: [memory_limiter, batch]
|
||||
exporters: [clickhouse]
|
||||
logs/in:
|
||||
# receivers: [otlp/hyperdx, fluentforward]
|
||||
exporters: [routing/logs]
|
||||
logs/out-default:
|
||||
receivers: [routing/logs]
|
||||
processors: [memory_limiter, transform, batch]
|
||||
exporters: [clickhouse]
|
||||
logs/out-rrweb:
|
||||
receivers: [routing/logs]
|
||||
processors: [memory_limiter, batch]
|
||||
exporters: [clickhouse/rrweb]
|
||||
73
docker/otel-collector/config.standalone.yaml
Normal file
73
docker/otel-collector/config.standalone.yaml
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
# This configuration is derived from packages/api/src/opamp/controllers/opampController.ts
|
||||
# When updating this file, ensure it stays in sync with buildOtelCollectorConfig()
|
||||
|
||||
receivers:
|
||||
otlp/hyperdx:
|
||||
protocols:
|
||||
grpc:
|
||||
include_metadata: true
|
||||
endpoint: "0.0.0.0:4317"
|
||||
http:
|
||||
cors:
|
||||
allowed_origins: ["*"]
|
||||
allowed_headers: ["*"]
|
||||
include_metadata: true
|
||||
endpoint: "0.0.0.0:4318"
|
||||
|
||||
connectors:
|
||||
routing/logs:
|
||||
default_pipelines: [logs/out-default]
|
||||
error_mode: ignore
|
||||
table:
|
||||
- context: log
|
||||
statement: route() where IsMatch(attributes["rr-web.event"], ".*")
|
||||
pipelines: [logs/out-rrweb]
|
||||
|
||||
exporters:
|
||||
clickhouse/rrweb:
|
||||
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
|
||||
endpoint: ${env:CLICKHOUSE_ENDPOINT}
|
||||
password: ${env:CLICKHOUSE_PASSWORD}
|
||||
username: ${env:CLICKHOUSE_USER}
|
||||
ttl: 720h
|
||||
logs_table_name: hyperdx_sessions
|
||||
timeout: 5s
|
||||
retry_on_failure:
|
||||
enabled: true
|
||||
initial_interval: 5s
|
||||
max_interval: 30s
|
||||
max_elapsed_time: 300s
|
||||
clickhouse:
|
||||
database: ${env:HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
|
||||
endpoint: ${env:CLICKHOUSE_ENDPOINT}
|
||||
password: ${env:CLICKHOUSE_PASSWORD}
|
||||
username: ${env:CLICKHOUSE_USER}
|
||||
ttl: 720h
|
||||
timeout: 5s
|
||||
retry_on_failure:
|
||||
enabled: true
|
||||
initial_interval: 5s
|
||||
max_interval: 30s
|
||||
max_elapsed_time: 300s
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp/hyperdx]
|
||||
processors: [memory_limiter, batch]
|
||||
exporters: [clickhouse]
|
||||
metrics:
|
||||
receivers: [otlp/hyperdx]
|
||||
processors: [memory_limiter, batch]
|
||||
exporters: [clickhouse]
|
||||
logs/in:
|
||||
receivers: [otlp/hyperdx]
|
||||
exporters: [routing/logs]
|
||||
logs/out-default:
|
||||
receivers: [routing/logs]
|
||||
processors: [memory_limiter, transform, batch]
|
||||
exporters: [clickhouse]
|
||||
logs/out-rrweb:
|
||||
receivers: [routing/logs]
|
||||
processors: [memory_limiter, batch]
|
||||
exporters: [clickhouse/rrweb]
|
||||
|
|
@ -1,34 +1,55 @@
|
|||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
if [ "$OTEL_SUPERVISOR_LOGS" = "true" ]; then
|
||||
# Start log tailer process in background for agent.log
|
||||
# Arguments: log_file_path [check_interval_seconds]
|
||||
/log-tailer.sh /etc/otel/supervisor-data/agent.log 1 &
|
||||
# Check if OPAMP_SERVER_URL is defined to determine mode
|
||||
if [ -z "$OPAMP_SERVER_URL" ]; then
|
||||
# Standalone mode - run collector directly without supervisor
|
||||
echo "Running in standalone mode (OPAMP_SERVER_URL not set)"
|
||||
|
||||
# Create an agent log file for the supervisor and collector child process. Normally
|
||||
# this file would be created as a standard file but we just want a FIFO pipe that
|
||||
# will pass data over to the tail process in the entrypoint script. This avoids
|
||||
# the need for the supervisor to store and forward the logs in its memory while also
|
||||
# eliminating the need for volume based storage.
|
||||
if [ ! -e /etc/otel/supervisor-data/agent.log ]; then
|
||||
mkfifo /etc/otel/supervisor-data/agent.log || echo "Failed to create FIFO" >&2
|
||||
# Build collector arguments with multiple config files
|
||||
COLLECTOR_ARGS="--config /etc/otelcol-contrib/config.yaml --config /etc/otelcol-contrib/standalone-config.yaml"
|
||||
|
||||
# Add custom config file if specified
|
||||
if [ -n "$CUSTOM_OTELCOL_CONFIG_FILE" ]; then
|
||||
echo "Including custom config: $CUSTOM_OTELCOL_CONFIG_FILE"
|
||||
COLLECTOR_ARGS="$COLLECTOR_ARGS --config $CUSTOM_OTELCOL_CONFIG_FILE"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Render the supervisor config template using gomplate
|
||||
# Write to supervisor-data directory which has proper permissions for otel user
|
||||
gomplate -f /etc/otel/supervisor.yaml.tmpl -o /etc/otel/supervisor-data/supervisor-runtime.yaml
|
||||
|
||||
# Log the configuration being used
|
||||
if [ -n "$CUSTOM_OTELCOL_CONFIG_FILE" ]; then
|
||||
echo "Using custom OTEL config file: $CUSTOM_OTELCOL_CONFIG_FILE"
|
||||
# Execute collector directly
|
||||
exec /otelcontribcol $COLLECTOR_ARGS
|
||||
else
|
||||
echo "CUSTOM_OTELCOL_CONFIG_FILE not set, using default configuration"
|
||||
# Supervisor mode - run with OpAMP supervisor
|
||||
echo "Running in supervisor mode (OPAMP_SERVER_URL: $OPAMP_SERVER_URL)"
|
||||
|
||||
if [ "$OTEL_SUPERVISOR_LOGS" = "true" ]; then
|
||||
# Start log tailer process in background for agent.log
|
||||
# Arguments: log_file_path [check_interval_seconds]
|
||||
/log-tailer.sh /etc/otel/supervisor-data/agent.log 1 &
|
||||
|
||||
# Create an agent log file for the supervisor and collector child process. Normally
|
||||
# this file would be created as a standard file but we just want a FIFO pipe that
|
||||
# will pass data over to the tail process in the entrypoint script. This avoids
|
||||
# the need for the supervisor to store and forward the logs in its memory while also
|
||||
# eliminating the need for volume based storage.
|
||||
if [ ! -e /etc/otel/supervisor-data/agent.log ]; then
|
||||
mkfifo /etc/otel/supervisor-data/agent.log || echo "Failed to create FIFO" >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Render the supervisor config template using gomplate
|
||||
# Write to supervisor-data directory which has proper permissions for otel user
|
||||
gomplate -f /etc/otel/supervisor.yaml.tmpl -o /etc/otel/supervisor-data/supervisor-runtime.yaml
|
||||
|
||||
# Log the configuration being used
|
||||
if [ -n "$CUSTOM_OTELCOL_CONFIG_FILE" ]; then
|
||||
echo "Using custom OTEL config file: $CUSTOM_OTELCOL_CONFIG_FILE"
|
||||
else
|
||||
echo "CUSTOM_OTELCOL_CONFIG_FILE not set, using default configuration"
|
||||
fi
|
||||
|
||||
# Update the command arguments to use the rendered config file
|
||||
set -- "$1" --config /etc/otel/supervisor-data/supervisor-runtime.yaml
|
||||
|
||||
# Execute the supervisor with all passed arguments
|
||||
exec "$@"
|
||||
fi
|
||||
|
||||
# Update the command arguments to use the rendered config file
|
||||
set -- "$1" --config /etc/otel/supervisor-data/supervisor-runtime.yaml
|
||||
|
||||
# Execute the supervisor with all passed arguments
|
||||
exec "$@"
|
||||
|
|
|
|||
7
packages/otel-collector/package.json
Normal file
7
packages/otel-collector/package.json
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"name": "@hyperdx/otel-collector",
|
||||
"description": "HyperDX OpenTelemetry Collector configuration and Docker image",
|
||||
"version": "2.13.0",
|
||||
"license": "MIT",
|
||||
"private": true
|
||||
}
|
||||
|
|
@ -19,20 +19,17 @@ services:
|
|||
retries: 5
|
||||
start_period: 10s
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:0.129.1
|
||||
volumes:
|
||||
- ../../docker/otel-collector/config.deprecated.yaml:/etc/otelcol-contrib/config.yaml
|
||||
- ./receiver-config.yaml:/etc/otelcol-contrib/receiver-config.yaml
|
||||
command:
|
||||
[
|
||||
'--config=/etc/otelcol-contrib/receiver-config.yaml',
|
||||
'--config=/etc/otelcol-contrib/config.yaml',
|
||||
]
|
||||
build:
|
||||
context: ../../docker/otel-collector
|
||||
target: dev
|
||||
environment:
|
||||
- CLICKHOUSE_ENDPOINT=tcp://ch-server:9000?dial_timeout=10s
|
||||
- CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT=ch-server:9363
|
||||
- CLICKHOUSE_USER=default
|
||||
- CLICKHOUSE_PASSWORD=
|
||||
- HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE=default
|
||||
- HYPERDX_LOG_LEVEL=info
|
||||
# OPAMP_SERVER_URL is intentionally not set to run in standalone mode
|
||||
ports:
|
||||
- 4318:4318 # OTLP http receiver
|
||||
- 13133:13133 # health check
|
||||
|
|
|
|||
|
|
@ -4581,6 +4581,12 @@ __metadata:
|
|||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"@hyperdx/otel-collector@workspace:packages/otel-collector":
|
||||
version: 0.0.0-use.local
|
||||
resolution: "@hyperdx/otel-collector@workspace:packages/otel-collector"
|
||||
languageName: unknown
|
||||
linkType: soft
|
||||
|
||||
"@hyperdx/otel-web-session-recorder@npm:0.16.2":
|
||||
version: 0.16.2
|
||||
resolution: "@hyperdx/otel-web-session-recorder@npm:0.16.2"
|
||||
|
|
|
|||
Loading…
Reference in a new issue