feat: introduce clickhouse db init script (#843)

Ref: HDX-1777

This shouldn't have any impact on users
This commit is contained in:
Warren 2025-06-09 09:45:23 -07:00 committed by GitHub
parent 06332dc62d
commit 31e22dcff4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 139 additions and 2 deletions

View file

@ -0,0 +1,7 @@
---
"@hyperdx/api": patch
"@hyperdx/app": patch
"@hyperdx/common-utils": patch
---
feat: introduce clickhouse db init script

View file

@ -29,6 +29,7 @@ services:
volumes:
- ./docker/clickhouse/local/config.xml:/etc/clickhouse-server/config.xml
- ./docker/clickhouse/local/users.xml:/etc/clickhouse-server/users.xml
- ./docker/clickhouse/local/init-db.sh:/docker-entrypoint-initdb.d/init-db.sh
restart: on-failure
ports:
- 8123:8123 # http api

View file

@ -53,9 +53,11 @@ services:
environment:
# default settings
CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1
HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE: ${HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE}
volumes:
- ./docker/clickhouse/local/config.xml:/etc/clickhouse-server/config.xml
- ./docker/clickhouse/local/users.xml:/etc/clickhouse-server/users.xml
- ./docker/clickhouse/local/init-db.sh:/docker-entrypoint-initdb.d/init-db.sh
- .volumes/ch_data_dev:/var/lib/clickhouse
- .volumes/ch_logs_dev:/var/log/clickhouse-server
restart: on-failure

View file

@ -0,0 +1,127 @@
#!/bin/bash
# ClickHouse database init script.
#
# Creates the target database and the three tables used for telemetry storage
# (otel_logs, otel_traces, hyperdx_sessions). It is mounted into the ClickHouse
# container under /docker-entrypoint-initdb.d/ by the compose files.
#
# Environment:
#   HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE  Target database name; falls back
#                                              to "default" when unset or empty.
#
# Every statement uses IF NOT EXISTS so the script is safe to run more than
# once, or against a server where some of the objects already exist.

# Abort on the first failing command so a failed statement is not ignored.
set -e

DATABASE=${HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE:-default}

# The heredoc delimiter is intentionally unquoted so that ${DATABASE} is
# expanded by the shell. As a consequence, the backticks around column names
# must be escaped, otherwise bash would treat them as command substitution.
# -n enables multi-query mode so all statements are sent in a single client call.
clickhouse client -n <<EOFSQL
CREATE DATABASE IF NOT EXISTS ${DATABASE};
CREATE TABLE IF NOT EXISTS ${DATABASE}.otel_logs
(
  \`Timestamp\` DateTime64(9) CODEC(Delta(8), ZSTD(1)),
  \`TimestampTime\` DateTime DEFAULT toDateTime(Timestamp),
  \`TraceId\` String CODEC(ZSTD(1)),
  \`SpanId\` String CODEC(ZSTD(1)),
  \`TraceFlags\` UInt8,
  \`SeverityText\` LowCardinality(String) CODEC(ZSTD(1)),
  \`SeverityNumber\` UInt8,
  \`ServiceName\` LowCardinality(String) CODEC(ZSTD(1)),
  \`Body\` String CODEC(ZSTD(1)),
  \`ResourceSchemaUrl\` LowCardinality(String) CODEC(ZSTD(1)),
  \`ResourceAttributes\` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
  \`ScopeSchemaUrl\` LowCardinality(String) CODEC(ZSTD(1)),
  \`ScopeName\` String CODEC(ZSTD(1)),
  \`ScopeVersion\` LowCardinality(String) CODEC(ZSTD(1)),
  \`ScopeAttributes\` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
  \`LogAttributes\` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
  \`__hdx_materialized_k8s.cluster.name\` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.cluster.name'] CODEC(ZSTD(1)),
  \`__hdx_materialized_k8s.container.name\` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.container.name'] CODEC(ZSTD(1)),
  \`__hdx_materialized_k8s.deployment.name\` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.deployment.name'] CODEC(ZSTD(1)),
  \`__hdx_materialized_k8s.namespace.name\` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.namespace.name'] CODEC(ZSTD(1)),
  \`__hdx_materialized_k8s.node.name\` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.node.name'] CODEC(ZSTD(1)),
  \`__hdx_materialized_k8s.pod.name\` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.pod.name'] CODEC(ZSTD(1)),
  \`__hdx_materialized_k8s.pod.uid\` LowCardinality(String) MATERIALIZED ResourceAttributes['k8s.pod.uid'] CODEC(ZSTD(1)),
  \`__hdx_materialized_deployment.environment.name\` LowCardinality(String) MATERIALIZED ResourceAttributes['deployment.environment.name'] CODEC(ZSTD(1)),
  INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
  INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_scope_attr_key mapKeys(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_lower_body lower(Body) TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 8
)
ENGINE = MergeTree
PARTITION BY toDate(TimestampTime)
PRIMARY KEY (ServiceName, TimestampTime)
ORDER BY (ServiceName, TimestampTime, Timestamp)
TTL TimestampTime + toIntervalDay(30)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
CREATE TABLE IF NOT EXISTS ${DATABASE}.otel_traces
(
  \`Timestamp\` DateTime64(9) CODEC(Delta(8), ZSTD(1)),
  \`TraceId\` String CODEC(ZSTD(1)),
  \`SpanId\` String CODEC(ZSTD(1)),
  \`ParentSpanId\` String CODEC(ZSTD(1)),
  \`TraceState\` String CODEC(ZSTD(1)),
  \`SpanName\` LowCardinality(String) CODEC(ZSTD(1)),
  \`SpanKind\` LowCardinality(String) CODEC(ZSTD(1)),
  \`ServiceName\` LowCardinality(String) CODEC(ZSTD(1)),
  \`ResourceAttributes\` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
  \`ScopeName\` String CODEC(ZSTD(1)),
  \`ScopeVersion\` String CODEC(ZSTD(1)),
  \`SpanAttributes\` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
  \`Duration\` UInt64 CODEC(ZSTD(1)),
  \`StatusCode\` LowCardinality(String) CODEC(ZSTD(1)),
  \`StatusMessage\` String CODEC(ZSTD(1)),
  \`Events.Timestamp\` Array(DateTime64(9)) CODEC(ZSTD(1)),
  \`Events.Name\` Array(LowCardinality(String)) CODEC(ZSTD(1)),
  \`Events.Attributes\` Array(Map(LowCardinality(String), String)) CODEC(ZSTD(1)),
  \`Links.TraceId\` Array(String) CODEC(ZSTD(1)),
  \`Links.SpanId\` Array(String) CODEC(ZSTD(1)),
  \`Links.TraceState\` Array(String) CODEC(ZSTD(1)),
  \`Links.Attributes\` Array(Map(LowCardinality(String), String)) CODEC(ZSTD(1)),
  \`__hdx_materialized_rum.sessionId\` String MATERIALIZED ResourceAttributes['rum.sessionId'] CODEC(ZSTD(1)),
  INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
  INDEX idx_rum_session_id __hdx_materialized_rum.sessionId TYPE bloom_filter(0.001) GRANULARITY 1,
  INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_span_attr_key mapKeys(SpanAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_span_attr_value mapValues(SpanAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_duration Duration TYPE minmax GRANULARITY 1,
  INDEX idx_lower_span_name lower(SpanName) TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 8
)
ENGINE = MergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (ServiceName, SpanName, toDateTime(Timestamp))
TTL toDate(Timestamp) + toIntervalDay(30)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
CREATE TABLE IF NOT EXISTS ${DATABASE}.hyperdx_sessions
(
  \`Timestamp\` DateTime64(9) CODEC(Delta(8), ZSTD(1)),
  \`TimestampTime\` DateTime DEFAULT toDateTime(Timestamp),
  \`TraceId\` String CODEC(ZSTD(1)),
  \`SpanId\` String CODEC(ZSTD(1)),
  \`TraceFlags\` UInt8,
  \`SeverityText\` LowCardinality(String) CODEC(ZSTD(1)),
  \`SeverityNumber\` UInt8,
  \`ServiceName\` LowCardinality(String) CODEC(ZSTD(1)),
  \`Body\` String CODEC(ZSTD(1)),
  \`ResourceSchemaUrl\` LowCardinality(String) CODEC(ZSTD(1)),
  \`ResourceAttributes\` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
  \`ScopeSchemaUrl\` LowCardinality(String) CODEC(ZSTD(1)),
  \`ScopeName\` String CODEC(ZSTD(1)),
  \`ScopeVersion\` LowCardinality(String) CODEC(ZSTD(1)),
  \`ScopeAttributes\` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
  \`LogAttributes\` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
  \`__hdx_materialized_rum.sessionId\` String MATERIALIZED ResourceAttributes['rum.sessionId'] CODEC(ZSTD(1)),
  \`__hdx_materialized_type\` LowCardinality(String) MATERIALIZED toString(simpleJSONExtractInt(Body, 'type')) CODEC(ZSTD(1)),
  INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
  INDEX idx_rum_session_id __hdx_materialized_rum.sessionId TYPE bloom_filter(0.001) GRANULARITY 1,
  INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_scope_attr_key mapKeys(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
  INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 8
)
ENGINE = MergeTree
PARTITION BY toDate(TimestampTime)
PRIMARY KEY (ServiceName, TimestampTime)
ORDER BY (ServiceName, TimestampTime, Timestamp)
TTL TimestampTime + toIntervalDay(30)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1
EOFSQL

View file

@ -116,7 +116,7 @@ exporters:
endpoint: ${env:CLICKHOUSE_ENDPOINT}
password: ${env:CLICKHOUSE_PASSWORD}
username: ${env:CLICKHOUSE_USER}
ttl: 72h
ttl: 720h
logs_table_name: hyperdx_sessions
timeout: 5s
retry_on_failure:
@ -129,7 +129,7 @@ exporters:
endpoint: ${env:CLICKHOUSE_ENDPOINT}
password: ${env:CLICKHOUSE_PASSWORD}
username: ${env:CLICKHOUSE_USER}
ttl: 72h
ttl: 720h
timeout: 5s
retry_on_failure:
enabled: true