hyperdx/docker/clickhouse/local/config.xml
Dan Hable d07e30d5fb
feat: associate logged in user to clickhouse query (#1636)
Allows setting a custom setting prefix on a connection. When set in HyperDX and the ClickHouse settings, the HyperDX app will set a custom setting for each query. These are recorded in the query log and can be used to identify which user issues the query.

## Testing

The commit also updates the local dev ClickHouse instance to support a custom setting prefix of `hyeprdx`. After running `make dev-up`, you should be able to edit the connection and set the the prefix to `hyperdx`. 

<img width="955" height="197" alt="Screenshot 2026-01-21 at 1 23 14 PM" src="https://github.com/user-attachments/assets/607fc945-d93f-4976-9862-3118b420c077" />

After saving, just allow the app to live tail a source like logs. If you connect to the ClickHouse database, you should then be able to run

```
SELECT query, Settings
FROM system.query_log
WHERE has(mapKeys(Settings), 'hyperdx_user')
FORMAT Vertical
```

and then see a bunch of queries with the user set to your logged in user.

```
Row 46:
───────
query:    SELECT Timestamp, ServiceName, SeverityText, Body, TimestampTime FROM default.otel_logs WHERE (TimestampTime >= fromUnixTimestamp64Milli(_CAST(1769022372269, 'Int64'))) AND (TimestampTime <= fromUnixTimestamp64Milli(_CAST(1769023272269, 'Int64'))) ORDER BY (TimestampTime, Timestamp) DESC LIMIT _CAST(0, 'Int32'), _CAST(200, 'Int32') FORMAT JSONCompactEachRowWithNamesAndTypes
Settings: {'use_uncompressed_cache':'0','load_balancing':'in_order','log_queries':'1','max_memory_usage':'10000000000','cancel_http_readonly_queries_on_client_close':'1','parallel_replicas_for_cluster_engines':'0','date_time_output_format':'iso','hyperdx_user':'\'dan@hyperdx.io\''}
```
2026-01-28 14:58:05 +00:00

177 lines
6.3 KiB
XML

<?xml version="1.0"?>
<clickhouse>
<logger>
<level>debug</level>
<console>true</console>
<log remove="remove" />
<errorlog remove="remove" />
</logger>
<listen_host>0.0.0.0</listen_host>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<interserver_http_host>ch-server</interserver_http_host>
<interserver_http_port>9009</interserver_http_port>
<max_connections>4096</max_connections>
<keep_alive_timeout>64</keep_alive_timeout>
<max_concurrent_queries>100</max_concurrent_queries>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<path>/var/lib/clickhouse/</path>
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
<user_directories>
<users_xml>
<path>users.xml</path>
</users_xml>
</user_directories>
<!-- <users_config>users.xml</users_config> -->
<default_profile>default</default_profile>
<default_database>default</default_database>
<timezone>UTC</timezone>
<mlock_executable>false</mlock_executable>
<!-- Prometheus exporter -->
<prometheus>
<endpoint>/metrics</endpoint>
<port>9363</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
<errors>true</errors>
</prometheus>
<!-- Query log. Used only for queries with setting log_queries = 1. -->
<query_log>
<database>system</database>
<table>query_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
<!-- Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected
with "collect_interval_milliseconds" interval. -->
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
<!--
Asynchronous metric log contains values of metrics from
system.asynchronous_metrics.
-->
<asynchronous_metric_log>
<database>system</database>
<table>asynchronous_metric_log</table>
<!--
Asynchronous metrics are updated once a minute, so there is
no need to flush more often.
-->
<flush_interval_milliseconds>7000</flush_interval_milliseconds>
</asynchronous_metric_log>
<!--
OpenTelemetry log contains OpenTelemetry trace spans.
-->
<opentelemetry_span_log>
<!--
The default table creation code is insufficient, this <engine> spec
is a workaround. There is no 'event_time' for this log, but two times,
start and finish. It is sorted by finish time, to avoid inserting
data too far away in the past (probably we can sometimes insert a span
that is seconds earlier than the last span in the table, due to a race
between several spans inserted in parallel). This gives the spans a
global order that we can use to e.g. retry insertion into some external
system.
-->
<engine>
engine MergeTree
partition by toYYYYMM(finish_date)
order by (finish_date, finish_time_us, trace_id)
</engine>
<database>system</database>
<table>opentelemetry_span_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</opentelemetry_span_log>
<!-- Crash log. Stores stack traces for fatal errors.
This table is normally empty. -->
<crash_log>
<database>system</database>
<table>crash_log</table>
<partition_by />
<flush_interval_milliseconds>1000</flush_interval_milliseconds>
</crash_log>
<!-- Profiling on Processors level. -->
<processors_profile_log>
<database>system</database>
<table>processors_profile_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</processors_profile_log>
<!-- Uncomment if use part log.
Part log contains information about all actions with parts in MergeTree tables (creation, deletion,
merges, downloads).-->
<part_log>
<database>system</database>
<table>part_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</part_log>
<!-- Trace log. Stores stack traces collected by query profilers.
See query_profiler_real_time_period_ns and query_profiler_cpu_time_period_ns settings. -->
<trace_log>
<database>system</database>
<table>trace_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log>
<!-- Query thread log. Has information about all threads participated in query execution.
Used only for queries with setting log_query_threads = 1. -->
<query_thread_log>
<database>system</database>
<table>query_thread_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_thread_log>
<!-- Query views log. Has information about all dependent views associated with a query.
Used only for queries with setting log_query_views = 1. -->
<query_views_log>
<database>system</database>
<table>query_views_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_views_log>
<remote_servers>
<hdx_cluster>
<shard>
<replica>
<host>ch-server</host>
<port>9000</port>
</replica>
</shard>
</hdx_cluster>
</remote_servers>
<distributed_ddl>
<path>/clickhouse/task_queue/ddl</path>
</distributed_ddl>
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
<custom_settings_prefixes>hyperdx</custom_settings_prefixes>
</clickhouse>