2026-01-14 18:05:11 +00:00
|
|
|
import { differenceInSeconds } from 'date-fns';
|
|
|
|
|
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support; in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
import { BaseClickhouseClient } from '@/clickhouse';
|
|
|
|
|
import {
|
|
|
|
|
ChartConfigWithOptDateRange,
|
|
|
|
|
CteChartConfig,
|
|
|
|
|
InternalAggregateFunction,
|
|
|
|
|
InternalAggregateFunctionSchema,
|
|
|
|
|
MaterializedViewConfiguration,
|
|
|
|
|
TSource,
|
|
|
|
|
} from '@/types';
|
|
|
|
|
|
|
|
|
|
import { Metadata, TableConnection } from './metadata';
|
|
|
|
|
import {
|
|
|
|
|
convertDateRangeToGranularityString,
|
|
|
|
|
convertGranularityToSeconds,
|
feat: Align date ranges to MV Granularity (#1575)
Closes HDX-3124
# Summary
This PR makes the following changes:
1. Date ranges for all MV queries are now aligned to the MV Granularity
2. Each chart type now has an indicator when the date range has been adjusted to align with either the MV Granularity or (in the case of Line/Bar charts) the Chart Granularity.
3. The useQueriedChartConfig, useRenderedSqlChartConfig, and useOffsetPaginatedQuery hooks have been updated to get the MV-optimized chart configuration from the useMVOptimizationExplanation, which allows us to share the `EXPLAIN ESTIMATE` query results between the MV Optimization Indicator (the lightning bolt icon on each chart) and the chart itself. This roughly halves the number of EXPLAIN ESTIMATE queries that are made.
## Demo
<img width="1628" height="1220" alt="Screenshot 2026-01-08 at 11 42 39 AM" src="https://github.com/user-attachments/assets/80a06e3a-bbfc-4193-b6b7-5e0056c588d3" />
<img width="1627" height="1131" alt="Screenshot 2026-01-08 at 11 40 54 AM" src="https://github.com/user-attachments/assets/69879e3d-3a83-4c4d-9604-0552a01c17d7" />
## Testing
To test locally with an MV, you can use the following DDL
<details>
<summary>DDL For an MV</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName)
SETTINGS index_granularity = 8192;
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
</details>
2026-01-09 16:07:52 +00:00
|
|
|
getAlignedDateRange,
|
2026-01-14 18:05:11 +00:00
|
|
|
splitAndTrimWithBracket,
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support; in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
} from './utils';
|
|
|
|
|
|
|
|
|
|
/**
 * One element of a chart config's `select` list, with the plain-string
 * form of `select` excluded from the union before indexing.
 */
type SelectItem = Exclude<ChartConfigWithOptDateRange['select'], string>[number];
|
|
|
|
|
|
|
|
|
|
/**
 * Reports whether the metadata type of `column` on the given table starts
 * with `SimpleAggregateFunction(`.
 *
 * @param tableConnection - Connection/table fields spread into the column lookup.
 * @param column - Name of the column to inspect.
 * @param metadata - Metadata client used to fetch the column definition.
 * @returns `true` when the column type has the `SimpleAggregateFunction(`
 *   prefix; `false` when no column metadata is returned or the lookup throws.
 */
async function isSimpleAggregateFunction(
  tableConnection: TableConnection,
  column: string,
  metadata: Metadata,
) {
  try {
    const lookup = { ...tableConnection, column };
    const columnMeta = await metadata.getColumn(lookup);
    if (columnMeta == null) {
      return false;
    }
    return Boolean(columnMeta.type.startsWith('SimpleAggregateFunction('));
  } catch {
    // Best effort: any failure while reading metadata is treated as "not simple".
    return false;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reads the column's type from metadata and extracts the name of the
 * parametrised function wrapped by `AggregateFunction(...)`.
 * Intended for variants of quantile (ex. quantileExact, quantileDD, etc.)
 *
 * Examples:
 *   AggregateFunction(quantile(0.95), Int64)         --> quantile
 *   AggregateFunction(quantileTDigest(0.95), Int64)  --> quantileTDigest
 *   AggregateFunction(quantileDD(0.001, 0.95), Int64) --> quantileDD
 *
 * @param tableConnection - Connection/table fields spread into the column lookup.
 * @param column - Name of the column to inspect.
 * @param metadata - Metadata client used to fetch the column definition.
 * @returns The captured function name, or `undefined` when the column has no
 *   type, the type does not match the pattern, or the lookup throws.
 */
async function getQuantileAggregateFunction(
  tableConnection: TableConnection,
  column: string,
  metadata: Metadata,
) {
  try {
    const lookup = { ...tableConnection, column };
    const type = (await metadata.getColumn(lookup))?.type;

    // Capture group 1 is the function name that precedes its parameter list.
    return type
      ? type.match(/^AggregateFunction\(\s*([^(, ]+)\s*\(/)?.[1]
      : undefined;
  } catch {
    return undefined;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Picks the function name to apply to an aggregated column when querying it.
 *
 * - `count` always maps to `sum`: counts are stored in AggregatingMergeTree
 *   as UInt64 or SimpleAggregateFunction(sum, UInt64), both of which should
 *   be summed rather than count()'ed.
 * - For a column that `isSimpleAggregateFunction` reports as simple, the
 *   aggregate function name is returned unchanged.
 * - Otherwise the name is suffixed with `Merge`.
 *
 * @param tableConnection - Connection/table fields used for the column lookup.
 * @param column - Name of the aggregated column.
 * @param aggFn - The aggregate function requested by the query.
 * @param metadata - Metadata client used to inspect the column type.
 */
async function getAggregateMergeFunction(
  tableConnection: TableConnection,
  column: string,
  aggFn: string,
  metadata: Metadata,
) {
  if (aggFn === 'count') {
    return 'sum';
  }

  const storedAsSimpleAggregate = await isSimpleAggregateFunction(
    tableConnection,
    column,
    metadata,
  );
  return storedAsSimpleAggregate ? aggFn : `${aggFn}Merge`;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Type guard: a non-empty string that `InternalAggregateFunctionSchema`
 * parses successfully is an `InternalAggregateFunction`.
 */
function isValidAggFn(
  aggFn: string | undefined,
): aggFn is InternalAggregateFunction {
  if (!aggFn) {
    return false;
  }
  return InternalAggregateFunctionSchema.safeParse(aggFn).success;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Type guard for select items that use the `quantile` aggregate function
 * and carry a numeric `level`.
 */
function isQuantileSelectItem(item: SelectItem): item is {
  valueExpression: string;
  aggFn: 'quantile';
  level: number;
} {
  if (item.aggFn !== 'quantile') {
    return false;
  }
  return 'level' in item && typeof item.level === 'number';
}
|
|
|
|
|
|
|
|
|
|
/**
 * Finds the first aggregated-column entry in `mvConfig` that matches the
 * requested aggregate function and source column.
 *
 * `count` entries match on the aggregate function alone; every other
 * aggregate function must also match `sourceColumn`.
 *
 * @returns The matching entry, or `undefined` when none matches.
 */
function getAggregatedColumnConfig(
  mvConfig: MaterializedViewConfiguration,
  column: string,
  aggFn: InternalAggregateFunction,
) {
  for (const candidate of mvConfig.aggregatedColumns) {
    if (candidate.aggFn !== aggFn) {
      continue;
    }
    if (candidate.aggFn === 'count' || candidate.sourceColumn === column) {
      return candidate;
    }
  }
  return undefined;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Indicates whether the MV described by mvConfig is capable of
 * supporting the granularity requested in the given chart config.
 *
 * - With neither a granularity nor a date range, the MV is considered
 *   compatible.
 * - A missing granularity is treated as 'auto'.
 * - 'auto' is resolved from the date range; 'auto' without a date range is
 *   unsupported.
 * - Otherwise the chart granularity must be at least the MV's minimum
 *   granularity and an exact multiple of it.
 **/
function mvConfigSupportsGranularity(
  mvConfig: MaterializedViewConfiguration,
  chartConfig: ChartConfigWithOptDateRange,
): boolean {
  const { granularity, dateRange } = chartConfig;

  if (!granularity && !dateRange) {
    return true;
  }

  // If granularity is not provided at all, but we have a date range, assume
  // an 'auto' granularity and check that against the MV.
  const requestedGranularity = granularity || 'auto';

  let effectiveGranularity;
  if (requestedGranularity === 'auto') {
    // 'auto' granularity requires a date range to determine effective granularity
    if (!dateRange) {
      return false;
    }
    effectiveGranularity = convertDateRangeToGranularityString(dateRange);
  } else {
    effectiveGranularity = requestedGranularity;
  }

  const chartSeconds = convertGranularityToSeconds(effectiveGranularity);
  const mvSeconds = convertGranularityToSeconds(mvConfig.minGranularity);

  // The chart granularity must be a multiple of the MV granularity,
  // to avoid unequal distribution of data across chart time buckets
  // which don't align with the MV time buckets.
  const isCoarseEnough = chartSeconds >= mvSeconds;
  const isAligned = chartSeconds % mvSeconds === 0;
  return isCoarseEnough && isAligned;
}
|
|
|
|
|
|
2026-01-14 18:05:11 +00:00
|
|
|
/**
 * Counts how many whole intervals of `granularity` fit between the start
 * and end of `dateRange`.
 *
 * @param dateRange - `[start, end]` pair of dates.
 * @param granularity - Granularity string, converted to seconds via
 *   `convertGranularityToSeconds`.
 * @returns The seconds between end and start (per date-fns
 *   `differenceInSeconds`) divided by the interval length, rounded down.
 */
function countIntervalsInDateRange(
  dateRange: [Date, Date],
  granularity: string,
) {
  const intervalSeconds = convertGranularityToSeconds(granularity);
  const spanSeconds = differenceInSeconds(dateRange[1], dateRange[0]);
  return Math.floor(spanSeconds / intervalSeconds);
}
|
|
|
|
|
|
2026-01-05 17:59:39 +00:00
|
|
|
/**
 * Indicates whether the chart's date range is covered by the MV, based on
 * the MV's optional `minDate`.
 *
 * - No `minDate` configured: any chart config is supported.
 * - `minDate` configured but the chart has no date range: unsupported.
 * - Otherwise the chart's start date must be on or after `minDate`.
 */
function mvConfigSupportsDateRange(
  mvConfig: MaterializedViewConfiguration,
  chartConfig: ChartConfigWithOptDateRange,
) {
  const { minDate } = mvConfig;
  const { dateRange } = chartConfig;

  if (!minDate) {
    return true;
  }
  if (!dateRange) {
    return false;
  }

  return dateRange[0] >= new Date(minDate);
}
|
|
|
|
|
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support, in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
// Matches a `count(` or `countIf(` call (case-insensitive, optional whitespace
// before the parenthesis) that starts at a word boundary.
const COUNT_FUNCTION_PATTERN = /\bcount(If)?\s*\(/i;

/**
 * Reports whether a select item's `valueExpression` contains a `count(...)`
 * or `countIf(...)` call.
 *
 * @param selectItem - Select item whose `valueExpression` is inspected.
 * @returns `true` when the expression matches `COUNT_FUNCTION_PATTERN`.
 */
export function isUnsupportedCountFunction(selectItem: SelectItem): boolean {
  const { valueExpression } = selectItem;
  const countCall = COUNT_FUNCTION_PATTERN.exec(valueExpression);
  return countCall !== null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Rewrites a single select item so that it reads from a materialized view
 * column instead of the source expression.
 *
 * Items without an `aggFn` are returned unchanged. Otherwise the matching
 * aggregated column is looked up in `mvConfig` — first with an empty source
 * column (for aggregations such as count), then with the item's own
 * `valueExpression` — and the item is returned with `valueExpression` set to
 * the view column and `aggFn` set to the result of
 * `getAggregateMergeFunction`.
 *
 * @param mvConfig - Materialized view configuration to resolve columns from.
 * @param selectItem - The select item to convert.
 * @param mvTableConnection - Connection details for the view's table.
 * @param metadata - Metadata client passed through to the lookup helpers.
 * @returns The original item when it has no `aggFn`, otherwise a copy that
 *   targets the view column.
 * @throws Error when the expression is a custom count() expression, when the
 *   `aggFn` is not valid, or when the view has no column for the requested
 *   aggregate/column pair.
 */
async function convertSelectToMaterializedViewSelect(
  mvConfig: MaterializedViewConfiguration,
  selectItem: SelectItem,
  mvTableConnection: TableConnection,
  metadata: Metadata,
): Promise<SelectItem> {
  const { valueExpression, aggFn: initialAggFn } = selectItem;
  // May be swapped for the view's quantile function further down.
  let aggFn = initialAggFn;

  // Custom count() expressions are not yet optimizable. They would pass the
  // EXPLAIN check but return an incorrect result, so reject them up front.
  if (isUnsupportedCountFunction(selectItem)) {
    throw new Error(
      `Custom count() expressions are not supported with materialized views.`,
    );
  }

  // Nothing to convert for non-aggregated items.
  if (!aggFn) {
    return selectItem;
  }

  if (!isValidAggFn(aggFn)) {
    throw new Error(`Aggregate function ${aggFn} is not valid.`);
  }

  // Aggregations without a value expression (eg. count) are configured with an
  // empty source column. The select item may still carry a valueExpression in
  // that case, but it is ignored.
  const sourcelessColumnConfig = getAggregatedColumnConfig(
    mvConfig,
    '',
    aggFn,
  );
  const columnConfig =
    sourcelessColumnConfig ||
    getAggregatedColumnConfig(mvConfig, valueExpression, aggFn);

  if (!columnConfig) {
    throw new Error(
      `The aggregate function ${formatAggregateFunction(aggFn, selectItem['level'])} is not available for column '${valueExpression}'.`,
    );
  }

  const { mvColumn } = columnConfig;

  // Quantile handling only applies to columns matched by value expression.
  if (!sourcelessColumnConfig && isQuantileSelectItem(selectItem)) {
    const viewQuantileFn = await getQuantileAggregateFunction(
      mvTableConnection,
      mvColumn,
      metadata,
    );
    if (viewQuantileFn) {
      aggFn = viewQuantileFn;
    }
  }

  const mergeFn = await getAggregateMergeFunction(
    mvTableConnection,
    mvColumn,
    aggFn,
    metadata,
  );

  return {
    ...selectItem,
    valueExpression: mvColumn,
    aggFn: mergeFn,
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Describes the outcome of an optimization attempt for one materialized view.
 * NOTE(review): field meanings below are inferred from names; confirm against
 * the code that produces this type.
 */
export type MVOptimizationExplanation = {
  /** The materialized view configuration this explanation refers to. */
  mvConfig: MaterializedViewConfiguration;
  /** Presumably whether the view can be used for the query. */
  success: boolean;
  /** Presumably the reasons the view could not be used; verify when it is empty. */
  errors: string[];
  /** Optional row estimate; presumably from an EXPLAIN ESTIMATE query — TODO confirm. */
  rowEstimate?: number;
};
|
|
|
|
|
|
|
|
|
|
export async function tryConvertConfigToMaterializedViewSelect<
|
|
|
|
|
C extends ChartConfigWithOptDateRange | CteChartConfig,
|
|
|
|
|
>(
|
|
|
|
|
chartConfig: C,
|
|
|
|
|
mvConfig: MaterializedViewConfiguration,
|
|
|
|
|
metadata: Metadata,
|
|
|
|
|
): Promise<{
|
|
|
|
|
optimizedConfig?: C;
|
|
|
|
|
errors?: string[];
|
|
|
|
|
}> {
|
|
|
|
|
if (!Array.isArray(chartConfig.select)) {
|
|
|
|
|
return {
|
|
|
|
|
errors: ['Only array-based select statements are supported.'],
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-05 17:59:39 +00:00
|
|
|
if (mvConfig.minDate && !mvConfigSupportsDateRange(mvConfig, chartConfig)) {
|
|
|
|
|
return {
|
|
|
|
|
errors: [
|
|
|
|
|
'The selected date range includes dates for which this view does not contain data.',
|
|
|
|
|
],
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support, in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
if (!mvConfigSupportsGranularity(mvConfig, chartConfig)) {
|
|
|
|
|
const error = chartConfig.granularity
|
2026-01-07 19:02:14 +00:00
|
|
|
? `Granularity must be a multiple of the view's granularity (${mvConfig.minGranularity}).`
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support, in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
: 'The selected date range is too short for the granularity of this materialized view.';
|
|
|
|
|
return { errors: [error] };
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const mvTableConnection: TableConnection = {
|
|
|
|
|
databaseName: mvConfig.databaseName,
|
|
|
|
|
tableName: mvConfig.tableName,
|
|
|
|
|
connectionId: chartConfig.connection,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
const conversions = await Promise.allSettled(
|
|
|
|
|
chartConfig.select.map(selectItem =>
|
|
|
|
|
convertSelectToMaterializedViewSelect(
|
|
|
|
|
mvConfig,
|
|
|
|
|
selectItem,
|
|
|
|
|
mvTableConnection,
|
|
|
|
|
metadata,
|
|
|
|
|
),
|
|
|
|
|
),
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
const select: SelectItem[] = [];
|
|
|
|
|
const errors: string[] = [];
|
|
|
|
|
for (const result of conversions) {
|
|
|
|
|
if (result.status === 'rejected') {
|
|
|
|
|
errors.push(result.reason.message);
|
|
|
|
|
} else {
|
|
|
|
|
select.push(result.value);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (errors.length > 0) {
|
|
|
|
|
return {
|
|
|
|
|
errors,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-07 19:02:14 +00:00
|
|
|
const clonedConfig: C = {
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support, in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
...structuredClone(chartConfig),
|
|
|
|
|
select,
|
2026-01-14 18:05:11 +00:00
|
|
|
timestampValueExpression: mvConfig.timestampColumn,
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support, in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
from: {
|
|
|
|
|
databaseName: mvConfig.databaseName,
|
|
|
|
|
tableName: mvConfig.tableName,
|
|
|
|
|
},
|
2026-01-07 19:02:14 +00:00
|
|
|
// Make the date range end exclusive to avoid selecting the entire next time bucket from the MV
|
feat: Align date ranges to MV Granularity (#1575)
Closes HDX-3124
# Summary
This PR makes the following changes:
1. Date ranges for all MV queries are now aligned to the MV Granularity
2. Each chart type now has an indicator when the date range has been adjusted to align with either the MV Granularity or (in the case of Line/Bar charts) the Chart Granularity.
3. The useQueriedChartConfig, useRenderedSqlChartConfig, and useOffsetPaginatedQuery hooks have been updated to get the MV-optimized chart configuration from the useMVOptimizationExplanation hook, which allows us to share the `EXPLAIN ESTIMATE` query results between the MV Optimization Indicator (the lightning bolt icon on each chart) and the chart itself. This roughly halves the number of `EXPLAIN ESTIMATE` queries that are made.
## Demo
<img width="1628" height="1220" alt="Screenshot 2026-01-08 at 11 42 39 AM" src="https://github.com/user-attachments/assets/80a06e3a-bbfc-4193-b6b7-5e0056c588d3" />
<img width="1627" height="1131" alt="Screenshot 2026-01-08 at 11 40 54 AM" src="https://github.com/user-attachments/assets/69879e3d-3a83-4c4d-9604-0552a01c17d7" />
## Testing
To test locally with an MV, you can use the following DDL:
<details>
<summary>DDL For an MV</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName)
SETTINGS index_granularity = 8192;
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
</details>
2026-01-09 16:07:52 +00:00
|
|
|
// Align the date range to the MV granularity to avoid excluding the first time bucket
|
|
|
|
|
...('dateRange' in chartConfig && chartConfig.dateRange
|
|
|
|
|
? {
|
|
|
|
|
dateRangeEndInclusive: false,
|
|
|
|
|
dateRange: getAlignedDateRange(
|
|
|
|
|
chartConfig.dateRange,
|
|
|
|
|
mvConfig.minGranularity,
|
|
|
|
|
),
|
|
|
|
|
}
|
|
|
|
|
: {}),
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support; in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
optimizedConfig: clonedConfig,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Attempts to optimize a config with a single MV Config */
|
|
|
|
|
async function tryOptimizeConfig<C extends ChartConfigWithOptDateRange>(
|
|
|
|
|
config: C,
|
|
|
|
|
metadata: Metadata,
|
|
|
|
|
clickhouseClient: BaseClickhouseClient,
|
2026-01-07 15:27:54 +00:00
|
|
|
signal: AbortSignal | undefined,
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support; in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
mvConfig: MaterializedViewConfiguration,
|
2026-01-21 16:07:30 +00:00
|
|
|
source: Omit<TSource, 'connection'>, // for overlap with ISource type
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support; in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
) {
|
|
|
|
|
const errors: string[] = [];
|
|
|
|
|
// Attempt to optimize any CTEs that exist in the config
|
|
|
|
|
let optimizedConfig: C | undefined = undefined;
|
|
|
|
|
if (config.with) {
|
|
|
|
|
const cteOptimizationResults = await Promise.all(
|
|
|
|
|
config.with.map(async cte => {
|
|
|
|
|
if (
|
|
|
|
|
cte.chartConfig &&
|
2026-01-21 16:07:30 +00:00
|
|
|
cte.chartConfig.from.databaseName === source.from.databaseName &&
|
|
|
|
|
cte.chartConfig.from.tableName === source.from.tableName
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support; in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
) {
|
|
|
|
|
return tryConvertConfigToMaterializedViewSelect(
|
|
|
|
|
cte.chartConfig,
|
|
|
|
|
mvConfig,
|
|
|
|
|
metadata,
|
|
|
|
|
);
|
|
|
|
|
} else {
|
|
|
|
|
return {
|
|
|
|
|
optimizedConfig: undefined,
|
|
|
|
|
errors: [],
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}),
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
const hasOptimizedCTEs = cteOptimizationResults.some(
|
|
|
|
|
r => !!r.optimizedConfig,
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
if (hasOptimizedCTEs) {
|
|
|
|
|
optimizedConfig = {
|
|
|
|
|
...structuredClone(config),
|
|
|
|
|
with: config.with.map((originalCte, index) => {
|
|
|
|
|
return {
|
|
|
|
|
...originalCte,
|
|
|
|
|
chartConfig:
|
|
|
|
|
cteOptimizationResults[index].optimizedConfig ??
|
|
|
|
|
originalCte.chartConfig,
|
|
|
|
|
};
|
|
|
|
|
}),
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
errors.push(...cteOptimizationResults.flatMap(r => r.errors ?? []));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Attempt to optimize the main (outer) select
|
|
|
|
|
if (
|
2026-01-21 16:07:30 +00:00
|
|
|
config.from.databaseName === source.from.databaseName &&
|
|
|
|
|
config.from.tableName === source.from.tableName
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support; in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
) {
|
|
|
|
|
const convertedOuterSelect = await tryConvertConfigToMaterializedViewSelect(
|
|
|
|
|
optimizedConfig ?? config,
|
|
|
|
|
mvConfig,
|
|
|
|
|
metadata,
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
if (convertedOuterSelect.optimizedConfig) {
|
|
|
|
|
optimizedConfig = convertedOuterSelect.optimizedConfig;
|
|
|
|
|
}
|
|
|
|
|
errors.push(...(convertedOuterSelect.errors ?? []));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If the config has been optimized, validate it by checking whether an EXPLAIN query succeeds
|
|
|
|
|
if (optimizedConfig) {
|
|
|
|
|
const {
|
|
|
|
|
isValid,
|
|
|
|
|
rowEstimate = Number.POSITIVE_INFINITY,
|
|
|
|
|
error,
|
|
|
|
|
} = await clickhouseClient.testChartConfigValidity({
|
|
|
|
|
config: optimizedConfig,
|
|
|
|
|
metadata,
|
|
|
|
|
opts: {
|
|
|
|
|
abort_signal: signal,
|
|
|
|
|
},
|
2026-01-21 16:07:30 +00:00
|
|
|
querySettings: source.querySettings,
|
feat: Add materialized view support (Beta) (#1507)
Closes HDX-3082
# Summary
This PR back-ports support for materialized views from the EE repo. Note that this feature is in **Beta**, and is subject to significant changes.
This feature is intended to support:
1. Configuring AggregatingMergeTree (or SummingMergeTree) Materialized Views which are associated with a Source
2. Automatically selecting and querying an associated materialized view when a query supports it, in Chart Explorer, Custom Dashboards, the Services Dashboard, and the Search Page Histogram.
3. A UX for understanding what materialized views are available for a source, and whether (and why) it is or is not being used for a particular visualization.
## Note to Reviewer(s)
This is a large PR, but the code has largely already been reviewed.
- For net-new files, types, components, and utility functions, the code does not differ from the EE repo
- Changes to the various services dashboard pages do not differ from the EE repo
- Changes to `useOffsetPaginatedQuery`, `useChartConfig`, and `DBEditTimeChart` differ slightly due to unrelated (to MVs) drift between this repo and the EE repo, and due to the lack of feature toggles in this repo. **This is where slightly closer review would be most valuable.**
## Demo
<details>
<summary>Demo: MV Configuration</summary>
https://github.com/user-attachments/assets/fedf3bcf-892c-4b8d-a788-7e231e23bcc3
</details>
<details>
<summary>Demo: Chart Explorer</summary>
https://github.com/user-attachments/assets/fc8d1efa-7edc-42fc-98f0-75431cc056b8
</details>
<details>
<summary>Demo: Dashboards</summary>
https://github.com/user-attachments/assets/f3cb247e-711f-4d90-95b8-cf977e94f065
</details>
## Known Limitations
This feature is in Beta due to the following known limitations, which will be addressed in subsequent PRs:
1. Visualization start and end time, when not aligned with the granularity of MVs, will result in statistics based on the MV "time buckets" which fall inside the date range. This may not align exactly with the source table data which is in the selected date range.
2. Alerts do not make use of MVs, even if the associated visualization does. Due to (1), this means that alert values may not exactly match the values shown in the associated visualization.
## Differences in OSS vs EE Support
- In OSS, there is a beta label on the MV configurations section
- In EE there are feature toggles to enable MV support, in OSS the feature is enabled for all teams, but will only run for sources with MVs configured.
## Testing
To test, a couple of MVs can be created on the default `otel_traces` table, directly in ClickHouse:
<details>
<summary>Example MVs DDL</summary>
```sql
CREATE TABLE default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` SimpleAggregateFunction(sum, UInt64),
`sum__Duration` SimpleAggregateFunction(sum, UInt64),
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, StatusCode, SpanKind, ServiceName);
CREATE MATERIALIZED VIEW default.metrics_rollup_1m_mv TO default.metrics_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`version` LowCardinality(String),
`StatusCode` LowCardinality(String),
`count` UInt64,
`sum__Duration` Int64,
`avg__Duration` AggregateFunction(avg, UInt64),
`quantile__Duration` AggregateFunction(quantileTDigest(0.5), UInt64),
`min__Duration` SimpleAggregateFunction(min, UInt64),
`max__Duration` SimpleAggregateFunction(max, UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
StatusCode,
count() AS count,
sum(Duration) AS sum__Duration,
avgState(Duration) AS avg__Duration,
quantileTDigestState(0.5)(Duration) AS quantile__Duration,
minSimpleState(Duration) AS min__Duration,
maxSimpleState(Duration) AS max__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind,
StatusCode;
```
```sql
CREATE TABLE default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
ENGINE = AggregatingMergeTree
PARTITION BY toDate(Timestamp)
ORDER BY (Timestamp, ServiceName, SpanKind);
CREATE MATERIALIZED VIEW default.span_kind_rollup_1m_mv TO default.span_kind_rollup_1m
(
`Timestamp` DateTime,
`ServiceName` LowCardinality(String),
`SpanKind` LowCardinality(String),
`histogram__Duration` AggregateFunction(histogram(20), UInt64)
)
AS SELECT
toStartOfMinute(Timestamp) AS Timestamp,
ServiceName,
SpanKind,
histogramState(20)(Duration) AS histogram__Duration
FROM default.otel_traces
GROUP BY
Timestamp,
ServiceName,
SpanKind;
```
</details>
Then you'll need to configure the materialized views in your source settings:
<details>
<summary>Source Configuration (should auto-infer when MVs are selected)</summary>
<img width="949" height="1011" alt="Screenshot 2025-12-19 at 10 26 54 AM" src="https://github.com/user-attachments/assets/fc46a1b9-de8b-4b95-a8ef-ba5fee905685" />
</details>
2025-12-19 16:17:23 +00:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (error) {
|
|
|
|
|
errors.push(error);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (isValid) {
|
|
|
|
|
return {
|
|
|
|
|
optimizedConfig,
|
|
|
|
|
rowEstimate,
|
|
|
|
|
errors: [],
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return { errors };
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Attempts to optimize a config with each of the provided MV Configs.
 *
 * Every entry of `source.materializedViews` is passed to `tryOptimizeConfig`
 * concurrently. Among the attempts that produced an optimized config, the one
 * with the lowest row estimate is selected.
 *
 * @param config - The chart config to optimize.
 * @param metadata - Forwarded unchanged to `tryOptimizeConfig`.
 * @param clickhouseClient - Forwarded unchanged to `tryOptimizeConfig`.
 * @param signal - Optional abort signal, forwarded to `tryOptimizeConfig`.
 * @param source - Source whose `materializedViews` are tried; also forwarded
 *   to `tryOptimizeConfig`.
 * @returns The selected optimized config (undefined when none was selected)
 *   and one explanation per MV config. An explanation's `success` is true only
 *   for the attempt whose optimized config was selected.
 */
export async function tryOptimizeConfigWithMaterializedViewWithExplanations<
  C extends ChartConfigWithOptDateRange,
>(
  config: C,
  metadata: Metadata,
  clickhouseClient: BaseClickhouseClient,
  signal: AbortSignal | undefined,
  source: Omit<TSource, 'connection'>, // for overlap with ISource type
): Promise<{
  optimizedConfig?: C;
  explanations: MVOptimizationExplanation[];
}> {
  const mvConfigs = source.materializedViews ?? [];

  // Try every MV in parallel, keeping track of which MV produced which result
  const optimizationResults = await Promise.all(
    mvConfigs.map(async mvConfig => {
      const result = await tryOptimizeConfig(
        config,
        metadata,
        clickhouseClient,
        signal,
        mvConfig,
        source,
      );
      return { ...result, mvConfig };
    }),
  );

  // Find a config with the lowest row estimate among successfully optimized configs.
  // The comparison is strict and the running minimum starts at +Infinity, so a
  // result whose row estimate is missing (or infinite) is never selected, and
  // the first of several equal estimates wins.
  let resultOptimizedConfig: C | undefined = undefined;
  let minRowEstimate = Number.POSITIVE_INFINITY;
  for (const result of optimizationResults) {
    if (
      result.optimizedConfig &&
      (result.rowEstimate ?? Number.POSITIVE_INFINITY) < minRowEstimate
    ) {
      resultOptimizedConfig = result.optimizedConfig;
      minRowEstimate = result.rowEstimate ?? Number.POSITIVE_INFINITY;
    }
  }

  const explanations = optimizationResults.map(
    ({ optimizedConfig, errors, rowEstimate, mvConfig }) => ({
      // Identity comparison: only the selected attempt is reported as a success
      success: !!optimizedConfig && optimizedConfig === resultOptimizedConfig,
      errors,
      rowEstimate,
      mvConfig,
    }),
  );

  return {
    optimizedConfig: resultOptimizedConfig,
    explanations,
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the materialized-view-optimized version of `config` when one is
 * selected, otherwise returns `config` itself.
 *
 * This is a thin wrapper around
 * `tryOptimizeConfigWithMaterializedViewWithExplanations` that discards the
 * per-MV explanations.
 *
 * @param config - The chart config to optimize.
 * @param metadata - Forwarded unchanged to the underlying optimizer.
 * @param clickhouseClient - Forwarded unchanged to the underlying optimizer.
 * @param signal - Optional abort signal, forwarded to the underlying optimizer.
 * @param source - Source whose materialized views are considered.
 * @returns The optimized config, or the original `config` as a fallback.
 */
export async function tryOptimizeConfigWithMaterializedView<
  C extends ChartConfigWithOptDateRange,
>(
  config: C,
  metadata: Metadata,
  clickhouseClient: BaseClickhouseClient,
  signal: AbortSignal | undefined,
  source: Omit<TSource, 'connection'>, // for overlap with ISource type
): Promise<C> {
  const { optimizedConfig } =
    await tryOptimizeConfigWithMaterializedViewWithExplanations(
      config,
      metadata,
      clickhouseClient,
      signal,
      source,
    );

  return optimizedConfig ?? config;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Produces a display name for an aggregate function.
 *
 * Any function other than `quantile` is returned unchanged. For `quantile`,
 * the levels 0.5, 0.9, 0.95 and 0.99 map to `median`, `p90`, `p95` and `p99`;
 * every other level (including `undefined`) yields the plain name `quantile`.
 *
 * @param aggFn - Name of the aggregate function.
 * @param level - Quantile level; only consulted when `aggFn` is `quantile`.
 * @returns The display name.
 */
function formatAggregateFunction(
  aggFn: string,
  level: number | undefined,
): string {
  if (aggFn !== 'quantile') {
    return aggFn;
  }

  switch (level) {
    case 0.5:
      return 'median';
    case 0.9:
      return 'p90';
    case 0.95:
      return 'p95';
    case 0.99:
      return 'p99';
    default:
      return `quantile`;
  }
}
|
2026-01-14 18:05:11 +00:00
|
|
|
|
|
|
|
|
/**
 * Builds the identifier used to key a materialized view in lookups:
 * the database name and table name joined by a dot.
 *
 * @param mv - Object carrying the MV's `databaseName` and `tableName`.
 * @returns A string of the form `databaseName.tableName`.
 */
function toMvId(
  mv: Pick<MaterializedViewConfiguration, 'databaseName' | 'tableName'>,
): string {
  return `${mv.databaseName}.${mv.tableName}`;
}
|
|
|
|
|
|
|
|
|
|
/**
 * One key-values lookup to perform: a chart config to query against,
 * together with the keys to fetch using that config.
 */
export interface GetKeyValueCall<C extends ChartConfigWithOptDateRange> {
  /** The config to run the lookup with. */
  chartConfig: C;
  /** The keys to look up with `chartConfig`. */
  keys: string[];
}
|
|
|
|
|
|
|
|
|
|
/**
 * Splits a key-values lookup into calls that read from materialized views
 * where possible, plus one fallback call for the remaining keys.
 *
 * Steps:
 * 1. For each MV on `source`, collect the requested keys that appear in the
 *    MV's dimension columns, provided `mvConfigSupportsDateRange` accepts the
 *    MV and the date range spans at least 3 intervals of the MV's
 *    `minGranularity` (treated as unbounded when no date range is set).
 * 2. Validate a candidate query per MV via `testChartConfigValidity`.
 * 3. Visit the valid MVs in ascending order of row estimate, assigning each
 *    key to the first MV that supports it.
 * 4. Emit one call per MV that received keys, and one call against the
 *    original `chartConfig` for any keys left uncovered.
 *
 * @param chartConfig - Base config for the lookup; it is cloned, never mutated.
 * @param keys - Keys whose values should be looked up.
 * @param source - Source providing `materializedViews` and `querySettings`.
 * @param clickhouseClient - Client used to validate the candidate MV queries.
 * @param metadata - Forwarded to `testChartConfigValidity`.
 * @param signal - Optional abort signal for the validation queries.
 * @returns The calls to perform; empty when `keys` is empty.
 */
export async function optimizeGetKeyValuesCalls<
  C extends ChartConfigWithOptDateRange,
>({
  chartConfig,
  keys,
  source,
  clickhouseClient,
  metadata,
  signal,
}: {
  chartConfig: C;
  keys: string[];
  source: TSource;
  clickhouseClient: BaseClickhouseClient;
  metadata: Metadata;
  signal?: AbortSignal;
}): Promise<GetKeyValueCall<C>[]> {
  // Get the MVs from the source
  const mvs = source?.materializedViews || [];
  const mvsById = new Map(mvs.map(mv => [toMvId(mv), mv]));

  // Identify keys which can be queried from a materialized view
  const supportedKeysByMv = new Map<string, string[]>();
  for (const [mvId, mv] of mvsById.entries()) {
    const mvIntervalsInDateRange = chartConfig.dateRange
      ? countIntervalsInDateRange(chartConfig.dateRange, mv.minGranularity)
      : Infinity;
    if (
      // Ensures that the MV contains data for the selected date range
      mvConfigSupportsDateRange(mv, chartConfig) &&
      // Ensures that the MV's granularity is small enough that the selected date
      // range will include multiple MV time buckets. (3 is an arbitrary cutoff)
      mvIntervalsInDateRange >= 3
    ) {
      const dimensionColumns = splitAndTrimWithBracket(mv.dimensionColumns);
      const keysInMV = keys.filter(k => dimensionColumns.includes(k));
      if (keysInMV.length > 0) {
        supportedKeysByMv.set(mvId, keysInMV);
      }
    }
  }

  // Build the configs which would be used to query each MV for all of the keys it supports
  const configsToExplain = [...supportedKeysByMv.entries()].map(
    ([mvId, mvKeys]) => {
      const { databaseName, tableName, timestampColumn } = mvsById.get(mvId)!;
      return {
        ...structuredClone(chartConfig),
        timestampValueExpression: timestampColumn,
        from: {
          databaseName,
          tableName,
        },
        // These are dimension columns so we don't need to add any -Merge combinators
        select: mvKeys
          .map((k, i) => `groupUniqArray(1)(${k}) AS param${i}`)
          .join(', '),
      };
    },
  );

  // Figure out which of those configs are valid by running EXPLAIN queries
  const explainResults = await Promise.all(
    configsToExplain.map(async config => {
      // A missing row estimate is treated as +Infinity, which sorts that MV last
      const { isValid, rowEstimate = Number.POSITIVE_INFINITY } =
        await clickhouseClient.testChartConfigValidity({
          config,
          metadata,
          opts: { abort_signal: signal },
          querySettings: source?.querySettings,
        });
      return {
        id: toMvId({
          databaseName: config.from.databaseName,
          tableName: config.from.tableName,
        }),
        isValid,
        rowEstimate,
      };
    }),
  );

  // For each key, find the best MV that can provide it while reading the fewest rows
  const finalKeysByMv = new Map<string, string[]>();
  const uncoveredKeys = new Set<string>(keys);
  const sortedValidConfigs = explainResults
    .filter(r => r.isValid)
    .sort((a, b) => a.rowEstimate - b.rowEstimate);
  for (const config of sortedValidConfigs) {
    const mvKeys = supportedKeysByMv.get(config.id) ?? [];

    // Only include keys which have not already been covered by a previous MV
    const keysNotAlreadyCovered = mvKeys.filter(k => uncoveredKeys.has(k));
    if (keysNotAlreadyCovered.length) {
      finalKeysByMv.set(config.id, keysNotAlreadyCovered);
      for (const key of keysNotAlreadyCovered) {
        uncoveredKeys.delete(key);
      }
    }
  }

  // Build the final list of optimized calls
  const calls = [...finalKeysByMv.entries()].map(([mvId, mvKeys]) => {
    const { databaseName, tableName, timestampColumn } = mvsById.get(mvId)!;
    const optimizedConfig: C = {
      ...structuredClone(chartConfig),
      timestampValueExpression: timestampColumn,
      from: {
        databaseName,
        tableName,
      },
    };
    return {
      chartConfig: optimizedConfig,
      keys: mvKeys,
    };
  });

  // Keys that no valid MV can serve are looked up with the original config
  if (uncoveredKeys.size) {
    calls.push({
      chartConfig: structuredClone(chartConfig),
      keys: [...uncoveredKeys],
    });
  }

  return calls;
}
|