mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
perf: Improve getKeyValues query performance for JSON keys (#1284)
Closes HDX-2623 # Summary This change improves the performance of `getKeyValues` when getting values of a JSON key. Generally, columns that are not referenced outside of a CTE will be pruned by the query planner. For JSON however, if the outer select references one field in a JSON column, then the inner select will read (it seems) the entire JSON object. This PR also adds integration tests for `getKeyValues` to ensure that the function generates queries that work as expected in ClickHouse. ## Performance impact (on single JSON Dashboard Filter) - Original: 15.03s <img width="584" height="71" alt="Screenshot 2025-10-21 at 3 28 07 PM" src="https://github.com/user-attachments/assets/184de198-cee1-4b1d-beed-ec4465d3e248" /> - Optimized: 0.443s <img width="590" height="61" alt="Screenshot 2025-10-21 at 3 25 47 PM" src="https://github.com/user-attachments/assets/690d0ef0-15b8-47c5-9a7e-8b8f6b8f5e92" />
This commit is contained in:
parent
93edb6f84f
commit
8190ee8f6a
10 changed files with 255 additions and 89 deletions
5
.changeset/sweet-vans-pump.md
Normal file
5
.changeset/sweet-vans-pump.md
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
"@hyperdx/common-utils": patch
|
||||
---
|
||||
|
||||
perf: Improve getKeyValues query performance for JSON keys
|
||||
8
Makefile
8
Makefile
|
|
@ -45,10 +45,16 @@ dev-int:
|
|||
npx nx run @hyperdx/api:dev:int $(FILE)
|
||||
docker compose -p int -f ./docker-compose.ci.yml down
|
||||
|
||||
.PHONY: dev-int-common-utils
|
||||
dev-int-common-utils:
|
||||
docker compose -p int -f ./docker-compose.ci.yml up -d
|
||||
npx nx run @hyperdx/common-utils:dev:int $(FILE)
|
||||
docker compose -p int -f ./docker-compose.ci.yml down
|
||||
|
||||
.PHONY: ci-int
|
||||
ci-int:
|
||||
docker compose -p int -f ./docker-compose.ci.yml up -d
|
||||
npx nx run @hyperdx/api:ci:int
|
||||
npx nx run-many -t ci:int --parallel=false
|
||||
docker compose -p int -f ./docker-compose.ci.yml down
|
||||
|
||||
.PHONY: dev-unit
|
||||
|
|
|
|||
4
packages/common-utils/.env.test
Normal file
4
packages/common-utils/.env.test
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
CLICKHOUSE_HOST=http://localhost:8123
|
||||
CLICKHOUSE_PASSWORD=
|
||||
CLICKHOUSE_USER=default
|
||||
NODE_ENV=test
|
||||
|
|
@ -5,6 +5,7 @@ module.exports = {
|
|||
verbose: true,
|
||||
rootDir: './src',
|
||||
testMatch: ['**/__tests__/*.test.ts?(x)'],
|
||||
testPathIgnorePatterns: ['.*\\.int\\.test\\.ts$'],
|
||||
testTimeout: 30000,
|
||||
moduleNameMapper: {
|
||||
'@/(.*)$': '<rootDir>/$1',
|
||||
|
|
|
|||
13
packages/common-utils/jest.int.config.js
Normal file
13
packages/common-utils/jest.int.config.js
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
|
||||
module.exports = {
|
||||
setupFiles: ['dotenv/config'],
|
||||
preset: 'ts-jest',
|
||||
testEnvironment: 'node',
|
||||
verbose: true,
|
||||
rootDir: './src',
|
||||
testMatch: ['**/__tests__/*.int.test.ts?(x)'],
|
||||
testTimeout: 30000,
|
||||
moduleNameMapper: {
|
||||
'@/(.*)$': '<rootDir>/$1',
|
||||
},
|
||||
};
|
||||
|
|
@ -36,6 +36,7 @@
|
|||
"@types/sqlstring": "^2.3.0",
|
||||
"@types/supertest": "^2.0.12",
|
||||
"@types/uuid": "^8.3.4",
|
||||
"dotenv": "^17.2.3",
|
||||
"jest": "^28.1.1",
|
||||
"nodemon": "^2.0.20",
|
||||
"rimraf": "^4.4.1",
|
||||
|
|
@ -56,6 +57,8 @@
|
|||
"lint:fix": "npx eslint . --ext .ts --fix",
|
||||
"ci:lint": "yarn lint && yarn tsc --noEmit",
|
||||
"ci:unit": "jest --runInBand --ci --forceExit --coverage",
|
||||
"dev:unit": "jest --watchAll --runInBand --detectOpenHandles"
|
||||
"dev:unit": "jest --watchAll --runInBand --detectOpenHandles",
|
||||
"ci:int": "DOTENV_CONFIG_PATH=.env.test jest --config jest.int.config.js --runInBand --ci --forceExit --coverage",
|
||||
"dev:int": "DOTENV_CONFIG_PATH=.env.test jest --config jest.int.config.js --watchAll --runInBand --detectOpenHandles"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
193
packages/common-utils/src/__tests__/metadata.int.test.ts
Normal file
193
packages/common-utils/src/__tests__/metadata.int.test.ts
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
import { createClient } from '@clickhouse/client';
|
||||
import { ClickHouseClient } from '@clickhouse/client-common';
|
||||
|
||||
import { ClickhouseClient as HdxClickhouseClient } from '@/clickhouse/node';
|
||||
import { Metadata, MetadataCache } from '@/metadata';
|
||||
import { ChartConfigWithDateRange } from '@/types';
|
||||
|
||||
describe('Metadata Integration Tests', () => {
|
||||
let client: ClickHouseClient;
|
||||
let hdxClient: HdxClickhouseClient;
|
||||
|
||||
beforeAll(() => {
|
||||
const host = process.env.CLICKHOUSE_HOST || 'http://localhost:8123';
|
||||
const username = process.env.CLICKHOUSE_USER || 'default';
|
||||
const password = process.env.CLICKHOUSE_PASSWORD || '';
|
||||
|
||||
client = createClient({
|
||||
url: host,
|
||||
username,
|
||||
password,
|
||||
});
|
||||
|
||||
hdxClient = new HdxClickhouseClient({
|
||||
host,
|
||||
username,
|
||||
password,
|
||||
});
|
||||
});
|
||||
|
||||
describe('getKeyValues', () => {
|
||||
let metadata: Metadata;
|
||||
const chartConfig: ChartConfigWithDateRange = {
|
||||
connection: 'test_connection',
|
||||
from: {
|
||||
databaseName: 'default',
|
||||
tableName: 'test_table',
|
||||
},
|
||||
dateRange: [new Date('2023-01-01'), new Date('2025-01-01')],
|
||||
select: 'col1, col2',
|
||||
timestampValueExpression: 'Timestamp',
|
||||
where: '',
|
||||
};
|
||||
|
||||
beforeAll(async () => {
|
||||
await client.command({
|
||||
query: `CREATE OR REPLACE TABLE default.test_table (
|
||||
Timestamp DateTime64(9) CODEC(Delta(8), ZSTD(1)),
|
||||
SeverityText LowCardinality(String) CODEC(ZSTD(1)),
|
||||
TraceId String,
|
||||
LogAttributes JSON CODEC(ZSTD(1)),
|
||||
ResourceAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
|
||||
\`__hdx_materialized_k8s.pod.name\` String MATERIALIZED ResourceAttributes['k8s.pod.name'] CODEC(ZSTD(1)),
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY (Timestamp)
|
||||
`,
|
||||
});
|
||||
|
||||
await client.command({
|
||||
query: `INSERT INTO default.test_table (Timestamp, SeverityText, TraceId, ResourceAttributes, LogAttributes) VALUES
|
||||
('2023-06-01 12:00:00', 'info', '1o2udn120d8n', { 'k8s.pod.name': 'pod1', 'env': 'prod' },'{"action":"ping"}'),
|
||||
('2024-06-01 12:00:00', 'error', '67890-09098', { 'k8s.pod.name': 'pod2', 'env': 'prod' },'{}'),
|
||||
('2024-06-01 12:00:00', 'info', '11h9238re1h92', { 'env': 'staging' },'{"user":"john"}'),
|
||||
('2024-06-01 12:00:00', 'warning', '1o2udn120d8n', { 'k8s.pod.name': 'pod1', 'env': 'prod' }, '{"user":"jack","action":"login"}'),
|
||||
('2024-06-01 12:00:00', '', '1o2udn120d8n', { 'env': 'prod' }, '{"user":"jane","action":"login"}')
|
||||
`,
|
||||
});
|
||||
});
|
||||
|
||||
beforeEach(async () => {
|
||||
metadata = new Metadata(hdxClient, new MetadataCache());
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await client.command({
|
||||
query: 'DROP TABLE IF EXISTS default.test_table',
|
||||
});
|
||||
|
||||
await client.close();
|
||||
});
|
||||
|
||||
describe.each([true, false])('with disableRowLimit=%s', disableRowLimit => {
|
||||
it('should return key-value pairs for a given metadata key', async () => {
|
||||
const resultSeverityText = await metadata.getKeyValues({
|
||||
chartConfig,
|
||||
keys: ['SeverityText'],
|
||||
disableRowLimit,
|
||||
});
|
||||
|
||||
expect(resultSeverityText).toHaveLength(1);
|
||||
expect(resultSeverityText[0].key).toBe('SeverityText');
|
||||
expect(resultSeverityText[0].value).toHaveLength(3);
|
||||
expect(resultSeverityText[0].value).toEqual(
|
||||
expect.arrayContaining(['info', 'error', 'warning']),
|
||||
);
|
||||
|
||||
const resultTraceId = await metadata.getKeyValues({
|
||||
chartConfig,
|
||||
keys: ['TraceId'],
|
||||
disableRowLimit,
|
||||
});
|
||||
|
||||
expect(resultTraceId).toHaveLength(1);
|
||||
expect(resultTraceId[0].key).toBe('TraceId');
|
||||
expect(resultTraceId[0].value).toHaveLength(3);
|
||||
expect(resultTraceId[0].value).toEqual(
|
||||
expect.arrayContaining([
|
||||
'1o2udn120d8n',
|
||||
'67890-09098',
|
||||
'11h9238re1h92',
|
||||
]),
|
||||
);
|
||||
|
||||
const resultBoth = await metadata.getKeyValues({
|
||||
chartConfig,
|
||||
keys: ['TraceId', 'SeverityText'],
|
||||
disableRowLimit,
|
||||
});
|
||||
|
||||
expect(resultBoth).toEqual([
|
||||
{
|
||||
key: 'TraceId',
|
||||
value: expect.arrayContaining([
|
||||
'1o2udn120d8n',
|
||||
'67890-09098',
|
||||
'11h9238re1h92',
|
||||
]),
|
||||
},
|
||||
{
|
||||
key: 'SeverityText',
|
||||
value: expect.arrayContaining(['info', 'error', 'warning']),
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('should handle materialized columns correctly', async () => {
|
||||
const resultPodName = await metadata.getKeyValues({
|
||||
chartConfig,
|
||||
keys: ['__hdx_materialized_k8s.pod.name'],
|
||||
disableRowLimit,
|
||||
});
|
||||
|
||||
expect(resultPodName).toHaveLength(1);
|
||||
expect(resultPodName[0].key).toBe('__hdx_materialized_k8s.pod.name');
|
||||
expect(resultPodName[0].value).toHaveLength(2);
|
||||
expect(resultPodName[0].value).toEqual(
|
||||
expect.arrayContaining(['pod1', 'pod2']),
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle JSON columns correctly', async () => {
|
||||
const resultLogAttributes = await metadata.getKeyValues({
|
||||
chartConfig,
|
||||
keys: ['LogAttributes.user'],
|
||||
disableRowLimit,
|
||||
});
|
||||
|
||||
expect(resultLogAttributes).toHaveLength(1);
|
||||
expect(resultLogAttributes[0].key).toBe('LogAttributes.user');
|
||||
expect(resultLogAttributes[0].value).toHaveLength(3);
|
||||
expect(resultLogAttributes[0].value).toEqual(
|
||||
expect.arrayContaining(['john', 'jack', 'jane']),
|
||||
);
|
||||
});
|
||||
|
||||
it('should return an empty list when no keys are provided', async () => {
|
||||
const resultEmpty = await metadata.getKeyValues({
|
||||
chartConfig,
|
||||
keys: [],
|
||||
});
|
||||
|
||||
expect(resultEmpty).toEqual([]);
|
||||
});
|
||||
|
||||
it('should correctly limit the number of returned values', async () => {
|
||||
const resultLimited = await metadata.getKeyValues({
|
||||
chartConfig,
|
||||
keys: ['SeverityText'],
|
||||
limit: 2,
|
||||
});
|
||||
|
||||
expect(resultLimited).toHaveLength(1);
|
||||
expect(resultLimited[0].key).toBe('SeverityText');
|
||||
expect(resultLimited[0].value).toHaveLength(2);
|
||||
expect(
|
||||
resultLimited[0].value.every(v =>
|
||||
['info', 'error', 'warning'].includes(v),
|
||||
),
|
||||
).toBeTruthy();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -251,22 +251,6 @@ describe('Metadata', () => {
|
|||
],
|
||||
}),
|
||||
});
|
||||
|
||||
// Mock getColumns to return columns including materialized ones
|
||||
mockCache.getOrFetch.mockImplementation((key, queryFn) => {
|
||||
if (key.includes('.columns')) {
|
||||
return Promise.resolve([
|
||||
{ name: 'regular_column', type: 'String', default_type: '' },
|
||||
{
|
||||
name: 'materialized_column',
|
||||
type: 'String',
|
||||
default_type: 'MATERIALIZED',
|
||||
},
|
||||
{ name: 'default_column', type: 'String', default_type: 'DEFAULT' },
|
||||
]);
|
||||
}
|
||||
return queryFn();
|
||||
});
|
||||
});
|
||||
|
||||
it('should apply row limit when disableRowLimit is false', async () => {
|
||||
|
|
@ -355,71 +339,20 @@ describe('Metadata', () => {
|
|||
expect(result).toEqual([{ key: 'column1', value: ['value1', 'value2'] }]);
|
||||
});
|
||||
|
||||
it('should include materialized fields when selecting all columns', async () => {
|
||||
it('should return an empty list when no keys are provided', async () => {
|
||||
const renderChartConfigSpy = jest.spyOn(
|
||||
renderChartConfigModule,
|
||||
'renderChartConfig',
|
||||
);
|
||||
|
||||
await metadata.getKeyValues({
|
||||
const results = await metadata.getKeyValues({
|
||||
chartConfig: mockChartConfig,
|
||||
keys: ['column1'],
|
||||
keys: [],
|
||||
limit: 10,
|
||||
});
|
||||
|
||||
// Verify that renderChartConfig was called with the expanded select list
|
||||
// that includes all columns by name (including materialized ones)
|
||||
expect(renderChartConfigSpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
with: [
|
||||
expect.objectContaining({
|
||||
name: 'sampledData',
|
||||
chartConfig: expect.objectContaining({
|
||||
// Should expand to all column names instead of using '*'
|
||||
select:
|
||||
'`regular_column`, `materialized_column`, `default_column`',
|
||||
}),
|
||||
}),
|
||||
],
|
||||
}),
|
||||
metadata,
|
||||
);
|
||||
});
|
||||
|
||||
it('should fallback to * when no columns are found', async () => {
|
||||
// Mock getColumns to return empty array
|
||||
mockCache.getOrFetch.mockImplementation((key, queryFn) => {
|
||||
if (key.includes('.columns')) {
|
||||
return Promise.resolve([]);
|
||||
}
|
||||
return queryFn();
|
||||
});
|
||||
|
||||
const renderChartConfigSpy = jest.spyOn(
|
||||
renderChartConfigModule,
|
||||
'renderChartConfig',
|
||||
);
|
||||
|
||||
await metadata.getKeyValues({
|
||||
chartConfig: mockChartConfig,
|
||||
keys: ['column1'],
|
||||
limit: 10,
|
||||
});
|
||||
|
||||
// Should fallback to '*' when no columns are found
|
||||
expect(renderChartConfigSpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
with: [
|
||||
expect.objectContaining({
|
||||
name: 'sampledData',
|
||||
chartConfig: expect.objectContaining({
|
||||
select: '*',
|
||||
}),
|
||||
}),
|
||||
],
|
||||
}),
|
||||
metadata,
|
||||
);
|
||||
expect(results).toEqual([]);
|
||||
expect(renderChartConfigSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -668,29 +668,24 @@ export class Metadata {
|
|||
return this.cache.getOrFetch(
|
||||
`${chartConfig.connection}.${chartConfig.from.databaseName}.${chartConfig.from.tableName}.${keys.join(',')}.${chartConfig.dateRange.toString()}.${disableRowLimit}.values`,
|
||||
async () => {
|
||||
const selectClause = keys
|
||||
.map((k, i) => `groupUniqArray(${limit})(${k}) AS param${i}`)
|
||||
.join(', ');
|
||||
if (keys.length === 0) return [];
|
||||
|
||||
// When disableRowLimit is true, query directly without CTE
|
||||
// Otherwise, use CTE with row limits for sampling
|
||||
const sqlConfig = disableRowLimit
|
||||
? {
|
||||
...chartConfig,
|
||||
select: selectClause,
|
||||
select: keys
|
||||
.map((k, i) => `groupUniqArray(${limit})(${k}) AS param${i}`)
|
||||
.join(', '),
|
||||
}
|
||||
: await (async () => {
|
||||
// Get all columns including materialized ones
|
||||
const columns = await this.getColumns({
|
||||
databaseName: chartConfig.from.databaseName,
|
||||
tableName: chartConfig.from.tableName,
|
||||
connectionId: chartConfig.connection,
|
||||
});
|
||||
|
||||
// Build select expression that includes all columns by name
|
||||
// This ensures materialized columns are included
|
||||
// Build select expression that includes each of the given keys.
|
||||
// This avoids selecting entire JSON columns, which is significantly slower
|
||||
// than selecting just the JSON paths corresponding to the given keys.
|
||||
// paramN aliases are used to avoid issues with special characters or complex expressions in keys.
|
||||
const selectExpr =
|
||||
columns.map(col => `\`${col.name}\``).join(', ') || '*';
|
||||
keys.map((k, i) => `${k} as param${i}`).join(', ') || '*';
|
||||
|
||||
return {
|
||||
with: [
|
||||
|
|
@ -710,7 +705,12 @@ export class Metadata {
|
|||
isSubquery: true,
|
||||
},
|
||||
],
|
||||
select: selectClause,
|
||||
select: keys
|
||||
.map(
|
||||
(_, i) =>
|
||||
`groupUniqArray(${limit})(param${i}) AS param${i}`,
|
||||
)
|
||||
.join(', '),
|
||||
connection: chartConfig.connection,
|
||||
from: { databaseName: '', tableName: 'sampledData' },
|
||||
where: '',
|
||||
|
|
|
|||
|
|
@ -4535,6 +4535,7 @@ __metadata:
|
|||
"@types/uuid": "npm:^8.3.4"
|
||||
date-fns: "npm:^2.28.0"
|
||||
date-fns-tz: "npm:^2.0.0"
|
||||
dotenv: "npm:^17.2.3"
|
||||
jest: "npm:^28.1.1"
|
||||
lodash: "npm:^4.17.21"
|
||||
node-sql-parser: "npm:^5.3.5"
|
||||
|
|
@ -14690,6 +14691,13 @@ __metadata:
|
|||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"dotenv@npm:^17.2.3":
|
||||
version: 17.2.3
|
||||
resolution: "dotenv@npm:17.2.3"
|
||||
checksum: 10c0/c884403209f713214a1b64d4d1defa4934c2aa5b0002f5a670ae298a51e3c3ad3ba79dfee2f8df49f01ae74290fcd9acdb1ab1d09c7bfb42b539036108bb2ba0
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"dunder-proto@npm:^1.0.1":
|
||||
version: 1.0.1
|
||||
resolution: "dunder-proto@npm:1.0.1"
|
||||
|
|
|
|||
Loading…
Reference in a new issue