feat: support search on multi implicit fields (#696)

Currently, users (or HyperDX itself) still need to create an index (e.g. a tokenbf_v1 skip index) on the multi-field columns to speed up queries if performance is a concern.
ref: HDX-1522


<img width="715" alt="image" src="https://github.com/user-attachments/assets/d8ddbe3e-eb75-4780-b2cf-03dcf2f309ec" />

<img width="1056" alt="image" src="https://github.com/user-attachments/assets/e2071c55-9958-4772-a156-e1e1b568d67e" />
This commit is contained in:
Warren 2025-03-20 15:41:26 -07:00 committed by GitHub
parent f8accdeb0f
commit e5a210a1bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 76 additions and 29 deletions

View file

@ -0,0 +1,6 @@
---
"@hyperdx/common-utils": patch
"@hyperdx/app": patch
---
feat: support search on multi implicit fields (BETA)

View file

@ -25,6 +25,7 @@ import {
DisplayType,
Filter,
} from '@hyperdx/common-utils/dist/types';
import { splitAndTrimCSV } from '@hyperdx/common-utils/dist/utils';
import {
ActionIcon,
Box,
@ -872,13 +873,11 @@ function DBSearchPage() {
onFilterChange: handleSetFilters,
});
const displayedColumns = (
const displayedColumns = splitAndTrimCSV(
dbSqlRowTableConfig?.select ??
searchedSource?.defaultTableSelectExpression ??
''
)
.split(',')
.map(s => s.trim());
searchedSource?.defaultTableSelectExpression ??
'',
);
const toggleColumn = (column: string) => {
const newSelectArray = displayedColumns.includes(column)

View file

@ -15,6 +15,7 @@ import {
ChartConfigWithDateRange,
SelectList,
} from '@hyperdx/common-utils/dist/types';
import { splitAndTrimCSV } from '@hyperdx/common-utils/dist/utils';
import { Box, Code, Flex, Text } from '@mantine/core';
import { FetchNextPageOptions } from '@tanstack/react-query';
import {
@ -695,13 +696,10 @@ function mergeSelectWithPrimaryAndPartitionKey(
.map(k => extractColumnReference(k.trim()))
.filter((k): k is string => k != null && k.length > 0);
const primaryKeyArr =
primaryKeys.trim() !== '' ? primaryKeys.split(',').map(k => k.trim()) : [];
primaryKeys.trim() !== '' ? splitAndTrimCSV(primaryKeys) : [];
const allKeys = [...partitionKeyArr, ...primaryKeyArr];
if (typeof select === 'string') {
const selectSplit = select
.split(',')
.map(s => s.trim())
.filter(s => s.length > 0);
const selectSplit = splitAndTrimCSV(select);
const selectColumns = new Set(selectSplit);
const additionalKeys = allKeys.filter(k => !selectColumns.has(k));
return {

View file

@ -8,7 +8,7 @@ import {
JSDataType,
} from '@hyperdx/common-utils/dist/clickhouse';
import { MetricsDataType, TSource } from '@hyperdx/common-utils/dist/types';
import { hashCode } from '@hyperdx/common-utils/dist/utils';
import { hashCode, splitAndTrimCSV } from '@hyperdx/common-utils/dist/utils';
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import { hdxServer } from '@/api';
@ -43,7 +43,7 @@ function getLocalSources(): TSource[] {
// If a user specifies a timestampValueExpression with multiple columns,
// this will return the first one. We'll want to refine this over time
// If a user specifies a timestampValueExpression with multiple columns,
// this will return the first one. We'll want to refine this over time
export function getFirstTimestampValueExpression(valueExpression: string) {
  // Fix: the pre-refactor `valueExpression.split(',')[0].trim()` line was left
  // in place above the new implementation, making the new line unreachable.
  // Keep only the splitAndTrimCSV-based version.
  // NOTE(review): splitAndTrimCSV drops empty segments, so an empty or
  // whitespace-only input yields `undefined` here (the old code returned '');
  // callers should tolerate that — TODO confirm.
  return splitAndTrimCSV(valueExpression)[0];
}
export function getSpanEventBody(eventModel: TSource) {
@ -58,14 +58,15 @@ export function getDisplayedTimestampValueExpression(eventModel: TSource) {
}
export function getEventBody(eventModel: TSource) {
return (
const expression =
eventModel.bodyExpression ??
('spanNameExpression' in eventModel
? eventModel?.spanNameExpression
: undefined) ??
eventModel.implicitColumnExpression //??
// (eventModel.kind === 'log' ? 'Body' : 'SpanName')
);
eventModel.implicitColumnExpression; //??
// (eventModel.kind === 'log' ? 'Body' : 'SpanName')
const multiExpr = splitAndTrimCSV(expression ?? '');
return multiExpr.length === 1 ? expression : multiExpr[0]; // TODO: check if we want to show multiple columns
}
export function useSources() {
@ -214,7 +215,7 @@ export async function inferTableSourceConfig({
connectionId,
})
).primary_key;
const keys = primaryKeys.split(',').map(k => k.trim());
const keys = splitAndTrimCSV(primaryKeys);
const isOtelLogSchema = hasAllColumns(columns, [
'Timestamp',

View file

@ -0,0 +1,29 @@
import { splitAndTrimCSV } from '../utils';
// Unit tests for splitAndTrimCSV: the helper must split on commas, trim
// whitespace around each segment, and drop segments that are empty after
// trimming (so trailing commas and runs of commas yield no entries).
describe('utils', () => {
describe('splitAndTrimCSV', () => {
it('should split a comma-separated string and trim whitespace', () => {
expect(splitAndTrimCSV('a, b, c')).toEqual(['a', 'b', 'c']);
});
it('should handle strings with no spaces', () => {
expect(splitAndTrimCSV('a,b,c')).toEqual(['a', 'b', 'c']);
});
it('should filter out empty values', () => {
expect(splitAndTrimCSV('a,b,,c,')).toEqual(['a', 'b', 'c']);
});
it('should handle strings with extra whitespace', () => {
expect(splitAndTrimCSV(' a , b , c ')).toEqual(['a', 'b', 'c']);
});
it('should return an empty array for an empty string', () => {
expect(splitAndTrimCSV('')).toEqual([]);
});
// Edge case: input consisting solely of commas and whitespace collapses to []
it('should handle a string with only commas and whitespace', () => {
expect(splitAndTrimCSV(',, ,,')).toEqual([]);
});
});
});

View file

@ -3,6 +3,7 @@ import SqlString from 'sqlstring';
import { convertCHTypeToPrimitiveJSType, JSDataType } from '@/clickhouse';
import { Metadata } from '@/metadata';
import { splitAndTrimCSV } from '@/utils';
function encodeSpecialTokens(query: string): string {
return query
@ -361,9 +362,9 @@ export abstract class SQLSerializer implements Serializer {
// to utilize the token bloom filter unless a prefix/sufix wildcard is specified
if (prefixWildcard || suffixWildcard) {
return SqlString.format(
`(lower(??) ${isNegatedField ? 'NOT ' : ''}LIKE lower(?))`,
`(lower(?) ${isNegatedField ? 'NOT ' : ''}LIKE lower(?))`,
[
column,
SqlString.raw(column ?? ''),
`${prefixWildcard ? '%' : ''}${term}${suffixWildcard ? '%' : ''}`,
],
);
@ -374,21 +375,21 @@ export abstract class SQLSerializer implements Serializer {
const tokens = this.tokenizeTerm(term);
return `(${isNegatedField ? 'NOT (' : ''}${[
...tokens.map(token =>
SqlString.format(`hasTokenCaseInsensitive(??, ?)`, [
column,
SqlString.format(`hasTokenCaseInsensitive(?, ?)`, [
SqlString.raw(column ?? ''),
token,
]),
),
// If there are symbols in the term, we'll try to match the whole term as well (ex. Scott!)
SqlString.format(`(lower(??) LIKE lower(?))`, [
column,
SqlString.format(`(lower(?) LIKE lower(?))`, [
SqlString.raw(column ?? ''),
`%${term}%`,
]),
].join(' AND ')}${isNegatedField ? ')' : ''})`;
} else {
return SqlString.format(
`(${isNegatedField ? 'NOT ' : ''}hasTokenCaseInsensitive(??, ?))`,
[column, term],
`(${isNegatedField ? 'NOT ' : ''}hasTokenCaseInsensitive(?, ?))`,
[SqlString.raw(column ?? ''), term],
);
}
}
@ -549,8 +550,13 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
);
}
const expressions = splitAndTrimCSV(this.implicitColumnExpression);
return {
column: this.implicitColumnExpression,
column:
expressions.length > 1
? `concatWithSeparator(';',${expressions.join(',')})`
: this.implicitColumnExpression,
columnJSON: undefined,
propertyType: JSDataType.String,
found: true,

View file

@ -25,6 +25,7 @@ import {
import {
convertDateRangeToGranularityString,
getFirstTimestampValueExpression,
splitAndTrimCSV,
} from '@/utils';
// FIXME: SQLParser.ColumnRef is incomplete
@ -423,7 +424,7 @@ async function timeFilterExpr({
with?: ChartConfigWithDateRange['with'];
includedDataInterval?: string;
}) {
const valueExpressions = timestampValueExpression.split(',');
const valueExpressions = splitAndTrimCSV(timestampValueExpression);
const startTime = dateRange[0].getTime();
const endTime = dateRange[1].getTime();

View file

@ -11,10 +11,17 @@ export const isNode: boolean =
process.versions != null &&
process.versions.node != null;
/**
 * Split a comma-separated string into its non-empty, trimmed segments.
 *
 * @param input - raw CSV string, e.g. `" a , b ,, c "`
 * @returns trimmed segments with empty entries removed, e.g. `['a', 'b', 'c']`
 */
export function splitAndTrimCSV(input: string): string[] {
  const segments: string[] = [];
  for (const piece of input.split(',')) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      segments.push(trimmed);
    }
  }
  return segments;
}
// If a user specifies a timestampValueExpression with multiple columns,
// this will return the first one. We'll want to refine this over time
export function getFirstTimestampValueExpression(valueExpression: string) {
  // Fix: the pre-refactor `valueExpression.split(',')[0].trim()` line was left
  // above the new implementation, making the new line unreachable dead code.
  // Keep only the splitAndTrimCSV-based version.
  // NOTE(review): splitAndTrimCSV drops empty segments, so an empty or
  // whitespace-only input yields `undefined` here (the old code returned '');
  // callers should tolerate that — TODO confirm.
  return splitAndTrimCSV(valueExpression)[0];
}
export enum Granularity {