mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
feat: support search on multi implicit fields (#696)
Currently, users (or HyperDX) still need to create an index (e.g., tokenbf) on the multiple fields to speed up queries if performance is a concern. Ref: HDX-1522 <img width="715" alt="image" src="https://github.com/user-attachments/assets/d8ddbe3e-eb75-4780-b2cf-03dcf2f309ec" /> <img width="1056" alt="image" src="https://github.com/user-attachments/assets/e2071c55-9958-4772-a156-e1e1b568d67e" />
This commit is contained in:
parent
f8accdeb0f
commit
e5a210a1bd
8 changed files with 76 additions and 29 deletions
6
.changeset/wicked-plums-breathe.md
Normal file
6
.changeset/wicked-plums-breathe.md
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
"@hyperdx/common-utils": patch
|
||||
"@hyperdx/app": patch
|
||||
---
|
||||
|
||||
feat: support search on multi implicit fields (BETA)
|
||||
|
|
@ -25,6 +25,7 @@ import {
|
|||
DisplayType,
|
||||
Filter,
|
||||
} from '@hyperdx/common-utils/dist/types';
|
||||
import { splitAndTrimCSV } from '@hyperdx/common-utils/dist/utils';
|
||||
import {
|
||||
ActionIcon,
|
||||
Box,
|
||||
|
|
@ -872,13 +873,11 @@ function DBSearchPage() {
|
|||
onFilterChange: handleSetFilters,
|
||||
});
|
||||
|
||||
const displayedColumns = (
|
||||
const displayedColumns = splitAndTrimCSV(
|
||||
dbSqlRowTableConfig?.select ??
|
||||
searchedSource?.defaultTableSelectExpression ??
|
||||
''
|
||||
)
|
||||
.split(',')
|
||||
.map(s => s.trim());
|
||||
searchedSource?.defaultTableSelectExpression ??
|
||||
'',
|
||||
);
|
||||
|
||||
const toggleColumn = (column: string) => {
|
||||
const newSelectArray = displayedColumns.includes(column)
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import {
|
|||
ChartConfigWithDateRange,
|
||||
SelectList,
|
||||
} from '@hyperdx/common-utils/dist/types';
|
||||
import { splitAndTrimCSV } from '@hyperdx/common-utils/dist/utils';
|
||||
import { Box, Code, Flex, Text } from '@mantine/core';
|
||||
import { FetchNextPageOptions } from '@tanstack/react-query';
|
||||
import {
|
||||
|
|
@ -695,13 +696,10 @@ function mergeSelectWithPrimaryAndPartitionKey(
|
|||
.map(k => extractColumnReference(k.trim()))
|
||||
.filter((k): k is string => k != null && k.length > 0);
|
||||
const primaryKeyArr =
|
||||
primaryKeys.trim() !== '' ? primaryKeys.split(',').map(k => k.trim()) : [];
|
||||
primaryKeys.trim() !== '' ? splitAndTrimCSV(primaryKeys) : [];
|
||||
const allKeys = [...partitionKeyArr, ...primaryKeyArr];
|
||||
if (typeof select === 'string') {
|
||||
const selectSplit = select
|
||||
.split(',')
|
||||
.map(s => s.trim())
|
||||
.filter(s => s.length > 0);
|
||||
const selectSplit = splitAndTrimCSV(select);
|
||||
const selectColumns = new Set(selectSplit);
|
||||
const additionalKeys = allKeys.filter(k => !selectColumns.has(k));
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import {
|
|||
JSDataType,
|
||||
} from '@hyperdx/common-utils/dist/clickhouse';
|
||||
import { MetricsDataType, TSource } from '@hyperdx/common-utils/dist/types';
|
||||
import { hashCode } from '@hyperdx/common-utils/dist/utils';
|
||||
import { hashCode, splitAndTrimCSV } from '@hyperdx/common-utils/dist/utils';
|
||||
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
|
||||
|
||||
import { hdxServer } from '@/api';
|
||||
|
|
@ -43,7 +43,7 @@ function getLocalSources(): TSource[] {
|
|||
// If a user specifies a timestampValueExpression with multiple columns,
|
||||
// this will return the first one. We'll want to refine this over time
|
||||
export function getFirstTimestampValueExpression(valueExpression: string) {
|
||||
return valueExpression.split(',')[0].trim();
|
||||
return splitAndTrimCSV(valueExpression)[0];
|
||||
}
|
||||
|
||||
export function getSpanEventBody(eventModel: TSource) {
|
||||
|
|
@ -58,14 +58,15 @@ export function getDisplayedTimestampValueExpression(eventModel: TSource) {
|
|||
}
|
||||
|
||||
export function getEventBody(eventModel: TSource) {
|
||||
return (
|
||||
const expression =
|
||||
eventModel.bodyExpression ??
|
||||
('spanNameExpression' in eventModel
|
||||
? eventModel?.spanNameExpression
|
||||
: undefined) ??
|
||||
eventModel.implicitColumnExpression //??
|
||||
// (eventModel.kind === 'log' ? 'Body' : 'SpanName')
|
||||
);
|
||||
eventModel.implicitColumnExpression; //??
|
||||
// (eventModel.kind === 'log' ? 'Body' : 'SpanName')
|
||||
const multiExpr = splitAndTrimCSV(expression ?? '');
|
||||
return multiExpr.length === 1 ? expression : multiExpr[0]; // TODO: check if we want to show multiple columns
|
||||
}
|
||||
|
||||
export function useSources() {
|
||||
|
|
@ -214,7 +215,7 @@ export async function inferTableSourceConfig({
|
|||
connectionId,
|
||||
})
|
||||
).primary_key;
|
||||
const keys = primaryKeys.split(',').map(k => k.trim());
|
||||
const keys = splitAndTrimCSV(primaryKeys);
|
||||
|
||||
const isOtelLogSchema = hasAllColumns(columns, [
|
||||
'Timestamp',
|
||||
|
|
|
|||
29
packages/common-utils/src/__tests__/utils.test.ts
Normal file
29
packages/common-utils/src/__tests__/utils.test.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import { splitAndTrimCSV } from '../utils';
|
||||
|
||||
describe('utils', () => {
|
||||
describe('splitAndTrimCSV', () => {
|
||||
it('should split a comma-separated string and trim whitespace', () => {
|
||||
expect(splitAndTrimCSV('a, b, c')).toEqual(['a', 'b', 'c']);
|
||||
});
|
||||
|
||||
it('should handle strings with no spaces', () => {
|
||||
expect(splitAndTrimCSV('a,b,c')).toEqual(['a', 'b', 'c']);
|
||||
});
|
||||
|
||||
it('should filter out empty values', () => {
|
||||
expect(splitAndTrimCSV('a,b,,c,')).toEqual(['a', 'b', 'c']);
|
||||
});
|
||||
|
||||
it('should handle strings with extra whitespace', () => {
|
||||
expect(splitAndTrimCSV(' a , b , c ')).toEqual(['a', 'b', 'c']);
|
||||
});
|
||||
|
||||
it('should return an empty array for an empty string', () => {
|
||||
expect(splitAndTrimCSV('')).toEqual([]);
|
||||
});
|
||||
|
||||
it('should handle a string with only commas and whitespace', () => {
|
||||
expect(splitAndTrimCSV(',, ,,')).toEqual([]);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -3,6 +3,7 @@ import SqlString from 'sqlstring';
|
|||
|
||||
import { convertCHTypeToPrimitiveJSType, JSDataType } from '@/clickhouse';
|
||||
import { Metadata } from '@/metadata';
|
||||
import { splitAndTrimCSV } from '@/utils';
|
||||
|
||||
function encodeSpecialTokens(query: string): string {
|
||||
return query
|
||||
|
|
@ -361,9 +362,9 @@ export abstract class SQLSerializer implements Serializer {
|
|||
// to utilize the token bloom filter unless a prefix/sufix wildcard is specified
|
||||
if (prefixWildcard || suffixWildcard) {
|
||||
return SqlString.format(
|
||||
`(lower(??) ${isNegatedField ? 'NOT ' : ''}LIKE lower(?))`,
|
||||
`(lower(?) ${isNegatedField ? 'NOT ' : ''}LIKE lower(?))`,
|
||||
[
|
||||
column,
|
||||
SqlString.raw(column ?? ''),
|
||||
`${prefixWildcard ? '%' : ''}${term}${suffixWildcard ? '%' : ''}`,
|
||||
],
|
||||
);
|
||||
|
|
@ -374,21 +375,21 @@ export abstract class SQLSerializer implements Serializer {
|
|||
const tokens = this.tokenizeTerm(term);
|
||||
return `(${isNegatedField ? 'NOT (' : ''}${[
|
||||
...tokens.map(token =>
|
||||
SqlString.format(`hasTokenCaseInsensitive(??, ?)`, [
|
||||
column,
|
||||
SqlString.format(`hasTokenCaseInsensitive(?, ?)`, [
|
||||
SqlString.raw(column ?? ''),
|
||||
token,
|
||||
]),
|
||||
),
|
||||
// If there are symbols in the term, we'll try to match the whole term as well (ex. Scott!)
|
||||
SqlString.format(`(lower(??) LIKE lower(?))`, [
|
||||
column,
|
||||
SqlString.format(`(lower(?) LIKE lower(?))`, [
|
||||
SqlString.raw(column ?? ''),
|
||||
`%${term}%`,
|
||||
]),
|
||||
].join(' AND ')}${isNegatedField ? ')' : ''})`;
|
||||
} else {
|
||||
return SqlString.format(
|
||||
`(${isNegatedField ? 'NOT ' : ''}hasTokenCaseInsensitive(??, ?))`,
|
||||
[column, term],
|
||||
`(${isNegatedField ? 'NOT ' : ''}hasTokenCaseInsensitive(?, ?))`,
|
||||
[SqlString.raw(column ?? ''), term],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -549,8 +550,13 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
|
|||
);
|
||||
}
|
||||
|
||||
const expressions = splitAndTrimCSV(this.implicitColumnExpression);
|
||||
|
||||
return {
|
||||
column: this.implicitColumnExpression,
|
||||
column:
|
||||
expressions.length > 1
|
||||
? `concatWithSeparator(';',${expressions.join(',')})`
|
||||
: this.implicitColumnExpression,
|
||||
columnJSON: undefined,
|
||||
propertyType: JSDataType.String,
|
||||
found: true,
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ import {
|
|||
import {
|
||||
convertDateRangeToGranularityString,
|
||||
getFirstTimestampValueExpression,
|
||||
splitAndTrimCSV,
|
||||
} from '@/utils';
|
||||
|
||||
// FIXME: SQLParser.ColumnRef is incomplete
|
||||
|
|
@ -423,7 +424,7 @@ async function timeFilterExpr({
|
|||
with?: ChartConfigWithDateRange['with'];
|
||||
includedDataInterval?: string;
|
||||
}) {
|
||||
const valueExpressions = timestampValueExpression.split(',');
|
||||
const valueExpressions = splitAndTrimCSV(timestampValueExpression);
|
||||
const startTime = dateRange[0].getTime();
|
||||
const endTime = dateRange[1].getTime();
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,17 @@ export const isNode: boolean =
|
|||
process.versions != null &&
|
||||
process.versions.node != null;
|
||||
|
||||
export function splitAndTrimCSV(input: string): string[] {
|
||||
return input
|
||||
.split(',')
|
||||
.map(column => column.trim())
|
||||
.filter(column => column.length > 0);
|
||||
}
|
||||
|
||||
// If a user specifies a timestampValueExpression with multiple columns,
|
||||
// this will return the first one. We'll want to refine this over time
|
||||
export function getFirstTimestampValueExpression(valueExpression: string) {
|
||||
return valueExpression.split(',')[0].trim();
|
||||
return splitAndTrimCSV(valueExpression)[0];
|
||||
}
|
||||
|
||||
export enum Granularity {
|
||||
|
|
|
|||
Loading…
Reference in a new issue