fix: handle Nullable + Tuple type column + decouple useRowWhere (#1008)

Ref: HDX-1939
This commit is contained in:
Warren 2025-07-23 11:44:02 -07:00 committed by GitHub
parent 21b5df66aa
commit 4ce81d42b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 637 additions and 91 deletions

View file

@ -0,0 +1,7 @@
---
"@hyperdx/common-utils": patch
"@hyperdx/api": patch
"@hyperdx/app": patch
---
fix: handle Nullable + Tuple type column + decouple useRowWhere

View file

@ -19,6 +19,7 @@ import {
ColumnMetaType,
convertCHDataTypeToJSType,
extractColumnReference,
isJSDataTypeJSONStringifiable,
JSDataType,
} from '@hyperdx/common-utils/dist/clickhouse';
import {
@ -1013,12 +1014,7 @@ function DBSqlRowTableComponent({
const objectTypeColumns = useMemo(() => {
return columns.filter(c => {
const columnType = columnMap.get(c)?._type;
return (
columnType === JSDataType.Map ||
columnType === JSDataType.Array ||
columnType === JSDataType.JSON ||
columnType === JSDataType.Dynamic
);
return isJSDataTypeJSONStringifiable(columnType);
});
}, [columns, columnMap]);
const processedRows = useMemo(() => {

View file

@ -0,0 +1,505 @@
import MD5 from 'crypto-js/md5';
import {
ColumnMetaType,
JSDataType,
} from '@hyperdx/common-utils/dist/clickhouse';
import { renderHook } from '@testing-library/react';
import useRowWhere, { processRowToWhereClause } from '../useRowWhere';
// Replace crypto-js/md5 with a Jest mock; each suite installs a deterministic
// implementation in its beforeEach so expected hashes can be spelled out.
jest.mock('crypto-js/md5');
// Partially mock the clickhouse utilities: every real export is kept via
// requireActual, and only convertCHDataTypeToJSType is swapped for a jest.fn
// backed by a fixed lookup table of the ClickHouse types used in these tests.
jest.mock('@hyperdx/common-utils/dist/clickhouse', () => ({
...jest.requireActual('@hyperdx/common-utils/dist/clickhouse'),
convertCHDataTypeToJSType: jest.fn((type: string) => {
// ClickHouse type string -> JSDataType for the types exercised below.
const typeMap: Record<string, JSDataType> = {
String: JSDataType.String,
DateTime64: JSDataType.Date,
'Array(String)': JSDataType.Array,
'Map(String, String)': JSDataType.Map,
JSON: JSDataType.JSON,
Dynamic: JSDataType.Dynamic,
Int32: JSDataType.Number,
'Tuple(String, Int32)': JSDataType.Tuple,
};
// Any type missing from the table is treated as a plain string.
return typeMap[type] || JSDataType.String;
}),
}));
describe('processRowToWhereClause', () => {
  /** Shape of the column metadata stored in the maps built for these tests. */
  type TestColumn = {
    name: string;
    type: string;
    valueExpr: string;
    jsType: JSDataType;
  };

  /**
   * Builds one `[columnName, metadata]` map entry. `valueExpr` falls back to
   * the column name only when the argument is omitted (an explicit `null` is
   * kept as-is so the "valueExpr not found" path can be exercised).
   */
  const entry = (
    name: string,
    type: string,
    jsType: JSDataType,
    valueExpr = name,
  ): [string, TestColumn] => [name, { name, type, valueExpr, jsType }];

  /** Runs the function under test against a map made of the given entries. */
  const whereFor = (
    row: Record<string, any>,
    ...entries: Array<[string, TestColumn]>
  ) => processRowToWhereClause(row, new Map(entries));

  beforeEach(() => {
    jest.clearAllMocks();
    // Deterministic MD5 stand-in: the "hash" is the input prefixed with md5_.
    const md5Mock = MD5 as jest.Mock;
    md5Mock.mockImplementation((value: string) => ({
      toString: () => `md5_${value}`,
    }));
  });

  it('should handle string columns', () => {
    const clause = whereFor(
      { name: 'test' },
      entry('name', 'String', JSDataType.String),
    );
    expect(clause).toBe("name='test'");
  });

  it('should handle date columns', () => {
    const clause = whereFor(
      { created_at: '2024-01-01T00:00:00Z' },
      entry('created_at', 'DateTime64', JSDataType.Date),
    );
    expect(clause).toBe(
      "created_at=parseDateTime64BestEffort('2024-01-01T00:00:00Z', 9)",
    );
  });

  it('should handle array columns', () => {
    const clause = whereFor(
      { tags: ['tag1', 'tag2'] },
      entry('tags', 'Array(String)', JSDataType.Array),
    );
    expect(clause).toBe("tags=JSONExtract('tag1', 'tag2', 'Array(String)')");
  });

  it('should handle map columns', () => {
    const clause = whereFor(
      { attributes: { key: 'value' } },
      entry('attributes', 'Map(String, String)', JSDataType.Map),
    );
    expect(clause).toBe(
      "attributes=JSONExtract(`key` = 'value', 'Map(String, String)')",
    );
  });

  it('should handle JSON columns with MD5', () => {
    const payload = '{"key": "value"}';
    const clause = whereFor(
      { data: payload },
      entry('data', 'JSON', JSDataType.JSON),
    );
    expect(clause).toBe(
      'lower(hex(MD5(toString(data))))=\'md5_{\\"key\\": \\"value\\"}\'',
    );
    expect(MD5).toHaveBeenCalledWith('{"key": "value"}');
  });

  it('should handle Dynamic columns with null value', () => {
    const clause = whereFor(
      { dynamic_field: 'null' },
      entry('dynamic_field', 'Dynamic', JSDataType.Dynamic),
    );
    expect(clause).toBe('isNull(`dynamic_field`)');
  });

  it('should handle Dynamic columns with quoted string', () => {
    const clause = whereFor(
      { dynamic_field: '"quoted_value"' },
      entry('dynamic_field', 'Dynamic', JSDataType.Dynamic),
    );
    expect(clause).toBe("toString(dynamic_field)='quoted_value'");
  });

  it('should handle long strings with MD5', () => {
    // 600 characters is above the 512-character literal-comparison limit.
    const longString = 'a'.repeat(600);
    const clause = whereFor(
      { description: longString },
      entry('description', 'String', JSDataType.String),
    );
    expect(clause).toBe(
      `lower(hex(MD5(leftUTF8(description, 1000))))='md5_${'a'.repeat(600)}'`,
    );
    expect(MD5).toHaveBeenCalledWith('a'.repeat(600));
  });

  it('should handle multiple columns with AND', () => {
    const clause = whereFor(
      { name: 'test', age: 25 },
      entry('name', 'String', JSDataType.String),
      entry('age', 'Int32', JSDataType.Number),
    );
    expect(clause).toBe("name='test' AND age=25");
  });

  it('should use custom valueExpr when provided', () => {
    const clause = whereFor(
      { alias_name: 'test' },
      entry('alias_name', 'String', JSDataType.String, 'original_column'),
    );
    expect(clause).toBe("original_column='test'");
  });

  it('should handle Tuple columns', () => {
    const clause = whereFor(
      { coordinates: '{"s": "city", "i": 123}' },
      entry('coordinates', 'Tuple(String, Int32)', JSDataType.Tuple),
    );
    expect(clause).toBe(
      'toJSONString(coordinates)=\'{\\"s\\": \\"city\\", \\"i\\": 123}\'',
    );
  });

  it('should handle null value in default block', () => {
    const clause = whereFor(
      { name: null },
      entry('name', 'String', JSDataType.String),
    );
    expect(clause).toBe('isNull(name)');
  });

  it('should handle undefined value in default block', () => {
    const clause = whereFor(
      { description: undefined },
      entry('description', 'String', JSDataType.String),
    );
    expect(clause).toBe('isNull(description)');
  });

  it('should throw error when column type not found', () => {
    // No entries at all: the row's column cannot be resolved.
    const attempt = () => whereFor({ unknown_column: 'test' });
    expect(attempt).toThrow('Column type not found for unknown_column');
  });

  it('should throw error when valueExpr not found', () => {
    const attempt = () =>
      whereFor(
        { test: 'value' },
        entry('test', 'String', JSDataType.String, null as any),
      );
    expect(attempt).toThrow('valueExpr not found for test');
  });

  it('should throw error for large Dynamic values', () => {
    const attempt = () =>
      whereFor(
        { dynamic_field: 'a'.repeat(1001) },
        entry('dynamic_field', 'Dynamic', JSDataType.Dynamic),
      );
    expect(attempt).toThrow('Search value/object key too large.');
  });
});
describe('useRowWhere', () => {
  type HookProps = Parameters<typeof useRowWhere>[0];

  /** Fresh two-column (`id`, `status`) metadata fixture for each test. */
  const idAndStatusMeta = (): ColumnMetaType[] => [
    { name: 'id', type: 'String' },
    { name: 'status', type: 'String' },
  ];

  /** Mounts the hook once with fixed props and exposes its result ref. */
  const mountHook = (props: HookProps) =>
    renderHook(() => useRowWhere(props)).result;

  beforeEach(() => {
    jest.clearAllMocks();
    // Deterministic MD5 stand-in: the "hash" is the input prefixed with md5_.
    const md5Mock = MD5 as jest.Mock;
    md5Mock.mockImplementation((value: string) => ({
      toString: () => `md5_${value}`,
    }));
  });

  it('should return a function that processes rows', () => {
    const hook = mountHook({ meta: idAndStatusMeta() });
    expect(typeof hook.current).toBe('function');
  });

  it('should handle rows with meta', () => {
    const hook = mountHook({ meta: idAndStatusMeta() });
    const clause = hook.current({ id: '123', status: 'active' });
    expect(clause).toBe("id='123' AND status='active'");
  });

  it('should handle aliasMap correctly', () => {
    const hook = mountHook({
      meta: [
        { name: 'user_id', type: 'String' },
        { name: 'user_status', type: 'String' },
      ],
      aliasMap: {
        user_id: 'users.id',
        user_status: 'users.status',
      },
    });
    const clause = hook.current({ user_id: '123', user_status: 'active' });
    expect(clause).toBe("users.id='123' AND users.status='active'");
  });

  it('should use column name when alias not found in aliasMap', () => {
    const hook = mountHook({
      meta: idAndStatusMeta(),
      // status is intentionally absent from the alias map
      aliasMap: { id: 'users.id' },
    });
    const clause = hook.current({ id: '123', status: 'active' });
    expect(clause).toBe("users.id='123' AND status='active'");
  });

  it('should handle undefined alias values in aliasMap', () => {
    const hook = mountHook({
      meta: idAndStatusMeta(),
      aliasMap: { id: 'users.id', status: undefined },
    });
    const clause = hook.current({ id: '123', status: 'active' });
    expect(clause).toBe("users.id='123' AND status='active'");
  });

  it('should memoize the column map', () => {
    const meta: ColumnMetaType[] = [{ name: 'id', type: 'String' }];
    const { result, rerender } = renderHook(props => useRowWhere(props), {
      initialProps: { meta },
    });
    const beforeRerender = result.current;

    // Same `meta` reference on rerender: the callback identity must not change.
    rerender({ meta });

    expect(beforeRerender).toBe(result.current);
  });

  it('should update callback when meta changes', () => {
    const initialMeta: ColumnMetaType[] = [{ name: 'id', type: 'String' }];
    const { result, rerender } = renderHook(props => useRowWhere(props), {
      initialProps: { meta: initialMeta },
    });
    const beforeRerender = result.current;

    // A different `meta` array must produce a new callback.
    rerender({ meta: idAndStatusMeta() });

    expect(beforeRerender).not.toBe(result.current);
  });

  it('should handle empty meta', () => {
    const hook = mountHook({ meta: [] });
    expect(() => hook.current({ id: '123' })).toThrow(
      'Column type not found for id',
    );
  });

  it('should handle undefined meta', () => {
    const hook = mountHook({ meta: undefined });
    expect(() => hook.current({ id: '123' })).toThrow(
      'Column type not found for id',
    );
  });
});

View file

@ -9,6 +9,107 @@ import {
// Values whose `.length` exceeds this are matched by MD5 hash of a prefix
// instead of by literal equality (see the default case of the row-to-WHERE switch).
const MAX_STRING_LENGTH = 512;
// Column metadata extended with what is needed to build a WHERE condition:
// - valueExpr: SQL expression placed unescaped on the column side of the comparison
// - jsType: JS-level type category used to pick the comparison strategy (may be null)
type ColumnWithMeta = ColumnMetaType & {
valueExpr: string;
jsType: JSDataType | null;
};
/**
 * Number of leading Unicode code points hashed for over-long values.
 * Must stay in sync with the literal `1000` in the `leftUTF8(?, 1000)` SQL below.
 */
const MD5_PREFIX_CODE_POINTS = 1000;

/**
 * Returns the leading `maxCodePoints` Unicode code points of `text`.
 *
 * `String.prototype.substring` counts UTF-16 code units, whereas ClickHouse's
 * `leftUTF8` counts code points. Cutting by code points keeps the client-side
 * hash input identical to what the database hashes, even when the prefix
 * contains characters outside the Basic Multilingual Plane (e.g. emoji).
 */
function leftCodePoints(text: string, maxCodePoints: number): string {
  let end = 0;
  for (let taken = 0; taken < maxCodePoints && end < text.length; taken++) {
    // Code points above U+FFFF occupy two UTF-16 code units (a surrogate pair).
    end += (text.codePointAt(end) ?? 0) > 0xffff ? 2 : 1;
  }
  return text.slice(0, end);
}

/**
 * Builds a SQL WHERE clause that identifies `row` by comparing every one of
 * its columns, joining the per-column conditions with ` AND `.
 *
 * The comparison strategy depends on the column's `jsType`:
 * - Date: compared against `parseDateTime64BestEffort(value, 9)`
 * - Array / Map: compared against `JSONExtract(value, <ClickHouse type>)`
 * - Tuple: `toJSONString(expr)` compared with the value
 * - JSON: MD5 of `toString(expr)` compared with the MD5 of the value
 * - Dynamic: `toString(expr)` compared with the value (surrounding double
 *   quotes stripped); `'null'` or a nullish value becomes `isNull(column)`
 * - anything else: `isNull(expr)` for nullish values, an MD5 comparison of the
 *   first 1000 code points for values longer than `MAX_STRING_LENGTH`,
 *   otherwise plain equality
 *
 * @param row - Row object keyed by column name.
 * @param columnMap - Metadata for each column name appearing in `row`.
 * @returns The conditions joined with ` AND ` (empty string for an empty row).
 * @throws Error when a column has no type or no `valueExpr` in `columnMap`,
 *   or when a Dynamic value / column name is longer than 1000 characters.
 */
export function processRowToWhereClause(
  row: Record<string, any>,
  columnMap: Map<string, ColumnWithMeta>,
): string {
  // JSON.stringify on a Map always yields "{}", so serialize its entries
  // to make the error messages actually show the known columns.
  const describeColumnMap = (): string =>
    JSON.stringify(Array.from(columnMap.entries()));

  return Object.entries(row)
    .map(([column, value]) => {
      const cm = columnMap.get(column);
      const chType = cm?.type;
      const jsType = cm?.jsType;
      const valueExpr = cm?.valueExpr;

      if (chType == null) {
        throw new Error(
          `Column type not found for ${column}, ${describeColumnMap()}`,
        );
      }
      if (valueExpr == null) {
        throw new Error(
          `valueExpr not found for ${column}, ${describeColumnMap()}`,
        );
      }

      switch (jsType) {
        case JSDataType.Date:
          return SqlString.format(`?=parseDateTime64BestEffort(?, 9)`, [
            SqlString.raw(valueExpr),
            value,
          ]);
        case JSDataType.Array:
        case JSDataType.Map:
          return SqlString.format(`?=JSONExtract(?, ?)`, [
            SqlString.raw(valueExpr),
            value,
            chType,
          ]);
        case JSDataType.Tuple:
          return SqlString.format(`toJSONString(?)=?`, [
            SqlString.raw(valueExpr),
            value,
          ]);
        case JSDataType.JSON:
          // Handle case for whole json object, ex: json
          return SqlString.format(`lower(hex(MD5(toString(?))))=?`, [
            SqlString.raw(valueExpr),
            MD5(value).toString(),
          ]);
        case JSDataType.Dynamic:
          // Handle case for json element, ex: json.c
          // Currently we can't distinguish null from 'null'; a nullish value is
          // treated the same way instead of crashing on `value.length` below.
          if (value == null || value === 'null') {
            return SqlString.format(`isNull(??)`, [column]);
          }
          if (value.length > 1000 || column.length > 1000) {
            throw new Error('Search value/object key too large.');
          }
          // TODO: update when the JSON type has a new version;
          // this will not work for array/object dynamic data
          return SqlString.format(`toString(?)=?`, [
            SqlString.raw(valueExpr),
            // Data other than array/object is always returned wrapped in double
            // quotes (because of ClickHouse); strip them to search correctly.
            value[0] === '"' && value[value.length - 1] === '"'
              ? value.slice(1, -1)
              : value,
          ]);
        default:
          // Handle nullish values
          if (value == null) {
            return SqlString.format(`isNull(?)`, [SqlString.raw(valueExpr)]);
          }
          // Handle the case when string is too long
          if (value.length > MAX_STRING_LENGTH) {
            return SqlString.format(
              // Only a prefix is hashed since md5 can be slow on big payloads,
              // which would block the main thread on search table render.
              // leftUTF8 counts code points, so the JS side must cut by code
              // points as well (not by UTF-16 code units).
              `lower(hex(MD5(leftUTF8(?, 1000))))=?`,
              [
                SqlString.raw(valueExpr),
                MD5(leftCodePoints(value, MD5_PREFIX_CODE_POINTS)).toString(),
              ],
            );
          }
          return SqlString.format(`?=?`, [
            SqlString.raw(valueExpr), // don't escape expressions
            value,
          ]);
      }
    })
    .join(' AND ');
}
export default function useRowWhere({
meta,
aliasMap,
@ -38,89 +139,7 @@ export default function useRowWhere({
);
return useCallback(
(row: Record<string, any>) => {
const res = Object.entries(row)
.map(([column, value]) => {
const cm = columnMap.get(column);
const chType = cm?.type;
const jsType = cm?.jsType;
const valueExpr = cm?.valueExpr;
if (jsType == null || chType == null) {
throw new Error(
`Column type not found for ${column}, ${JSON.stringify(columnMap)}`,
);
}
if (valueExpr == null) {
throw new Error(
`valueExpr not found for ${column}, ${JSON.stringify(columnMap)}`,
);
}
switch (jsType) {
case JSDataType.Date:
return SqlString.format(`?=parseDateTime64BestEffort(?, 9)`, [
SqlString.raw(valueExpr),
value,
]);
case JSDataType.Array:
case JSDataType.Map:
return SqlString.format(`?=JSONExtract(?, ?)`, [
SqlString.raw(valueExpr),
value,
chType,
]);
case JSDataType.JSON:
// Handle case for whole json object, ex: json
return SqlString.format(`lower(hex(MD5(toString(?))))=?`, [
SqlString.raw(valueExpr),
MD5(value).toString(),
]);
case JSDataType.Dynamic:
// Handle case for json element, ex: json.c
// Currently we can't distinguish null or 'null'
if (value === 'null') {
return SqlString.format(`isNull(??)`, [column]);
}
if (value.length > 1000 || column.length > 1000) {
throw new Error('Search value/object key too large.');
}
// TODO: update when JSON type have new version
// will not work for array/object dyanmic data
return SqlString.format(`toString(?)=?`, [
SqlString.raw(valueExpr),
// data other than array/object will alwayas return with dobule quote(because of CH)
// remove dobule qoute to search correctly
value[0] === '"' && value[value.length - 1] === '"'
? value.slice(1, -1)
: value,
]);
default:
// Handle the case when string is too long
if (value.length > MAX_STRING_LENGTH) {
return SqlString.format(
// We need to slice since md5 can be slow on big payloads
// which will block the main thread on search table render
// UTF8 since js only slices in utf8 points, not bytes
`lower(hex(MD5(leftUTF8(?, 1000))))=?`,
[
SqlString.raw(valueExpr),
MD5(value.substring(0, 1000)).toString(),
],
);
}
return SqlString.format(`?=?`, [
SqlString.raw(valueExpr), // don't escape expressions
value,
]);
}
})
.join(' AND ');
return res;
},
(row: Record<string, any>) => processRowToWhereClause(row, columnMap),
[columnMap],
);
}

View file

@ -25,6 +25,7 @@ export enum JSDataType {
Map = 'map',
Number = 'number',
String = 'string',
Tuple = 'tuple',
Bool = 'bool',
JSON = 'json',
Dynamic = 'dynamic', // json type will store anything as Dynamic type by default
@ -43,6 +44,8 @@ export const convertCHDataTypeToJSType = (
): JSDataType | null => {
if (dataType.startsWith('Date')) {
return JSDataType.Date;
} else if (dataType.startsWith('Tuple')) {
return JSDataType.Tuple;
} else if (dataType.startsWith('Map')) {
return JSDataType.Map;
} else if (dataType.startsWith('Array')) {
@ -79,11 +82,27 @@ export const convertCHDataTypeToJSType = (
return null;
};
/**
 * Reports whether `dataType` is one of the structured categories
 * (Map, Array, JSON, Tuple or Dynamic). Any other type, as well as
 * `null`/`undefined`, yields `false`.
 */
export const isJSDataTypeJSONStringifiable = (
  dataType: JSDataType | null | undefined,
) => {
  switch (dataType) {
    case JSDataType.Map:
    case JSDataType.Array:
    case JSDataType.JSON:
    case JSDataType.Tuple:
    case JSDataType.Dynamic:
      return true;
    default:
      return false;
  }
};
export const convertCHTypeToPrimitiveJSType = (dataType: string) => {
const jsType = convertCHDataTypeToJSType(dataType);
if (jsType === JSDataType.Map || jsType === JSDataType.Array) {
throw new Error('Map type is not a primitive type');
if (
jsType === JSDataType.Map ||
jsType === JSDataType.Array ||
jsType === JSDataType.Tuple
) {
throw new Error('Map, Array or Tuple type is not a primitive type');
} else if (jsType === JSDataType.Date) {
return JSDataType.Number;
}