feat: Add support for querying nested/array fields (#1660)

Closes HDX-3113

# Summary

This PR adds support for querying nested (array-type) columns with Lucene syntax.

## Syntax

### Arrays of simple types

- Array is non-empty: `Events.Name:*` --> `notEmpty(Events.Name) = 1`
- Array contains element: `Events.Name:"error"` --> `has(Events.Name, 'error')`
   - Supports string, numeric, and boolean inner types with `has()`
   - All other types are cast to string: `Events.Timestamp:"2026-02-02 15:19:37.196300098"` --> `arrayExists(el -> toString(el) = '2026-02-02 15:19:37.196300098', Events.Timestamp)`
- Array contains element containing: `Events.Name:domain` --> `arrayExists(el -> el ILIKE '%domain%', Events.Name)`

### Arrays of Maps

- Array of maps contains map with non-empty key: `Events.Attributes.exception.stacktrace:*` --> `arrayExists(el -> notEmpty(toString(el['exception.stacktrace'])) = 1, Events.Attributes)`
- Array of maps contains map with key+value: `Events.Attributes.exception.stacktrace:"AggregateError"` --> `arrayExists(el -> el['exception.stacktrace'] = 'AggregateError', Events.Attributes)`
- Array of maps contains map with key containing value: `Events.Attributes.exception.stacktrace:AggregateError` --> `arrayExists(el -> el['exception.stacktrace'] ILIKE '%AggregateError%', Events.Attributes)`

### Arrays of JSON

- Array of JSON contains object with non-empty key: `Events.Attributes.message:*` --> `arrayExists(el -> notEmpty(toString(el.message)) = 1, Events.Attributes)`
- Array of JSON contains object with key + value: `Events.Attributes.exception.stacktrace:"AggregateError"` --> `arrayExists(el -> toString(el.exception.stacktrace) = 'AggregateError', Events.Attributes)`
- Array of JSON contains object with key containing value: `Events.Attributes.exception.stacktrace:AggregateError` --> `arrayExists(el -> toString(el.exception.stacktrace) ILIKE '%AggregateError%', Events.Attributes)`
This commit is contained in:
Drew Davis 2026-02-04 12:34:48 -05:00 committed by GitHub
parent 8f1026089d
commit 6cfa40a0f7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 833 additions and 68 deletions

View file

@ -0,0 +1,7 @@
---
"@hyperdx/common-utils": patch
"@hyperdx/api": patch
"@hyperdx/app": patch
---
feat: Add support for querying nested/array columns with lucene

View file

@ -15,6 +15,7 @@ import {
ILanguageFormatter,
useAutoCompleteOptions,
} from './hooks/useAutoCompleteOptions';
import { useMetadataWithSettings } from './hooks/useMetadata';
export class LuceneLanguageFormatter implements ILanguageFormatter {
formatFieldValue(f: Field): string {
@ -60,6 +61,7 @@ export default function SearchInputV2({
field: { onChange, value },
} = useController(props);
const metadata = useMetadataWithSettings();
const ref = useRef<HTMLTextAreaElement>(null);
const [parsedEnglishQuery, setParsedEnglishQuery] = useState<string>('');
@ -73,10 +75,16 @@ export default function SearchInputV2({
);
useEffect(() => {
genEnglishExplanation(value).then(q => {
setParsedEnglishQuery(q);
});
}, [value]);
if (tableConnection) {
genEnglishExplanation({
query: value,
tableConnection,
metadata,
}).then(q => {
setParsedEnglishQuery(q);
});
}
}, [value, tableConnection, metadata]);
useHotkeys(
'/',

View file

@ -1,3 +1,4 @@
import { JSDataType } from '@/clickhouse';
import { ClickhouseClient } from '@/clickhouse/node';
import { getMetadata } from '@/core/metadata';
import {
@ -57,6 +58,7 @@ describe('CustomSchemaSQLSerializerV2 - json', () => {
},
found: true,
propertyType: 'json',
isArray: false,
});
const field2 = 'ResourceAttributesJSON.test.nest';
const res2 = await serializer.getColumnForField(field2, {});
@ -69,6 +71,7 @@ describe('CustomSchemaSQLSerializerV2 - json', () => {
},
found: true,
propertyType: 'json',
isArray: false,
});
});
@ -448,7 +451,15 @@ describe('CustomSchemaSQLSerializerV2 - json', () => {
it.each(testCases)(
'converts "$lucene" to english "$english"',
async ({ lucene, english }) => {
const actualEnglish = await genEnglishExplanation(lucene);
const actualEnglish = await genEnglishExplanation({
query: lucene,
tableConnection: {
tableName,
databaseName,
connectionId,
},
metadata,
});
expect(actualEnglish).toBe(english);
},
);
@ -1213,3 +1224,387 @@ describe('CustomSchemaSQLSerializerV2 - indexCoversColumn', () => {
},
);
});
describe('CustomSchemaSQLSerializerV2 - Array and Nested Fields', () => {
const metadata = getMetadata(
new ClickhouseClient({ host: 'http://localhost:8123' }),
);
metadata.getColumn = jest.fn().mockImplementation(async ({ column }) => {
if (column === 'Events.Name') {
return { name: 'Events.Name', type: 'Array(String)' };
} else if (column === 'Events.Count') {
return { name: 'Events.Count', type: 'Array(UInt64)' };
} else if (column === 'Events.Attributes') {
return { name: 'Events.Attributes', type: 'Array(Map(String, String))' };
} else if (column === 'Events.IsAvailable') {
return { name: 'Events.IsAvailable', type: 'Array(Bool)' };
} else if (column === 'Events.Timestamp') {
return { name: 'Events.Timestamp', type: 'Array(DateTime64)' };
} else if (column === 'Events.JSONAttributes') {
return { name: 'Events.JSONAttributes', type: 'Array(JSON)' };
} else {
return undefined;
}
});
metadata.getMaterializedColumnsLookupTable = jest
.fn()
.mockResolvedValue(new Map());
metadata.getSetting = jest.fn().mockResolvedValue(undefined);
const databaseName = 'testName';
const tableName = 'testTable';
const connectionId = 'testId';
const serializer = new CustomSchemaSQLSerializerV2({
metadata,
databaseName,
tableName,
connectionId,
implicitColumnExpression: 'Body',
});
it('getColumnForField', async () => {
const field1 = 'Events.Name';
const res1 = await serializer.getColumnForField(field1, {});
expect(res1).toEqual({
column: 'Events.Name',
found: true,
propertyType: JSDataType.String,
isArray: true,
});
const field2 = 'Events.Count';
const res2 = await serializer.getColumnForField(field2, {});
expect(res2).toEqual({
column: 'Events.Count',
found: true,
propertyType: JSDataType.Number,
isArray: true,
});
const field3 = 'Events.IsAvailable';
const res3 = await serializer.getColumnForField(field3, {});
expect(res3).toEqual({
column: 'Events.IsAvailable',
found: true,
propertyType: JSDataType.Bool,
isArray: true,
});
});
it('compare - eq', async () => {
const eqField = 'Events.Name';
const eqTerm = 'error';
const eq1 = await serializer.eq(eqField, eqTerm, false, {});
expect(eq1).toBe("has(Events.Name, 'error')");
const eq2 = await serializer.eq(eqField, eqTerm, true, {});
expect(eq2).toBe("NOT has(Events.Name, 'error')");
});
it('compare - isNotNull', async () => {
const isNotNullField = 'Events.Name';
const isNotNull1 = await serializer.isNotNull(isNotNullField, false, {});
expect(isNotNull1).toBe('notEmpty(Events.Name) = 1');
const isNotNull2 = await serializer.isNotNull(isNotNullField, true, {});
expect(isNotNull2).toBe('notEmpty(Events.Name) != 1');
});
it('compare - gte', async () => {
const gteField = 'Events.Name';
const gteTerm = '30';
await expect(async () =>
serializer.gte(gteField, gteTerm, {}),
).rejects.toThrow('>= comparison is not supported for Array-type fields');
});
it('compare - lte', async () => {
const lteField = 'Events.Name';
const lteTerm = '40';
await expect(async () =>
serializer.lte(lteField, lteTerm, {}),
).rejects.toThrow('<= comparison is not supported for Array-type fields');
});
it('compare - gt', async () => {
const gtField = 'Events.Name';
const gtTerm = '70';
await expect(async () =>
serializer.gt(gtField, gtTerm, {}),
).rejects.toThrow('> comparison is not supported for Array-type fields');
});
it('compare - lt', async () => {
const ltField = 'Events.Name';
const ltTerm = '2';
await expect(async () =>
serializer.lt(ltField, ltTerm, {}),
).rejects.toThrow('< comparison is not supported for Array-type fields');
});
it('compare - range', async () => {
const rangeField = 'Events.Name';
await expect(async () =>
serializer.range(rangeField, '2', '5', false, {}),
).rejects.toThrow(
'range comparison is not supported for Array-type fields',
);
});
const testCases = [
// String array field tests
{
lucene: 'Events.Name:foo',
sql: "(arrayExists(el -> el ILIKE '%foo%', Events.Name))",
english: "'Events.Name' contains an element containing foo",
},
{
lucene: 'NOT Events.Name:foo',
sql: "(NOT arrayExists(el -> el ILIKE '%foo%', Events.Name))",
english: "NOT 'Events.Name' contains an element containing foo",
},
{
lucene: '-Events.Name:foo',
sql: "(NOT arrayExists(el -> el ILIKE '%foo%', Events.Name))",
english: "'Events.Name' does not contain an element containing foo",
},
{
lucene: 'Events.Name:"foo"',
sql: "(has(Events.Name, 'foo'))",
english: "'Events.Name' contains foo",
},
{
lucene: 'NOT Events.Name:"foo"',
sql: "(NOT has(Events.Name, 'foo'))",
english: "NOT 'Events.Name' contains foo",
},
{
lucene: '-Events.Name:"foo"',
sql: "(NOT has(Events.Name, 'foo'))",
english: "'Events.Name' does not contain foo",
},
{
lucene: 'Events.Name:"foo bar"',
sql: "(has(Events.Name, 'foo bar'))",
english: "'Events.Name' contains foo bar",
},
{
lucene: 'NOT Events.Name:"foo bar"',
sql: "(NOT has(Events.Name, 'foo bar'))",
english: "NOT 'Events.Name' contains foo bar",
},
{
lucene: '-Events.Name:"foo bar"',
sql: "(NOT has(Events.Name, 'foo bar'))",
english: "'Events.Name' does not contain foo bar",
},
// Prefix / suffix wildcard tests
{
lucene: 'Events.Name:foo*',
sql: "(arrayExists(el -> el ILIKE '%foo%', Events.Name))",
english: "'Events.Name' contains an element containing foo",
},
{
lucene: 'Events.Name:*foo',
sql: "(arrayExists(el -> el ILIKE '%foo%', Events.Name))",
english: "'Events.Name' contains an element containing foo",
},
{
lucene: 'Events.Name:*foo*',
sql: "(arrayExists(el -> el ILIKE '%foo%', Events.Name))",
english: "'Events.Name' contains an element containing foo",
},
// Number array field tests
{
lucene: 'Events.Count:5',
sql: "(has(Events.Count, CAST('5', 'Float64')))",
english: "'Events.Count' contains 5",
},
{
lucene: 'NOT Events.Count:5',
sql: "(NOT has(Events.Count, CAST('5', 'Float64')))",
english: "NOT 'Events.Count' contains 5",
},
{
lucene: 'Events.Count:"4"',
sql: "(has(Events.Count, CAST('4', 'Float64')))",
english: "'Events.Count' contains 4",
},
{
lucene: 'NOT Events.Count:"4"',
sql: "(NOT has(Events.Count, CAST('4', 'Float64')))",
english: "NOT 'Events.Count' contains 4",
},
// Boolean array field tests
{
lucene: 'Events.IsAvailable:true',
sql: '(has(Events.IsAvailable, 1))',
english: "'Events.IsAvailable' contains true",
},
{
lucene: 'NOT Events.IsAvailable:true',
sql: '(NOT has(Events.IsAvailable, 1))',
english: "NOT 'Events.IsAvailable' contains true",
},
{
lucene: 'Events.IsAvailable:false',
sql: '(has(Events.IsAvailable, 0))',
english: "'Events.IsAvailable' contains false",
},
{
lucene: 'NOT Events.IsAvailable:false',
sql: '(NOT has(Events.IsAvailable, 0))',
english: "NOT 'Events.IsAvailable' contains false",
},
// Array(Map(String, String)) tests
{
lucene: 'Events.Attributes.message:key1',
sql: "(arrayExists(el -> el['message'] ILIKE '%key1%', Events.Attributes))",
english:
"'Events.Attributes' contains an element with key message and value key1",
},
{
lucene: '-Events.Attributes.message:key1',
sql: "(NOT arrayExists(el -> el['message'] ILIKE '%key1%', Events.Attributes))",
english:
"'Events.Attributes' does not contain an element with key message and value key1",
},
{
lucene: 'Events.Attributes.message:key1*',
sql: "(arrayExists(el -> el['message'] ILIKE '%key1%', Events.Attributes))",
english:
"'Events.Attributes' contains an element with key message and value key1",
},
{
lucene: 'Events.Attributes.message:"key1"',
sql: "(arrayExists(el -> el['message'] = 'key1', Events.Attributes))",
english: "'Events.Attributes.message' contains key1",
},
{
lucene: 'Events.Attributes.message.subkey:"key1"',
sql: "(arrayExists(el -> el['message.subkey'] = 'key1', Events.Attributes))",
english: "'Events.Attributes.message.subkey' contains key1",
},
{
lucene: 'Events.Attributes.message:("key1 key2")',
sql: "((arrayExists(el -> el['message'] ILIKE '%key1 key2%', Events.Attributes)))",
english: '(Events.Attributes.message contains "key1 key2")',
},
{
lucene: 'Events.Attributes.message:*',
sql: "(arrayExists(el -> notEmpty(toString(el['message'])) = 1, Events.Attributes))",
english: "'Events.Attributes' contains an element with non-null message",
},
{
lucene: '-Events.Attributes.message:*',
sql: "(NOT arrayExists(el -> notEmpty(toString(el['message'])) = 1, Events.Attributes))",
english:
"'Events.Attributes' does not contain an element with non-null message",
},
{
lucene: 'NOT Events.Attributes.message:*',
sql: "(NOT arrayExists(el -> notEmpty(toString(el['message'])) = 1, Events.Attributes))",
english:
"NOT 'Events.Attributes' contains an element with non-null message",
},
{
lucene: 'Events.Attributes:*',
sql: '(notEmpty(Events.Attributes) = 1)',
english: "'Events.Attributes' is not null",
},
// Non-string inner type tests
{
lucene: 'Events.Timestamp:"2025-01-01"',
sql: "(arrayExists(el -> toString(el) = '2025-01-01', Events.Timestamp))",
english: "'Events.Timestamp' contains 2025-01-01",
},
{
lucene: 'Events.Timestamp:2025-01-01',
sql: "(arrayExists(el -> toString(el) ILIKE '%2025-01-01%', Events.Timestamp))",
english: "'Events.Timestamp' contains an element containing 2025-01-01",
},
// JSON inner type tests
{
lucene: 'Events.JSONAttributes.message:key1',
sql: "(arrayExists(el -> toString(el.`message`) ILIKE '%key1%', Events.JSONAttributes))",
english:
"'Events.JSONAttributes' contains an element with key message and value key1",
},
{
lucene: '-Events.JSONAttributes.message:key1',
sql: "(NOT arrayExists(el -> toString(el.`message`) ILIKE '%key1%', Events.JSONAttributes))",
english:
"'Events.JSONAttributes' does not contain an element with key message and value key1",
},
{
lucene: 'Events.JSONAttributes.message:key1*',
sql: "(arrayExists(el -> toString(el.`message`) ILIKE '%key1%', Events.JSONAttributes))",
english:
"'Events.JSONAttributes' contains an element with key message and value key1",
},
{
lucene: 'Events.JSONAttributes.message:"key1"',
sql: "(arrayExists(el -> toString(el.`message`) = 'key1', Events.JSONAttributes))",
english: "'Events.JSONAttributes.message' contains key1",
},
{
lucene: 'Events.JSONAttributes.message.subkey:"key1"',
sql: "(arrayExists(el -> toString(el.`message`.`subkey`) = 'key1', Events.JSONAttributes))",
english: "'Events.JSONAttributes.message.subkey' contains key1",
},
{
lucene: 'Events.JSONAttributes.message:("key1 key2")',
sql: "((arrayExists(el -> toString(el.`message`) ILIKE '%key1 key2%', Events.JSONAttributes)))",
english: '(Events.JSONAttributes.message contains "key1 key2")',
},
{
lucene: 'Events.JSONAttributes.message:*',
sql: '(arrayExists(el -> notEmpty(toString(el.`message`)) = 1, Events.JSONAttributes))',
english:
"'Events.JSONAttributes' contains an element with non-null message",
},
{
lucene: '-Events.JSONAttributes.message:*',
sql: '(NOT arrayExists(el -> notEmpty(toString(el.`message`)) = 1, Events.JSONAttributes))',
english:
"'Events.JSONAttributes' does not contain an element with non-null message",
},
{
lucene: 'NOT Events.JSONAttributes.message:*',
sql: '(NOT arrayExists(el -> notEmpty(toString(el.`message`)) = 1, Events.JSONAttributes))',
english:
"NOT 'Events.JSONAttributes' contains an element with non-null message",
},
{
lucene: 'Events.JSONAttributes:*',
sql: '(notEmpty(Events.JSONAttributes) = 1)',
english: "'Events.JSONAttributes' is not null",
},
];
it.each(testCases)(
'converts "$lucene" to SQL "$sql"',
async ({ lucene, sql }) => {
const builder = new SearchQueryBuilder(lucene, serializer);
const actualSql = await builder.build();
expect(actualSql).toBe(sql);
},
);
it.each(testCases)(
'converts "$lucene" to english "$english"',
async ({ lucene, english }) => {
const actualEnglish = await genEnglishExplanation({
query: lucene,
tableConnection: {
tableName,
databaseName,
connectionId,
},
metadata,
});
expect(actualEnglish).toBe(english);
},
);
});

View file

@ -110,20 +110,38 @@ export const isJSDataTypeJSONStringifiable = (
);
};
export const convertCHTypeToPrimitiveJSType = (dataType: string) => {
const jsType = convertCHDataTypeToJSType(dataType);
if (
jsType === JSDataType.Map ||
jsType === JSDataType.Array ||
jsType === JSDataType.Tuple
) {
throw new Error('Map, Array or Tuple type is not a primitive type');
} else if (jsType === JSDataType.Date) {
return JSDataType.Number;
export const extractInnerCHArrayJSType = (
dataType: string,
): JSDataType | null => {
if (dataType.trim().startsWith('Array(') && dataType.trim().endsWith(')')) {
const innerType = dataType.trim().slice(6, -1);
return convertCHDataTypeToJSType(innerType);
}
return jsType;
return null;
};
/**
 * Map a ClickHouse column type onto the JS type used when building Lucene
 * search expressions, and report whether the column itself is an array.
 *
 * - Map and Tuple columns are rejected with an error.
 * - Date columns are searched as numbers.
 * - Array columns report their inner element type when it can be resolved;
 *   otherwise the type stays `JSDataType.Array`.
 *
 * @param dataType ClickHouse type string of the column.
 * @returns The search type (possibly null) and the `isArray` flag.
 * @throws Error when the column is a Map or a Tuple.
 */
export const convertCHTypeToLuceneSearchType = (
  dataType: string,
): {
  type: JSDataType | null;
  isArray: boolean;
} => {
  const outerType = convertCHDataTypeToJSType(dataType);

  switch (outerType) {
    case JSDataType.Map:
    case JSDataType.Tuple:
      throw new Error('Map or Tuple types cannot be searched with Lucene.');
    case JSDataType.Date:
      return { type: JSDataType.Number, isArray: false };
    case JSDataType.Array: {
      // Fall back to the Array type itself when the element type is unknown.
      const innerType = extractInnerCHArrayJSType(dataType);
      return { type: innerType ? innerType : outerType, isArray: true };
    }
    default:
      return { type: outerType, isArray: false };
  }
};
const hash = (input: string | number) => Math.abs(hashCode(`${input}`));

View file

@ -2,8 +2,14 @@ import lucene from '@hyperdx/lucene';
import { chunk } from 'lodash';
import SqlString from 'sqlstring';
import { convertCHTypeToPrimitiveJSType, JSDataType } from '@/clickhouse';
import { Metadata, SkipIndexMetadata } from '@/core/metadata';
import {
ColumnMeta,
convertCHDataTypeToJSType,
convertCHTypeToLuceneSearchType,
extractInnerCHArrayJSType,
JSDataType,
} from '@/clickhouse';
import { Metadata, SkipIndexMetadata, TableConnection } from '@/core/metadata';
import {
parseTokenizerFromTextIndex,
splitAndTrimWithBracket,
@ -97,6 +103,44 @@ const CLICK_HOUSE_JSON_NUMBER_TYPES = [
'Float64',
];
/**
 * Look up the column that a dot-separated field refers to by trying each
 * prefix of the field, shortest first, and returning the first column the
 * metadata lookup resolves.
 *
 * e.g. for field 'a.b.c' the candidates 'a', 'a.b' and 'a.b.c' are tried in
 * that order; lookups stop at the first hit.
 *
 * @returns The metadata of the first matching column, or undefined when no
 * prefix of the field names a column.
 */
async function findPrefixMatch({
  field,
  metadata,
  databaseName,
  tableName,
  connectionId,
}: {
  field: string;
  metadata: Metadata;
  databaseName: string;
  tableName: string;
  connectionId: string;
}): Promise<ColumnMeta | undefined> {
  // Build the candidate list up front: each entry extends the previous one
  // by a single segment (an empty running prefix is simply replaced).
  const candidates: string[] = [];
  field.split('.').reduce((soFar, segment) => {
    const extended = soFar ? `${soFar}.${segment}` : segment;
    candidates.push(extended);
    return extended;
  }, '');

  // Lookups stay sequential so the shortest matching prefix wins.
  for (const candidate of candidates) {
    const columnMeta = await metadata.getColumn({
      databaseName,
      tableName,
      column: candidate,
      connectionId,
    });
    if (columnMeta) {
      return columnMeta;
    }
  }

  return undefined;
}
interface SerializerContext {
/** The current implicit column expression, indicating which SQL expression to use when comparing a term to the '<implicit>' field */
implicitColumnExpression?: string;
@ -138,6 +182,23 @@ interface Serializer {
}
class EnglishSerializer implements Serializer {
private metadata: Metadata;
private tableName: string;
private databaseName: string;
private connectionId: string;
constructor({
metadata,
databaseName,
tableName,
connectionId,
}: { metadata: Metadata } & CustomSchemaConfig) {
this.metadata = metadata;
this.databaseName = databaseName;
this.tableName = tableName;
this.connectionId = connectionId;
}
private translateField(field: string, context: SerializerContext) {
if (field === IMPLICIT_FIELD) {
return context.implicitColumnExpression ?? 'event';
@ -146,6 +207,35 @@ class EnglishSerializer implements Serializer {
return `'${field}'`;
}
/**
 * Resolve the column behind a (possibly dotted) field and describe it for
 * the English explanation.
 *
 * When no column matches any prefix of the field, only
 * `{ isArray: false, type: null }` is returned. Otherwise the result also
 * carries the matched column name and the part of the field that follows
 * the column name (`fieldPostfix`). For array columns, `type` is the inner
 * element type when it can be resolved.
 */
private async getFieldType(field: string) {
  const column = await findPrefixMatch({
    field,
    metadata: this.metadata,
    databaseName: this.databaseName,
    tableName: this.tableName,
    connectionId: this.connectionId,
  });

  if (!column) {
    return {
      isArray: false,
      type: null,
    };
  }

  // Whatever follows the matched column's own segments is the key path.
  const columnSegmentCount = column.name.split('.').length;
  const fieldPostfix = field.split('.').slice(columnSegmentCount).join('.');

  const outerType = convertCHDataTypeToJSType(column.type);
  const isArray = outerType === JSDataType.Array;
  const innerType = isArray ? extractInnerCHArrayJSType(column.type) : null;

  return {
    isArray,
    type: innerType ? innerType : outerType,
    fieldPostfix,
    column: column.name,
  };
}
operator(op: lucene.Operator) {
switch (op) {
case 'NOT':
@ -153,12 +243,12 @@ class EnglishSerializer implements Serializer {
return 'AND NOT';
case 'OR NOT':
return 'OR NOT';
// @ts-ignore TODO: Types need to be fixed upstream
// @ts-expect-error TODO: Types need to be fixed upstream
case '&&':
case '<implicit>':
case 'AND':
return 'AND';
// @ts-ignore TODO: Types need to be fixed upstream
// @ts-expect-error TODO: Types need to be fixed upstream
case '||':
case 'OR':
return 'OR';
@ -173,8 +263,16 @@ class EnglishSerializer implements Serializer {
isNegatedField: boolean,
context: SerializerContext,
) {
const { isArray } = await this.getFieldType(field);
return `${this.translateField(field, context)} ${
isNegatedField ? 'is not' : 'is'
isArray
? isNegatedField
? 'does not contain'
: 'contains'
: isNegatedField
? 'is not'
: 'is'
} ${term}`;
}
@ -183,6 +281,19 @@ class EnglishSerializer implements Serializer {
isNegatedField: boolean,
context: SerializerContext,
) {
const { isArray, type, fieldPostfix, column } =
await this.getFieldType(field);
const isArrayOfMaps =
isArray && (type === JSDataType.Map || type === JSDataType.JSON);
if (column && isArrayOfMaps && fieldPostfix) {
return `${this.translateField(column, context)} ${
isNegatedField
? `does not contain an element with non-null ${fieldPostfix}`
: `contains an element with non-null ${fieldPostfix}`
}`;
}
return `${this.translateField(field, context)} ${
isNegatedField ? 'is null' : 'is not null'
}`;
@ -238,8 +349,25 @@ class EnglishSerializer implements Serializer {
: 'contains'
} ${formattedTerm}`;
} else {
return `${this.translateField(field, context)} ${
isNegatedField ? 'does not contain' : 'contains'
const { isArray, type, column, fieldPostfix } =
await this.getFieldType(field);
const isExactMatchType =
type === JSDataType.Bool || type === JSDataType.Number;
const isArrayOfMaps =
isArray && (type === JSDataType.Map || type === JSDataType.JSON);
const fieldToTranslate = isArrayOfMaps && column ? column : field;
return `${this.translateField(fieldToTranslate, context)} ${
isArrayOfMaps
? isNegatedField
? `does not contain an element with key ${fieldPostfix} and value`
: `contains an element with key ${fieldPostfix} and value`
: isArray && !isExactMatchType
? isNegatedField
? 'does not contain an element containing'
: 'contains an element containing'
: isNegatedField
? 'does not contain'
: 'contains'
} ${formattedTerm}`;
}
}
@ -266,8 +394,10 @@ export abstract class SQLSerializer implements Serializer {
column?: string;
columnJSON?: { string: string; number: string };
propertyType?: JSDataType;
isArray?: boolean;
found: boolean;
mapKeyIndexExpression?: string;
arrayMapKeyExpression?: string;
}>;
operator(op: lucene.Operator) {
@ -277,12 +407,12 @@ export abstract class SQLSerializer implements Serializer {
return 'AND NOT';
case 'OR NOT':
return 'OR NOT';
// @ts-ignore TODO: Types need to be fixed upstream
// @ts-expect-error TODO: Types need to be fixed upstream
case '&&':
case '<implicit>':
case 'AND':
return 'AND';
// @ts-ignore TODO: Types need to be fixed upstream
// @ts-expect-error TODO: Types need to be fixed upstream
case '||':
case 'OR':
return 'OR';
@ -298,11 +428,30 @@ export abstract class SQLSerializer implements Serializer {
isNegatedField: boolean,
context: SerializerContext,
) {
const { column, columnJSON, found, propertyType, mapKeyIndexExpression } =
await this.getColumnForField(field, context);
const {
column,
columnJSON,
found,
propertyType,
isArray,
mapKeyIndexExpression,
arrayMapKeyExpression,
} = await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
if (column && isArray) {
return renderArrayFieldExpression({
column,
mapKey: arrayMapKeyExpression,
term,
propertyType,
isNegatedField,
exactMatch: true,
});
}
const expressionPostfix =
mapKeyIndexExpression && !isNegatedField
? ` AND ${mapKeyIndexExpression}`
@ -343,8 +492,15 @@ export abstract class SQLSerializer implements Serializer {
isNegatedField: boolean,
context: SerializerContext,
) {
const { column, columnJSON, found, propertyType, mapKeyIndexExpression } =
await this.getColumnForField(field, context);
const {
column,
columnJSON,
found,
propertyType,
mapKeyIndexExpression,
isArray,
arrayMapKeyExpression,
} = await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -352,18 +508,44 @@ export abstract class SQLSerializer implements Serializer {
mapKeyIndexExpression && !isNegatedField
? ` AND ${mapKeyIndexExpression}`
: '';
if (propertyType === JSDataType.JSON) {
if (
column &&
isArray &&
(propertyType === JSDataType.Map || propertyType === JSDataType.JSON) &&
arrayMapKeyExpression
) {
const fieldAccess =
propertyType === JSDataType.Map
? SqlString.format('el[?]', [arrayMapKeyExpression])
: SqlString.format('el.??', [arrayMapKeyExpression]);
return SqlString.format(
`${isNegatedField ? 'NOT ' : ''}arrayExists(el -> notEmpty(toString(${fieldAccess})) = 1, ?)`,
[SqlString.raw(column)],
);
}
if (propertyType === JSDataType.JSON && !isArray) {
return `notEmpty(${columnJSON?.string}) ${isNegatedField ? '!' : ''}= 1${expressionPostfix}`;
}
return `notEmpty(${column}) ${isNegatedField ? '!' : ''}= 1${expressionPostfix}`;
}
async gte(field: string, term: string, context: SerializerContext) {
const { column, columnJSON, found, propertyType, mapKeyIndexExpression } =
await this.getColumnForField(field, context);
const {
column,
columnJSON,
found,
propertyType,
isArray,
mapKeyIndexExpression,
} = await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
if (isArray) {
throw new Error('>= comparison is not supported for Array-type fields');
}
const expressionPostfix = mapKeyIndexExpression
? ` AND ${mapKeyIndexExpression}`
: '';
@ -377,11 +559,20 @@ export abstract class SQLSerializer implements Serializer {
}
async lte(field: string, term: string, context: SerializerContext) {
const { column, columnJSON, found, propertyType, mapKeyIndexExpression } =
await this.getColumnForField(field, context);
const {
column,
columnJSON,
found,
propertyType,
isArray,
mapKeyIndexExpression,
} = await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
if (isArray) {
throw new Error('<= comparison is not supported for Array-type fields');
}
const expressionPostfix = mapKeyIndexExpression
? ` AND ${mapKeyIndexExpression}`
: '';
@ -395,11 +586,20 @@ export abstract class SQLSerializer implements Serializer {
}
async lt(field: string, term: string, context: SerializerContext) {
const { column, columnJSON, found, propertyType, mapKeyIndexExpression } =
await this.getColumnForField(field, context);
const {
column,
columnJSON,
found,
propertyType,
isArray,
mapKeyIndexExpression,
} = await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
if (isArray) {
throw new Error('< comparison is not supported for Array-type fields');
}
const expressionPostfix = mapKeyIndexExpression
? ` AND ${mapKeyIndexExpression}`
: '';
@ -413,11 +613,20 @@ export abstract class SQLSerializer implements Serializer {
}
async gt(field: string, term: string, context: SerializerContext) {
const { column, columnJSON, found, propertyType, mapKeyIndexExpression } =
await this.getColumnForField(field, context);
const {
column,
columnJSON,
found,
propertyType,
isArray,
mapKeyIndexExpression,
} = await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
if (isArray) {
throw new Error('> comparison is not supported for Array-type fields');
}
const expressionPostfix = mapKeyIndexExpression
? ` AND ${mapKeyIndexExpression}`
: '';
@ -465,11 +674,16 @@ export abstract class SQLSerializer implements Serializer {
isNegatedField: boolean,
context: SerializerContext,
) {
const { column, found, mapKeyIndexExpression } =
const { column, found, mapKeyIndexExpression, isArray } =
await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
if (isArray) {
throw new Error(
'range comparison is not supported for Array-type fields',
);
}
const expressionPostfix =
mapKeyIndexExpression && !isNegatedField
? ` AND ${mapKeyIndexExpression}`
@ -490,6 +704,7 @@ type CustomSchemaSQLColumnExpression = {
number: string;
};
mapKeyIndexExpression?: string;
arrayMapKeyExpression?: string;
};
export type CustomSchemaConfig = {
@ -499,6 +714,95 @@ export type CustomSchemaConfig = {
connectionId: string;
};
/**
 * Render the SQL predicate that matches a Lucene term against an
 * Array-typed column.
 *
 * The shape of the predicate depends on the array's inner type:
 * - Number: `has(column, CAST(term, 'Float64'))`
 * - Bool: `has(column, 1 | 0)`; a term other than true/false is passed
 *   through unchanged.
 * - Map / JSON: `arrayExists` over the elements, reading `mapKey` from each
 *   element (`el[key]` for maps, `toString(el.key)` for JSON).
 * - String: `has(column, term)` for exact matches, otherwise an ILIKE
 *   substring match per element.
 * - Anything else: elements are compared through `toString(el)`.
 *
 * @param column Column expression; inserted into the SQL unescaped.
 * @param mapKey Key to read from each element; required for Map/JSON.
 * @param term Search term; always passed as an escaped value.
 * @param isNegatedField When true the predicate is prefixed with `NOT `.
 * @param propertyType Inner element type of the array.
 * @param exactMatch Equality when true, `ILIKE '%term%'` when false
 *   (ignored for Number and Bool, which always use `has`).
 * @throws Error when a Map/JSON array is searched without `mapKey`.
 */
function renderArrayFieldExpression({
  column,
  mapKey,
  term,
  isNegatedField,
  propertyType,
  exactMatch,
}: {
  column: string;
  mapKey?: string;
  term: string;
  isNegatedField: boolean;
  propertyType?: JSDataType;
  exactMatch: boolean;
}) {
  const prefix = isNegatedField ? 'NOT ' : '';
  const rawColumn = SqlString.raw(column);

  if (propertyType === JSDataType.Number) {
    return SqlString.format(`${prefix}has(?, CAST(?, 'Float64'))`, [
      rawColumn,
      term,
    ]);
  }

  if (propertyType === JSDataType.Bool) {
    const normTerm = `${term}`.trim().toLowerCase();
    const comparisonValue =
      normTerm === 'true' ? 1 : normTerm === 'false' ? 0 : term;
    return SqlString.format(`${prefix}has(?, ?)`, [rawColumn, comparisonValue]);
  }

  if (propertyType === JSDataType.Map || propertyType === JSDataType.JSON) {
    if (!mapKey) {
      throw new Error(
        `Map key expression is required for searching column ${column}. Try '${column}.key:value'`,
      );
    }
    // Maps take the key as an escaped value, JSON as an escaped identifier
    // path; both share the same surrounding arrayExists shape.
    const elementAccess =
      propertyType === JSDataType.Map ? 'el[?]' : 'toString(el.??)';
    return exactMatch
      ? SqlString.format(
          `${prefix}arrayExists(el -> ${elementAccess} = ?, ?)`,
          [mapKey, term, rawColumn],
        )
      : SqlString.format(
          `${prefix}arrayExists(el -> ${elementAccess} ILIKE ?, ?)`,
          [mapKey, `%${term}%`, rawColumn],
        );
  }

  const isStringArray = propertyType === JSDataType.String;
  if (exactMatch && isStringArray) {
    return SqlString.format(`${prefix}has(?, ?)`, [rawColumn, term]);
  }

  // Non-string elements are stringified before comparison. This is a fixed
  // SQL fragment, so it needs no formatting or escaping.
  const stringifiedElement = isStringArray ? 'el' : 'toString(el)';
  return exactMatch
    ? SqlString.format(
        `${prefix}arrayExists(el -> ${stringifiedElement} = ?, ?)`,
        [term, rawColumn],
      )
    : SqlString.format(
        `${prefix}arrayExists(el -> ${stringifiedElement} ILIKE ?, ?)`,
        [`%${term}%`, rawColumn],
      );
}
export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
private metadata: Metadata;
private tableName: string;
@ -560,8 +864,15 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
context: SerializerContext,
) {
const isImplicitField = field === IMPLICIT_FIELD;
const { column, columnJSON, found, propertyType, mapKeyIndexExpression } =
await this.getColumnForField(field, context);
const {
column,
columnJSON,
found,
propertyType,
isArray,
mapKeyIndexExpression,
arrayMapKeyExpression,
} = await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -572,6 +883,17 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
? ` AND ${mapKeyIndexExpression}`
: '';
if (isArray) {
return renderArrayFieldExpression({
column,
mapKey: arrayMapKeyExpression,
term,
propertyType,
isNegatedField,
exactMatch: false,
});
}
if (propertyType === JSDataType.Bool) {
const normTerm = `${term}`.trim().toLowerCase();
return SqlString.format(
@ -739,7 +1061,7 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
columnType: exactMatch.type,
columnExpression: exactMatch.name,
// TODO
// Add JSON excatMatch if want to support whole json compare in future, ex: json:"{a: 1234}""
// Add JSON exactMatch if want to support whole json compare in future, ex: json:"{a: 1234}""
};
let materializedColumns: Map<string, string>;
try {
@ -776,16 +1098,17 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
return columnExpression;
}
const fieldPrefix = field.split('.')[0];
const prefixMatch = await this.metadata.getColumn({
const prefixMatch = await findPrefixMatch({
field,
metadata: this.metadata,
databaseName: this.databaseName,
tableName: this.tableName,
column: fieldPrefix,
connectionId: this.connectionId,
});
if (prefixMatch) {
const fieldPostfix = field.split('.').slice(1).join('.');
const prefixParts = prefixMatch.name.split('.');
const fieldPostfix = field.split('.').slice(prefixParts.length).join('.');
if (prefixMatch.type.startsWith('Map')) {
const valueType = prefixMatch.type.match(/,\s+(\w+)\)$/)?.[1];
@ -795,7 +1118,7 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
prefixMatch.name,
fieldPostfix,
]),
mapKeyIndexExpression: `indexHint(${buildMapContains(`${fieldPrefix}['${fieldPostfix}']`)})`,
mapKeyIndexExpression: `indexHint(${buildMapContains(`${prefixMatch.name}['${fieldPostfix}']`)})`,
columnType: valueType ?? 'Unknown',
};
} else if (prefixMatch.type.startsWith('JSON')) {
@ -828,8 +1151,15 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
),
columnType: 'String',
};
} else if (prefixMatch.type.startsWith('Array')) {
return {
found: true,
columnType: prefixMatch.type,
columnExpression: prefixMatch.name,
arrayMapKeyExpression: fieldPostfix,
};
}
// TODO: Support arrays and tuples
// TODO: Support tuples
throw new Error('Unsupported column type for prefix match');
}
@ -976,13 +1306,20 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
const expression = await this.buildColumnExpressionFromField(fieldFinal);
const { type, isArray } = convertCHTypeToLuceneSearchType(
expression.columnType,
);
return {
column: expression.columnExpression,
columnJSON: expression?.columnExpressionJSON,
propertyType:
convertCHTypeToPrimitiveJSType(expression.columnType) ?? undefined,
propertyType: type ?? undefined,
isArray,
found: expression.found,
mapKeyIndexExpression: expression.mapKeyIndexExpression,
arrayMapKeyExpression: isArray
? expression.arrayMapKeyExpression
: undefined,
};
}
}
@ -1001,7 +1338,7 @@ async function nodeTerm(
const nodeTerm = node;
let term = decodeSpecialTokens(nodeTerm.term);
// We should only negate the search for negated bare terms (ex. '-5')
// This meeans the field is implicit and the prefix is -
// This means the field is implicit and the prefix is -
if (isImplicitField && nodeTerm.prefix === '-') {
isNegatedField = true;
}
@ -1203,20 +1540,6 @@ export class SearchQueryBuilder {
return '';
}
// const implicitColumn = await this.serializer.getColumnForField(
// IMPLICIT_FIELD,
// );
// let querySql = this.searchQ
// .split(/\s+/)
// .map(queryToken =>
// SqlString.format(`lower(??) LIKE lower(?)`, [
// implicitColumn.column,
// `%${queryToken}%`,
// ]),
// )
// .join(' AND ');
const parsedQ = parse(this.searchQ);
return await genWhereSQL(parsedQ, this.serializer);
@ -1238,12 +1561,26 @@ export class SearchQueryBuilder {
}
}
export async function genEnglishExplanation(query: string): Promise<string> {
export async function genEnglishExplanation({
query,
metadata,
tableConnection,
}: {
query: string;
tableConnection: TableConnection;
metadata: Metadata;
}): Promise<string> {
try {
const { tableName, databaseName, connectionId } = tableConnection;
const parsedQ = parse(query);
if (parsedQ) {
const serializer = new EnglishSerializer();
const serializer = new EnglishSerializer({
metadata,
tableName,
databaseName,
connectionId,
});
return await serialize(parsedQ, serializer, {});
}
} catch (e) {