feat: Support field:(<term>...) Lucene searches (#1315)

# Summary

This PR updates HyperDX's lucene support to include parenthesized field searches of the form `<field>:(<term>...)`.

Prior to these changes, HyperDX would ignore the `<field>` entirely and search as if the query were just `<term>...`.

With these changes, the search is performed just like a `<term>...` search except:

1. The `field` is used for the search, instead of the implicit field expression (eg. `Body` for `otel_logs`)
2. The search is performed without `hasToken()`, as we assume that fields do not have bloom filters set up (matching the current behavior for how we search fields)

This support has the added benefit of unlocking multi-token substring searches (Ref HDX-1931)
- Previously, you could not search a field for a substring with multiple tokens. eg. `error.message:*Method not allowed*` is interpreted as 3 separate terms, and only `*Method` would be associated with `error.message`. `error.message:"Method not allowed"` and `error.message:"*Method not allowed*"` look for exact matches instead of substrings.
- Now, this can be accomplished with `error.message:("Method not allowed")`. This matches the current behavior of a search like `"Method not allowed"`, which would search the source's default implicit column (eg. `Body`) for the substring "Method not allowed".

## Testing

To test these changes, this PR adds a few dozen query parser unit test cases.
This commit is contained in:
Drew Davis 2025-11-04 18:39:58 -05:00 committed by GitHub
parent f612bf3c00
commit 6e628bcded
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 438 additions and 131 deletions

View file

@ -0,0 +1,5 @@
---
"@hyperdx/common-utils": patch
---
feat: Support field:(<term>...) Lucene searches

View file

@ -1,23 +1,30 @@
import { ClickhouseClient } from '@/clickhouse/node';
import { getMetadata } from '@/core/metadata';
import { CustomSchemaSQLSerializerV2 } from '@/queryParser';
import {
CustomSchemaSQLSerializerV2,
genEnglishExplanation,
SearchQueryBuilder,
} from '@/queryParser';
describe('CustomSchemaSQLSerializerV2 - json', () => {
function getTestTable(field) {
return { name: field, type: 'JSON' };
}
const metadata = getMetadata(
new ClickhouseClient({ host: 'http://localhost:8123' }),
);
// @ts-ignore
metadata.getColumn = ({ column }) => {
return new Promise((resolve, reject) => {
if (column.indexOf('.') >= 0) return resolve(undefined);
const testTable = getTestTable(column);
// @ts-ignore
return resolve(testTable);
});
};
metadata.getColumn = jest.fn().mockImplementation(async ({ column }) => {
if (column === 'ResourceAttributesJSON') {
return { name: 'ResourceAttributesJSON', type: 'JSON' };
} else if (column === 'LogAttributes') {
return { name: 'LogAttributes', type: 'Map' };
} else if (column === 'ServiceName') {
return { name: 'ServiceName', type: 'String' };
} else if (column === 'SeverityNumber') {
return { name: 'SeverityNumber', type: 'UInt8' };
} else if (column === 'foo') {
return { name: 'foo', type: 'String' };
} else {
return undefined;
}
});
const databaseName = 'testName';
const tableName = 'testTable';
const connectionId = 'testId';
@ -26,29 +33,30 @@ describe('CustomSchemaSQLSerializerV2 - json', () => {
databaseName,
tableName,
connectionId,
implicitColumnExpression: 'Body',
});
it('getColumnForField', async () => {
const field1 = 'serviceName.test';
const res1 = await serializer.getColumnForField(field1);
const field1 = 'ResourceAttributesJSON.test';
const res1 = await serializer.getColumnForField(field1, {});
expect(res1).toEqual({
column: '',
columnJSON: {
number:
"dynamicType(`serviceName`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `serviceName`.`test`",
string: 'toString(`serviceName`.`test`)',
"dynamicType(`ResourceAttributesJSON`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`test`",
string: 'toString(`ResourceAttributesJSON`.`test`)',
},
found: true,
propertyType: 'json',
});
const field2 = 'logBody.test.nest';
const res2 = await serializer.getColumnForField(field2);
const field2 = 'ResourceAttributesJSON.test.nest';
const res2 = await serializer.getColumnForField(field2, {});
expect(res2).toEqual({
column: '',
columnJSON: {
number:
"dynamicType(`logBody`.`test`.`nest`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `logBody`.`test`.`nest`",
string: 'toString(`logBody`.`test`.`nest`)',
"dynamicType(`ResourceAttributesJSON`.`test`.`nest`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`test`.`nest`",
string: 'toString(`ResourceAttributesJSON`.`test`.`nest`)',
},
found: true,
propertyType: 'json',
@ -56,59 +64,263 @@ describe('CustomSchemaSQLSerializerV2 - json', () => {
});
it('compare - eq, isNotNull, gte, lte, lt, gt', async () => {
const eqField = 'serviceName.eq.test';
const eqField = 'ResourceAttributesJSON.eq.test';
const eqTerm = 'testTerm';
const eq1 = await serializer.eq(eqField, eqTerm, false);
expect(eq1).toBe("(toString(`serviceName`.`eq`.`test`) = 'testTerm')");
const eq2 = await serializer.eq(eqField, eqTerm, true);
expect(eq2).toBe("(toString(`serviceName`.`eq`.`test`) != 'testTerm')");
const eq1 = await serializer.eq(eqField, eqTerm, false, {});
expect(eq1).toBe(
"(toString(`ResourceAttributesJSON`.`eq`.`test`) = 'testTerm')",
);
const eq2 = await serializer.eq(eqField, eqTerm, true, {});
expect(eq2).toBe(
"(toString(`ResourceAttributesJSON`.`eq`.`test`) != 'testTerm')",
);
});
it('compare - isNotNull', async () => {
const isNotNullField = 'serviceName.isNotNull.test';
const isNotNull1 = await serializer.isNotNull(isNotNullField, false);
const isNotNullField = 'ResourceAttributesJSON.isNotNull.test';
const isNotNull1 = await serializer.isNotNull(isNotNullField, false, {});
expect(isNotNull1).toBe(
'notEmpty(toString(`serviceName`.`isNotNull`.`test`)) = 1',
'notEmpty(toString(`ResourceAttributesJSON`.`isNotNull`.`test`)) = 1',
);
const isNotNull2 = await serializer.isNotNull(isNotNullField, true);
const isNotNull2 = await serializer.isNotNull(isNotNullField, true, {});
expect(isNotNull2).toBe(
'notEmpty(toString(`serviceName`.`isNotNull`.`test`)) != 1',
'notEmpty(toString(`ResourceAttributesJSON`.`isNotNull`.`test`)) != 1',
);
});
it('compare - gte', async () => {
const gteField = 'serviceName.gte.test';
const gteField = 'ResourceAttributesJSON.gte.test';
const gteTerm = '30';
const gte = await serializer.gte(gteField, gteTerm);
const gte = await serializer.gte(gteField, gteTerm, {});
expect(gte).toBe(
"(dynamicType(`serviceName`.`gte`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `serviceName`.`gte`.`test` >= '30')",
"(dynamicType(`ResourceAttributesJSON`.`gte`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`gte`.`test` >= '30')",
);
});
it('compare - lte', async () => {
const lteField = 'serviceName.lte.test';
const lteField = 'ResourceAttributesJSON.lte.test';
const lteTerm = '40';
const lte = await serializer.lte(lteField, lteTerm);
const lte = await serializer.lte(lteField, lteTerm, {});
expect(lte).toBe(
"(dynamicType(`serviceName`.`lte`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `serviceName`.`lte`.`test` <= '40')",
"(dynamicType(`ResourceAttributesJSON`.`lte`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`lte`.`test` <= '40')",
);
});
it('compare - gt', async () => {
const gtField = 'serviceName.gt.test';
const gtField = 'ResourceAttributesJSON.gt.test';
const gtTerm = '70';
const gt = await serializer.gt(gtField, gtTerm);
const gt = await serializer.gt(gtField, gtTerm, {});
expect(gt).toBe(
"(dynamicType(`serviceName`.`gt`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `serviceName`.`gt`.`test` > '70')",
"(dynamicType(`ResourceAttributesJSON`.`gt`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`gt`.`test` > '70')",
);
});
it('compare - lt', async () => {
const ltField = 'serviceName.lt.test';
const ltField = 'ResourceAttributesJSON.lt.test';
const ltTerm = '2';
const lt = await serializer.lt(ltField, ltTerm);
const lt = await serializer.lt(ltField, ltTerm, {});
expect(lt).toBe(
"(dynamicType(`serviceName`.`lt`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `serviceName`.`lt`.`test` < '2')",
"(dynamicType(`ResourceAttributesJSON`.`lt`.`test`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`lt`.`test` < '2')",
);
});
const testCases = [
{
lucene: '"foo bar baz"',
sql: "((hasToken(lower(Body), lower('foo')) AND hasToken(lower(Body), lower('bar')) AND hasToken(lower(Body), lower('baz')) AND (lower(Body) LIKE lower('%foo bar baz%'))))",
english: 'event has whole word "foo bar baz"',
},
{
lucene: 'foo bar baz',
sql: "((hasToken(lower(Body), lower('foo'))) AND (hasToken(lower(Body), lower('bar'))) AND (hasToken(lower(Body), lower('baz'))))",
english:
'event has whole word foo AND event has whole word bar AND event has whole word baz',
},
{
lucene: 'ServiceName:foo bar baz',
sql: "((ServiceName ILIKE '%foo%') AND (hasToken(lower(Body), lower('bar'))) AND (hasToken(lower(Body), lower('baz'))))",
english:
"'ServiceName' contains foo AND event has whole word bar AND event has whole word baz",
},
{
lucene: 'ServiceName:"foo bar baz"',
sql: "((ServiceName = 'foo bar baz'))",
english: "'ServiceName' is foo bar baz",
},
{
lucene: 'ServiceName:("foo bar baz")',
sql: "(((ServiceName ILIKE '%foo bar baz%')))",
english: '(ServiceName contains "foo bar baz")',
},
{
lucene: 'ServiceName:(abc def)',
sql: "(((ServiceName ILIKE '%abc%') AND (ServiceName ILIKE '%def%')))",
english: '(ServiceName contains abc AND ServiceName contains def)',
},
{
lucene: '(abc def)',
sql: "(((hasToken(lower(Body), lower('abc'))) AND (hasToken(lower(Body), lower('def')))))",
english: '(event has whole word abc AND event has whole word def)',
},
{
lucene: '("abc def")',
sql: "(((hasToken(lower(Body), lower('abc')) AND hasToken(lower(Body), lower('def')) AND (lower(Body) LIKE lower('%abc def%')))))",
english: '(event has whole word "abc def")',
},
{
lucene: 'foo:bar',
sql: "((foo ILIKE '%bar%'))",
english: "'foo' contains bar",
},
{
lucene: '(foo:bar)',
sql: "(((foo ILIKE '%bar%')))",
english: "('foo' contains bar)",
},
{
lucene: 'bar',
sql: "((hasToken(lower(Body), lower('bar'))))",
english: 'event has whole word bar',
},
{
lucene: '(bar)',
sql: "(((hasToken(lower(Body), lower('bar')))))",
english: '(event has whole word bar)',
},
{
lucene: 'foo:(bar)',
sql: "(((foo ILIKE '%bar%')))",
english: '(foo contains bar)',
},
{
lucene: 'foo:(bar) baz',
sql: "(((foo ILIKE '%bar%')) AND (hasToken(lower(Body), lower('baz'))))",
english: '(foo contains bar) AND event has whole word baz',
},
{
lucene: 'LogAttributes.error.message:("Failed to fetch")',
sql: "(((`LogAttributes`['error.message'] ILIKE '%Failed to fetch%')))",
english: '(LogAttributes.error.message contains "Failed to fetch")',
},
{
lucene: 'ResourceAttributesJSON.error.message:("Failed to fetch")',
sql: "(((toString(`ResourceAttributesJSON`.`error`.`message`) ILIKE '%Failed to fetch%')))",
english:
'(ResourceAttributesJSON.error.message contains "Failed to fetch")',
},
{
lucene: 'SeverityNumber:>10',
sql: "((SeverityNumber > '10'))",
english: "'SeverityNumber' is greater than 10",
},
{
lucene: 'ResourceAttributesJSON.error.severity:>10',
sql: "((dynamicType(`ResourceAttributesJSON`.`error`.`severity`) in ('Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', 'Float32', 'Float64') and `ResourceAttributesJSON`.`error`.`severity` > '10'))",
english: "'ResourceAttributesJSON.error.severity' is greater than 10",
},
{
lucene: 'foo:(bar baz)',
sql: "(((foo ILIKE '%bar%') AND (foo ILIKE '%baz%')))",
english: '(foo contains bar AND foo contains baz)',
},
{
lucene: '-foo:bar',
sql: "((foo NOT ILIKE '%bar%'))",
english: "'foo' does not contain bar",
},
{
lucene: 'NOT foo:(bar baz)',
sql: "(NOT ((foo ILIKE '%bar%') AND (foo ILIKE '%baz%')))",
english: 'NOT (foo contains bar AND foo contains baz)',
},
{
lucene: '-foo:(bar baz)',
sql: "(NOT ((foo ILIKE '%bar%') AND (foo ILIKE '%baz%')))",
english: 'NOT (foo contains bar AND foo contains baz)',
},
{
lucene: '-foo:(bar)',
sql: "(NOT ((foo ILIKE '%bar%')))",
english: 'NOT (foo contains bar)',
},
{
lucene: '-foo:(-bar)',
sql: "(NOT ((foo NOT ILIKE '%bar%')))",
english: 'NOT (foo does not contain bar)',
},
{
lucene: '*bar',
sql: "((lower(Body) LIKE lower('%bar')))",
english: 'event ends with bar',
},
{
lucene: 'foo:*bar',
sql: "((foo ILIKE '%bar%'))",
english: "'foo' contains bar",
},
{
lucene: 'foo:*bar*',
sql: "((foo ILIKE '%bar%'))",
english: "'foo' contains bar",
},
{
lucene: 'foo:(*bar)',
sql: "(((lower(foo) LIKE lower('%bar'))))",
english: '(foo ends with bar)',
},
{
lucene: 'foo:(bar*)',
sql: "(((lower(foo) LIKE lower('bar%'))))",
english: '(foo starts with bar)',
},
{
lucene: 'foo:(*bar*)',
sql: "(((lower(foo) LIKE lower('%bar%'))))",
english: '(foo contains bar)',
},
{
lucene: 'foo:[1 TO 5]',
sql: '((foo BETWEEN 1 AND 5))',
english: 'foo is between 1 and 5',
},
{
lucene: 'foo:(bar:(baz) qux)',
sql: "((((bar ILIKE '%baz%')) AND (foo ILIKE '%qux%')))",
english: '((bar contains baz) AND foo contains qux)',
},
];
it.each(testCases)(
'converts "$lucene" to SQL "$sql"',
async ({ lucene, sql }) => {
const builder = new SearchQueryBuilder(lucene, serializer);
const actualSql = await builder.build();
expect(actualSql).toBe(sql);
},
);
it.each(testCases)(
'converts "$lucene" to english "$english"',
async ({ lucene, english }) => {
const actualEnglish = await genEnglishExplanation(lucene);
expect(actualEnglish).toBe(english);
},
);
it('correctly searches multi-column implicit field', async () => {
const serializer = new CustomSchemaSQLSerializerV2({
metadata,
databaseName,
tableName,
connectionId,
implicitColumnExpression: 'Body, OtherColumn',
});
const lucene = 'foo bar';
const builder = new SearchQueryBuilder(lucene, serializer);
const actualSql = await builder.build();
const expectedSql =
"((hasToken(lower(concatWithSeparator(';',Body,OtherColumn)), lower('foo'))) AND (hasToken(lower(concatWithSeparator(';',Body,OtherColumn)), lower('bar'))))";
expect(actualSql).toBe(expectedSql);
});
});

View file

@ -46,33 +46,49 @@ const CLICK_HOUSE_JSON_NUMBER_TYPES = [
'Float64',
];
interface SerializerContext {
/** The current implicit column expression, indicating which SQL expression to use when comparing a term to the '<implicit>' field */
implicitColumnExpression?: string;
}
interface Serializer {
operator(op: lucene.Operator): string;
eq(field: string, term: string, isNegatedField: boolean): Promise<string>;
isNotNull(field: string, isNegatedField: boolean): Promise<string>;
gte(field: string, term: string): Promise<string>;
lte(field: string, term: string): Promise<string>;
lt(field: string, term: string): Promise<string>;
gt(field: string, term: string): Promise<string>;
operator(op: lucene.Operator, context: SerializerContext): string;
eq(
field: string,
term: string,
isNegatedField: boolean,
context: SerializerContext,
): Promise<string>;
isNotNull(
field: string,
isNegatedField: boolean,
context: SerializerContext,
): Promise<string>;
gte(field: string, term: string, context: SerializerContext): Promise<string>;
lte(field: string, term: string, context: SerializerContext): Promise<string>;
lt(field: string, term: string, context: SerializerContext): Promise<string>;
gt(field: string, term: string, context: SerializerContext): Promise<string>;
fieldSearch(
field: string,
term: string,
isNegatedField: boolean,
prefixWildcard: boolean,
suffixWildcard: boolean,
context: SerializerContext,
): Promise<string>;
range(
field: string,
start: string,
end: string,
isNegatedField: boolean,
context: SerializerContext,
): Promise<string>;
}
class EnglishSerializer implements Serializer {
private translateField(field: string) {
private translateField(field: string, context: SerializerContext) {
if (field === IMPLICIT_FIELD) {
return 'event';
return context.implicitColumnExpression ?? 'event';
}
return `'${field}'`;
@ -99,49 +115,56 @@ class EnglishSerializer implements Serializer {
}
}
async eq(field: string, term: string, isNegatedField: boolean) {
return `${this.translateField(field)} ${
async eq(
field: string,
term: string,
isNegatedField: boolean,
context: SerializerContext,
) {
return `${this.translateField(field, context)} ${
isNegatedField ? 'is not' : 'is'
} ${term}`;
}
async isNotNull(field: string, isNegatedField: boolean) {
return `${this.translateField(field)} ${
async isNotNull(
field: string,
isNegatedField: boolean,
context: SerializerContext,
) {
return `${this.translateField(field, context)} ${
isNegatedField ? 'is null' : 'is not null'
}`;
}
async gte(field: string, term: string) {
return `${this.translateField(field)} is greater than or equal to ${term}`;
async gte(field: string, term: string, context: SerializerContext) {
return `${this.translateField(field, context)} is greater than or equal to ${term}`;
}
async lte(field: string, term: string) {
return `${this.translateField(field)} is less than or equal to ${term}`;
async lte(field: string, term: string, context: SerializerContext) {
return `${this.translateField(field, context)} is less than or equal to ${term}`;
}
async lt(field: string, term: string) {
return `${this.translateField(field)} is less than ${term}`;
async lt(field: string, term: string, context: SerializerContext) {
return `${this.translateField(field, context)} is less than ${term}`;
}
async gt(field: string, term: string) {
return `${this.translateField(field)} is greater than ${term}`;
async gt(field: string, term: string, context: SerializerContext) {
return `${this.translateField(field, context)} is greater than ${term}`;
}
// async fieldSearch(field: string, term: string, isNegatedField: boolean) {
// return `${this.translateField(field)} ${
// isNegatedField ? 'does not contain' : 'contains'
// } ${term}`;
// }
async fieldSearch(
field: string,
term: string,
isNegatedField: boolean,
prefixWildcard: boolean,
suffixWildcard: boolean,
context: SerializerContext,
) {
const formattedTerm = term.trim().match(/\s/) ? `"${term}"` : term;
if (field === IMPLICIT_FIELD) {
return `${this.translateField(field)} ${
const isUsingTokenSearch = !context.implicitColumnExpression; // Source's implicit column has not been overridden
return `${this.translateField(field, context)} ${
prefixWildcard && suffixWildcard
? isNegatedField
? 'does not contain'
@ -154,14 +177,18 @@ class EnglishSerializer implements Serializer {
? isNegatedField
? 'does not start with'
: 'starts with'
: isNegatedField
? 'does not have whole word'
: 'has whole word'
} ${term}`;
: isUsingTokenSearch
? isNegatedField
? 'does not have whole word'
: 'has whole word'
: isNegatedField
? 'does not contain'
: 'contains'
} ${formattedTerm}`;
} else {
return `${this.translateField(field)} ${
return `${this.translateField(field, context)} ${
isNegatedField ? 'does not contain' : 'contains'
} ${term}`;
} ${formattedTerm}`;
}
}
@ -180,7 +207,10 @@ class EnglishSerializer implements Serializer {
export abstract class SQLSerializer implements Serializer {
private NOT_FOUND_QUERY = '(1 = 0)';
abstract getColumnForField(field: string): Promise<{
abstract getColumnForField(
field: string,
context: SerializerContext,
): Promise<{
column?: string;
columnJSON?: { string: string; number: string };
propertyType?: JSDataType;
@ -209,9 +239,14 @@ export abstract class SQLSerializer implements Serializer {
}
// Only for exact string matches
async eq(field: string, term: string, isNegatedField: boolean) {
async eq(
field: string,
term: string,
isNegatedField: boolean,
context: SerializerContext,
) {
const { column, columnJSON, found, propertyType } =
await this.getColumnForField(field);
await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -238,9 +273,13 @@ export abstract class SQLSerializer implements Serializer {
]);
}
async isNotNull(field: string, isNegatedField: boolean) {
async isNotNull(
field: string,
isNegatedField: boolean,
context: SerializerContext,
) {
const { column, columnJSON, found, propertyType } =
await this.getColumnForField(field);
await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -250,9 +289,9 @@ export abstract class SQLSerializer implements Serializer {
return `notEmpty(${column}) ${isNegatedField ? '!' : ''}= 1`;
}
async gte(field: string, term: string) {
async gte(field: string, term: string, context: SerializerContext) {
const { column, columnJSON, found, propertyType } =
await this.getColumnForField(field);
await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -262,9 +301,9 @@ export abstract class SQLSerializer implements Serializer {
return SqlString.format(`(${column} >= ?)`, [term]);
}
async lte(field: string, term: string) {
async lte(field: string, term: string, context: SerializerContext) {
const { column, columnJSON, found, propertyType } =
await this.getColumnForField(field);
await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -274,9 +313,9 @@ export abstract class SQLSerializer implements Serializer {
return SqlString.format(`(${column} <= ?)`, [term]);
}
async lt(field: string, term: string) {
async lt(field: string, term: string, context: SerializerContext) {
const { column, columnJSON, found, propertyType } =
await this.getColumnForField(field);
await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -286,9 +325,9 @@ export abstract class SQLSerializer implements Serializer {
return SqlString.format(`(${column} < ?)`, [term]);
}
async gt(field: string, term: string) {
async gt(field: string, term: string, context: SerializerContext) {
const { column, columnJSON, found, propertyType } =
await this.getColumnForField(field);
await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -323,10 +362,11 @@ export abstract class SQLSerializer implements Serializer {
isNegatedField: boolean,
prefixWildcard: boolean,
suffixWildcard: boolean,
context: SerializerContext,
) {
const isImplicitField = field === IMPLICIT_FIELD;
const { column, columnJSON, found, propertyType } =
await this.getColumnForField(field);
await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -358,8 +398,13 @@ export abstract class SQLSerializer implements Serializer {
}
if (isImplicitField) {
// For implicit fields that come directly from the Source, we assume there is a bloom filter that can be used to
// optimize searches with hasToken. Overridden implicit columns (eg. "foo" in "foo:("bar baz")") are assumed
// to not have bloom filters.
const shouldUseTokenBf = !context.implicitColumnExpression;
// For the _source column, we'll try to do whole word searches by default
// to utilize the token bloom filter unless a prefix/sufix wildcard is specified
// to utilize the token bloom filter unless a prefix/suffix wildcard is specified
if (prefixWildcard || suffixWildcard) {
return SqlString.format(
`(lower(?) ${isNegatedField ? 'NOT ' : ''}LIKE lower(?))`,
@ -368,7 +413,7 @@ export abstract class SQLSerializer implements Serializer {
`${prefixWildcard ? '%' : ''}${term}${suffixWildcard ? '%' : ''}`,
],
);
} else {
} else if (shouldUseTokenBf) {
// TODO: Check case sensitivity of the index before lowering by default
// We can't search multiple tokens with `hasToken`, so we need to split up the term into tokens
const hasSeperators = this.termHasSeperators(term);
@ -394,13 +439,12 @@ export abstract class SQLSerializer implements Serializer {
);
}
}
} else {
const shoudUseTokenBf = isImplicitField;
return SqlString.format(
`(${column} ${isNegatedField ? 'NOT ' : ''}? ?)`,
[SqlString.raw(shoudUseTokenBf ? 'LIKE' : 'ILIKE'), `%${term}%`],
);
}
return SqlString.format(`(${column} ${isNegatedField ? 'NOT ' : ''}? ?)`, [
SqlString.raw('ILIKE'),
`%${term}%`,
]);
}
async range(
@ -408,8 +452,9 @@ export abstract class SQLSerializer implements Serializer {
start: string,
end: string,
isNegatedField: boolean,
context: SerializerContext,
) {
const { column, found } = await this.getColumnForField(field);
const { column, found } = await this.getColumnForField(field, context);
if (!found) {
return this.NOT_FOUND_QUERY;
}
@ -543,30 +588,38 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
// throw new Error(`Column not found: ${field}`);
}
async getColumnForField(field: string) {
if (field === IMPLICIT_FIELD) {
if (!this.implicitColumnExpression) {
throw new Error(
'Can not search bare text without an implicit column set.',
);
}
const expressions = splitAndTrimWithBracket(
this.implicitColumnExpression,
async getColumnForField(field: string, context: SerializerContext) {
const implicitColumnExpression =
context.implicitColumnExpression ?? this.implicitColumnExpression;
if (field === IMPLICIT_FIELD && !implicitColumnExpression) {
throw new Error(
'Can not search bare text without an implicit column set.',
);
}
const fieldFinal =
field === IMPLICIT_FIELD ? implicitColumnExpression! : field;
if (
field === IMPLICIT_FIELD &&
implicitColumnExpression === this.implicitColumnExpression // Source's implicit column has not been overridden
) {
// Sources can specify multi-column implicit columns, eg. Body and Message, in
// which case we search the combined string `concatWithSeparator(';', Body, Message)`.
const expressions = splitAndTrimWithBracket(fieldFinal);
return {
column:
expressions.length > 1
? `concatWithSeparator(';',${expressions.join(',')})`
: this.implicitColumnExpression,
: fieldFinal,
columnJSON: undefined,
propertyType: JSDataType.String,
found: true,
};
}
const expression = await this.buildColumnExpressionFromField(field);
const expression = await this.buildColumnExpressionFromField(fieldFinal);
return {
column: expression.columnExpression,
@ -581,6 +634,7 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer {
async function nodeTerm(
node: lucene.Node,
serializer: Serializer,
context: SerializerContext,
): Promise<string> {
const field = node.field[0] === '-' ? node.field.slice(1) : node.field;
let isNegatedField = node.field[0] === '-';
@ -605,36 +659,36 @@ async function nodeTerm(
// If the term is quoted, we should search for the exact term in a property (ex. foo:"bar")
// Implicit field searches should still use substring matching (ex. "foo bar")
if (nodeTerm.quoted && !isImplicitField) {
return serializer.eq(field, term, isNegatedField);
return serializer.eq(field, term, isNegatedField, context);
}
if (!nodeTerm.quoted && term === '*') {
return serializer.isNotNull(field, isNegatedField);
return serializer.isNotNull(field, isNegatedField, context);
}
if (!nodeTerm.quoted && term.substring(0, 2) === '>=') {
if (isNegatedField) {
return serializer.lt(field, term.slice(2));
return serializer.lt(field, term.slice(2), context);
}
return serializer.gte(field, term.slice(2));
return serializer.gte(field, term.slice(2), context);
}
if (!nodeTerm.quoted && term.substring(0, 2) === '<=') {
if (isNegatedField) {
return serializer.gt(field, term.slice(2));
return serializer.gt(field, term.slice(2), context);
}
return serializer.lte(field, term.slice(2));
return serializer.lte(field, term.slice(2), context);
}
if (!nodeTerm.quoted && term[0] === '>') {
if (isNegatedField) {
return serializer.lte(field, term.slice(1));
return serializer.lte(field, term.slice(1), context);
}
return serializer.gt(field, term.slice(1));
return serializer.gt(field, term.slice(1), context);
}
if (!nodeTerm.quoted && term[0] === '<') {
if (isNegatedField) {
return serializer.gte(field, term.slice(1));
return serializer.gte(field, term.slice(1), context);
}
return serializer.lt(field, term.slice(1));
return serializer.lt(field, term.slice(1), context);
}
let prefixWildcard = false;
@ -654,6 +708,7 @@ async function nodeTerm(
isNegatedField,
prefixWildcard,
suffixWildcard,
context,
);
// TODO: Handle regex, similarity, boost, prefix
@ -666,24 +721,50 @@ async function nodeTerm(
rangedTerm.term_min,
rangedTerm.term_max,
isNegatedField,
context,
);
}
throw new Error(`Unexpected Node type. ${node}`);
}
function createSerializerContext(
currentContext: SerializerContext,
ast: lucene.BinaryAST | lucene.LeftOnlyAST,
) {
// For syntax like `foo:(bar baz)` or `foo:("bar baz")`, the implicit field for the inner expression must be `foo`
if (ast.field && ast.parenthesized && ast.field !== IMPLICIT_FIELD) {
const fieldWithoutNegation = ast.field?.startsWith('-')
? ast.field.slice(1)
: ast.field;
return {
...currentContext,
implicitColumnExpression: fieldWithoutNegation,
};
} else {
return currentContext;
}
}
/** Returns true if the AST is of the form `-[field]:([terms...])` */
function isNegatedAndParenthesized(ast: lucene.BinaryAST | lucene.LeftOnlyAST) {
return ast.parenthesized && ast.field?.startsWith('-');
}
async function serialize(
ast: lucene.AST | lucene.Node,
serializer: Serializer,
context: SerializerContext,
): Promise<string> {
// Node Scenarios:
// 1. NodeTerm: Single term ex. "foo:bar"
// 2. NodeRangedTerm: Two terms ex. "foo:[bar TO qux]"
if ((ast as lucene.NodeTerm).term != null) {
return await nodeTerm(ast as lucene.NodeTerm, serializer);
return await nodeTerm(ast as lucene.NodeTerm, serializer, context);
}
if ((ast as lucene.NodeRangedTerm).inclusive != null) {
return await nodeTerm(ast as lucene.NodeTerm, serializer);
return await nodeTerm(ast as lucene.NodeTerm, serializer, context);
}
// AST Scenarios:
@ -691,25 +772,34 @@ async function serialize(
// 2. LeftOnlyAST: Single term ex. "foo:bar"
if ((ast as lucene.BinaryAST).right != null) {
const binaryAST = ast as lucene.BinaryAST;
const operator = serializer.operator(binaryAST.operator);
const operator = serializer.operator(binaryAST.operator, context);
const parenthesized = binaryAST.parenthesized;
return `${parenthesized ? '(' : ''}${await serialize(
const newContext = createSerializerContext(context, binaryAST);
const serialized = `${isNegatedAndParenthesized(binaryAST) ? 'NOT ' : ''}${parenthesized ? '(' : ''}${await serialize(
binaryAST.left,
serializer,
)} ${operator} ${await serialize(binaryAST.right, serializer)}${
newContext,
)} ${operator} ${await serialize(binaryAST.right, serializer, newContext)}${
parenthesized ? ')' : ''
}`;
return serialized;
}
if ((ast as lucene.LeftOnlyAST).left != null) {
const leftOnlyAST = ast as lucene.LeftOnlyAST;
const parenthesized = leftOnlyAST.parenthesized;
const newContext = createSerializerContext(context, leftOnlyAST);
// start is used when ex. "NOT foo:bar"
return `${parenthesized ? '(' : ''}${
const serialized = `${isNegatedAndParenthesized(leftOnlyAST) ? 'NOT ' : ''}${parenthesized ? '(' : ''}${
leftOnlyAST.start != undefined ? `${leftOnlyAST.start} ` : ''
}${await serialize(leftOnlyAST.left, serializer)}${
}${await serialize(leftOnlyAST.left, serializer, newContext)}${
parenthesized ? ')' : ''
}`;
return serialized;
}
// Blank AST, means no text was parsed
@ -721,7 +811,7 @@ export async function genWhereSQL(
ast: lucene.AST,
serializer: Serializer,
): Promise<string> {
return await serialize(ast, serializer);
return await serialize(ast, serializer, {});
}
export class SearchQueryBuilder {
@ -793,7 +883,7 @@ export async function genEnglishExplanation(query: string): Promise<string> {
if (parsedQ) {
const serializer = new EnglishSerializer();
return await serialize(parsedQ, serializer);
return await serialize(parsedQ, serializer, {});
}
} catch (e) {
console.warn('Parse failure', query, e);