fix: Correctly detect text index with quoted tokenizer argument (#1985)

## Summary

This PR fixes text index detection for indexes with quoted tokenizer arguments:

```
TYPE text(tokenizer = 'splitByNonAlpha')
```

### Screenshots or video

### How to test locally or on Vercel

The unit tests demonstrate the fix.

### References



- Linear Issue: Closes HDX-3812
- Related PRs:
This commit is contained in:
Drew Davis 2026-03-25 09:51:05 -04:00 committed by GitHub
parent 629009da9e
commit 4f7dd9ef63
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 45 additions and 1 deletions

View file

@@ -0,0 +1,5 @@
---
"@hyperdx/common-utils": patch
---
fix: Correctly detect text index with quoted tokenizer argument

View file

@@ -855,6 +855,34 @@ describe('CustomSchemaSQLSerializerV2 - text indices', () => {
expect(sql).toBe("((hasAllTokens(Body, 'foo')))");
});
// Regression test for quoted tokenizer arguments (HDX-3812): the text index
// here declares tokenizer='splitByNonAlpha' WITH quotes, and detection must
// still succeed so the query uses the hasAllTokens() fast path.
it('should use hasAllTokens when text index exists on multi-column expression', async () => {
// Mock a skip index whose expression spans two columns; typeFull carries the
// quoted tokenizer argument that previously broke detection.
metadata.getSkipIndices = jest.fn().mockResolvedValue([
{
name: 'idx_body_text',
type: 'text',
typeFull:
"text(tokenizer='splitByNonAlpha', preprocessor=lower(concatWithSeparator(';', Body, OtherColumn)))",
expression: "concatWithSeparator(';', Body, OtherColumn)",
granularity: '8',
},
]);
// implicitColumnExpression matches the index expression exactly, so the
// text index should apply to the default (implicit) search column.
const serializer = new CustomSchemaSQLSerializerV2({
metadata,
databaseName,
tableName,
connectionId,
implicitColumnExpression: "concatWithSeparator(';', Body, OtherColumn)",
});
const builder = new SearchQueryBuilder('foo', serializer);
const sql = await builder.build();
// Expect the token-index form over the full multi-column expression rather
// than a fallback LIKE/ILIKE predicate.
expect(sql).toBe(
"((hasAllTokens(concatWithSeparator(';', Body, OtherColumn), 'foo')))",
);
});
it('should use hasAllTokens for multi-token terms with single call', async () => {
metadata.getSkipIndices = jest.fn().mockResolvedValue([
{

View file

@@ -1831,6 +1831,14 @@ describe('utils', () => {
type: 'text( tokenizer = splitByNonAlpha )',
expected: { type: 'splitByNonAlpha' },
},
{
type: "text(tokenizer = 'splitByNonAlpha')",
expected: { type: 'splitByNonAlpha' },
},
{
type: 'text(tokenizer = "splitByNonAlpha")',
expected: { type: 'splitByNonAlpha' },
},
{
type: 'text(tokenizer = splitByString())',
expected: { type: 'splitByString', separators: [' '] },

View file

@@ -816,7 +816,10 @@ export function parseTokenizerFromTextIndex({
return { key, value };
});
const tokenizerArg = args.find(arg => arg.key === 'tokenizer')?.value;
const tokenizerArgRaw = args.find(arg => arg.key === 'tokenizer')?.value;
// Strip surrounding quotes if present (e.g., 'splitByNonAlpha' -> splitByNonAlpha)
const tokenizerArg = stripQuotes(tokenizerArgRaw ?? '');
if (!tokenizerArg) {
console.error(
`Invalid tokenizer argument in index type ${typeFull}: ${tokenizerArg}`,