fix: Correctly detect text index with quoted tokenizer argument (#1985)

## Summary

This PR fixes text index detection for indexes with quoted tokenizer arguments:

```
TYPE text(tokenizer = 'splitByNonAlpha')
```

### Screenshots or video

### How to test locally or on Vercel

The unit tests demonstrate the fix.

### References



- Linear Issue: Closes HDX-3812
- Related PRs:
This commit is contained in:
Drew Davis 2026-03-25 09:51:05 -04:00 committed by GitHub
parent 629009da9e
commit 4f7dd9ef63
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 45 additions and 1 deletions

View file

@@ -0,0 +1,5 @@
---
"@hyperdx/common-utils": patch
---
fix: Correctly detect text index with quoted tokenizer argument

View file

@@ -855,6 +855,34 @@ describe('CustomSchemaSQLSerializerV2 - text indices', () => {
expect(sql).toBe("((hasAllTokens(Body, 'foo')))");
});
// Regression test for quoted tokenizer arguments (HDX-3812): the text index
// here declares tokenizer='splitByNonAlpha' WITH quotes, and detection must
// still succeed so the query uses the hasAllTokens() fast path.
it('should use hasAllTokens when text index exists on multi-column expression', async () => {
// Mock a skip index whose expression spans two columns; typeFull carries the
// quoted tokenizer argument that previously broke detection.
metadata.getSkipIndices = jest.fn().mockResolvedValue([
{
name: 'idx_body_text',
type: 'text',
typeFull:
"text(tokenizer='splitByNonAlpha', preprocessor=lower(concatWithSeparator(';', Body, OtherColumn)))",
expression: "concatWithSeparator(';', Body, OtherColumn)",
granularity: '8',
},
]);
// implicitColumnExpression matches the index expression exactly, so the
// text index should apply to the default (implicit) search column.
const serializer = new CustomSchemaSQLSerializerV2({
metadata,
databaseName,
tableName,
connectionId,
implicitColumnExpression: "concatWithSeparator(';', Body, OtherColumn)",
});
const builder = new SearchQueryBuilder('foo', serializer);
const sql = await builder.build();
// Expect the token-index form over the full multi-column expression rather
// than a fallback LIKE/ILIKE predicate.
expect(sql).toBe(
"((hasAllTokens(concatWithSeparator(';', Body, OtherColumn), 'foo')))",
);
});
it('should use hasAllTokens for multi-token terms with single call', async () => {
metadata.getSkipIndices = jest.fn().mockResolvedValue([
{

View file

@@ -1831,6 +1831,14 @@ describe('utils', () => {
type: 'text( tokenizer = splitByNonAlpha )',
expected: { type: 'splitByNonAlpha' },
},
{
type: "text(tokenizer = 'splitByNonAlpha')",
expected: { type: 'splitByNonAlpha' },
},
{
type: 'text(tokenizer = "splitByNonAlpha")',
expected: { type: 'splitByNonAlpha' },
},
{
type: 'text(tokenizer = splitByString())',
expected: { type: 'splitByString', separators: [' '] },

View file

@@ -816,7 +816,10 @@ export function parseTokenizerFromTextIndex({
return { key, value };
});
const tokenizerArg = args.find(arg => arg.key === 'tokenizer')?.value;
const tokenizerArgRaw = args.find(arg => arg.key === 'tokenizer')?.value;
// Strip surrounding quotes if present (e.g., 'splitByNonAlpha' -> splitByNonAlpha)
const tokenizerArg = stripQuotes(tokenizerArgRaw ?? '');
if (!tokenizerArg) {
console.error(
`Invalid tokenizer argument in index type ${typeFull}: ${tokenizerArg}`,