mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
fix: Correctly detect text index with quoted tokenizer argument (#1985)
## Summary

This PR fixes text index detection for indexes with quoted tokenizer arguments, e.g. `TYPE text(tokenizer = 'splitByNonAlpha')`.

### Screenshots or video

### How to test locally or on Vercel

The unit tests demonstrate the fix.

### References

- Linear Issue: Closes HDX-3812
- Related PRs:
This commit is contained in:
parent
629009da9e
commit
4f7dd9ef63
4 changed files with 45 additions and 1 deletions
5
.changeset/eighty-rice-cough.md
Normal file
5
.changeset/eighty-rice-cough.md
Normal file
|
|
@@ -0,0 +1,5 @@
|
|||
---
|
||||
"@hyperdx/common-utils": patch
|
||||
---
|
||||
|
||||
fix: Correctly detect text index with quoted tokenizer argument
|
||||
|
|
@@ -855,6 +855,34 @@ describe('CustomSchemaSQLSerializerV2 - text indices', () => {
|
|||
expect(sql).toBe("((hasAllTokens(Body, 'foo')))");
|
||||
});
|
||||
|
||||
it('should use hasAllTokens when text index exists on multi-column expression', async () => {
|
||||
metadata.getSkipIndices = jest.fn().mockResolvedValue([
|
||||
{
|
||||
name: 'idx_body_text',
|
||||
type: 'text',
|
||||
typeFull:
|
||||
"text(tokenizer='splitByNonAlpha', preprocessor=lower(concatWithSeparator(';', Body, OtherColumn)))",
|
||||
expression: "concatWithSeparator(';', Body, OtherColumn)",
|
||||
granularity: '8',
|
||||
},
|
||||
]);
|
||||
|
||||
const serializer = new CustomSchemaSQLSerializerV2({
|
||||
metadata,
|
||||
databaseName,
|
||||
tableName,
|
||||
connectionId,
|
||||
implicitColumnExpression: "concatWithSeparator(';', Body, OtherColumn)",
|
||||
});
|
||||
|
||||
const builder = new SearchQueryBuilder('foo', serializer);
|
||||
const sql = await builder.build();
|
||||
|
||||
expect(sql).toBe(
|
||||
"((hasAllTokens(concatWithSeparator(';', Body, OtherColumn), 'foo')))",
|
||||
);
|
||||
});
|
||||
|
||||
it('should use hasAllTokens for multi-token terms with single call', async () => {
|
||||
metadata.getSkipIndices = jest.fn().mockResolvedValue([
|
||||
{
|
||||
|
|
|
|||
|
|
@@ -1831,6 +1831,14 @@ describe('utils', () => {
|
|||
type: 'text( tokenizer = splitByNonAlpha )',
|
||||
expected: { type: 'splitByNonAlpha' },
|
||||
},
|
||||
{
|
||||
type: "text(tokenizer = 'splitByNonAlpha')",
|
||||
expected: { type: 'splitByNonAlpha' },
|
||||
},
|
||||
{
|
||||
type: 'text(tokenizer = "splitByNonAlpha")',
|
||||
expected: { type: 'splitByNonAlpha' },
|
||||
},
|
||||
{
|
||||
type: 'text(tokenizer = splitByString())',
|
||||
expected: { type: 'splitByString', separators: [' '] },
|
||||
|
|
|
|||
|
|
@@ -816,7 +816,10 @@ export function parseTokenizerFromTextIndex({
|
|||
return { key, value };
|
||||
});
|
||||
|
||||
const tokenizerArg = args.find(arg => arg.key === 'tokenizer')?.value;
|
||||
const tokenizerArgRaw = args.find(arg => arg.key === 'tokenizer')?.value;
|
||||
|
||||
// Strip surrounding quotes if present (e.g., 'splitByNonAlpha' -> splitByNonAlpha)
|
||||
const tokenizerArg = stripQuotes(tokenizerArgRaw ?? '');
|
||||
if (!tokenizerArg) {
|
||||
console.error(
|
||||
`Invalid tokenizer argument in index type ${typeFull}: ${tokenizerArg}`,
|
||||
|
|
|
|||
Loading…
Reference in a new issue