mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
627 lines
18 KiB
TypeScript
627 lines
18 KiB
TypeScript
import SqlString from 'sqlstring';
|
|
import lucene from '@hyperdx/lucene';
|
|
import { serializeError } from 'serialize-error';
|
|
|
|
import { LogPlatform, LogType } from '../utils/logParser';
|
|
import { PropertyTypeMappingsModel } from './propertyTypeMappingsModel';
|
|
|
|
function encodeSpecialTokens(query: string): string {
|
|
return query
|
|
.replace('http://', 'http_COLON_//')
|
|
.replace('https://', 'https_COLON_//')
|
|
.replace(/localhost:(\d{1,5})/, 'localhost_COLON_$1')
|
|
.replace(/\\:/g, 'HDX_COLON');
|
|
}
|
|
function decodeSpecialTokens(query: string): string {
|
|
return query
|
|
.replace('http_COLON_//', 'http://')
|
|
.replace('https_COLON_//', 'https://')
|
|
.replace(/localhost_COLON_(\d{1,5})/, 'localhost:$1')
|
|
.replace(/HDX_COLON/g, ':');
|
|
}
|
|
|
|
export function parse(query: string): lucene.AST {
|
|
return lucene.parse(encodeSpecialTokens(query));
|
|
}
|
|
|
|
const IMPLICIT_FIELD = '<implicit>';
|
|
|
|
export const msToBigIntNs = (ms: number) => BigInt(ms * 1000000);
|
|
export const isLikelyTokenTerm = (term: string) => {
|
|
return term.length >= 16;
|
|
};
|
|
|
|
const customColumnMap: { [level: string]: string } = {
|
|
[IMPLICIT_FIELD]: '_source',
|
|
body: '_hdx_body',
|
|
duration: '_duration',
|
|
end_timestamp: 'end_timestamp',
|
|
host: '_host',
|
|
hyperdx_event_size: '_hyperdx_event_size',
|
|
hyperdx_platform: '_platform',
|
|
level: 'severity_text',
|
|
parent_span_id: 'parent_span_id',
|
|
rum_session_id: '_rum_session_id',
|
|
service: '_service',
|
|
span_id: 'span_id',
|
|
span_name: 'span_name',
|
|
timestamp: 'timestamp',
|
|
trace_id: 'trace_id',
|
|
userEmail: '_user_email',
|
|
userId: '_user_id',
|
|
userName: '_user_name',
|
|
};
|
|
export const customColumnMapType: {
|
|
[property: string]: 'string' | 'number' | 'bool';
|
|
} = {
|
|
[IMPLICIT_FIELD]: 'string',
|
|
body: 'string',
|
|
duration: 'number',
|
|
host: 'string',
|
|
hyperdx_event_size: 'number',
|
|
hyperdx_platform: 'string',
|
|
level: 'string',
|
|
parent_span_id: 'string',
|
|
rum_session_id: 'string',
|
|
service: 'string',
|
|
span_id: 'string',
|
|
span_name: 'string',
|
|
trace_id: 'string',
|
|
userEmail: 'string',
|
|
userId: 'string',
|
|
userName: 'string',
|
|
};
|
|
|
|
export const isCustomColumn = (name: string) => customColumnMap[name] != null;
|
|
|
|
// used by rrweb table
|
|
export const buildSearchColumnName_OLD = (
|
|
type: 'string' | 'number' | 'bool',
|
|
name: string,
|
|
) => {
|
|
if (customColumnMap[name] != null) {
|
|
return customColumnMap[name];
|
|
}
|
|
|
|
return type != null && name != null
|
|
? `"${type}.values"[indexOf("${type}.names", ${SqlString.escape(name)})]`
|
|
: null;
|
|
};
|
|
|
|
export const buildSearchColumnName = (
|
|
type: 'string' | 'number' | 'bool' | undefined | null,
|
|
name: string,
|
|
) => {
|
|
if (customColumnMap[name] != null) {
|
|
return customColumnMap[name];
|
|
}
|
|
|
|
if (name != null && type != null) {
|
|
return SqlString.format(`_${type}_attributes[?]`, [name]);
|
|
}
|
|
|
|
return null;
|
|
};
|
|
|
|
interface Serializer {
|
|
eq(field: string, term: string, isNegatedField: boolean): Promise<string>;
|
|
isNotNull(field: string, isNegatedField: boolean): Promise<string>;
|
|
gte(field: string, term: string): Promise<string>;
|
|
lte(field: string, term: string): Promise<string>;
|
|
lt(field: string, term: string): Promise<string>;
|
|
gt(field: string, term: string): Promise<string>;
|
|
fieldSearch(
|
|
field: string,
|
|
term: string,
|
|
isNegatedField: boolean,
|
|
prefixWildcard: boolean,
|
|
suffixWildcard: boolean,
|
|
): Promise<string>;
|
|
range(
|
|
field: string,
|
|
start: string,
|
|
end: string,
|
|
isNegatedField: boolean,
|
|
): Promise<string>;
|
|
}
|
|
|
|
type SQLSerializerOptions = {
|
|
useTokenization: boolean;
|
|
};
|
|
export class SQLSerializer implements Serializer {
|
|
private NOT_FOUND_QUERY = '(1 = 0)';
|
|
|
|
private alreadyRefrehPropertyTypeMapModel = false;
|
|
|
|
propertyTypeMapModel: PropertyTypeMappingsModel;
|
|
|
|
options: SQLSerializerOptions | undefined;
|
|
|
|
constructor(
|
|
propertyTypeMappingsModel: PropertyTypeMappingsModel,
|
|
opts?: SQLSerializerOptions,
|
|
) {
|
|
this.propertyTypeMapModel = propertyTypeMappingsModel;
|
|
this.options = opts ?? {
|
|
useTokenization: false,
|
|
};
|
|
}
|
|
|
|
private getCustomFieldOnly(field: string) {
|
|
return {
|
|
column: customColumnMap[field],
|
|
propertyType: customColumnMapType[field],
|
|
found: customColumnMap[field] != null, // propertyType can be null
|
|
};
|
|
}
|
|
|
|
// In the future this may trigger network calls against a property mapping cache
|
|
async getColumnForField(field: string) {
|
|
const customField = this.getCustomFieldOnly(field);
|
|
if (customField.found) {
|
|
return customField;
|
|
}
|
|
|
|
let propertyType = this.propertyTypeMapModel.get(field);
|
|
// TODO: Deal with ambiguous fields
|
|
let column: string = field;
|
|
// refresh cache if property not found
|
|
if (propertyType == null && !this.alreadyRefrehPropertyTypeMapModel) {
|
|
this.alreadyRefrehPropertyTypeMapModel = true;
|
|
// TODO: what if the property type doesn't exist?
|
|
// we need to setup a cap on how many times we refresh the cache
|
|
await this.propertyTypeMapModel.refresh();
|
|
propertyType = this.propertyTypeMapModel.get(field);
|
|
}
|
|
|
|
if (propertyType != null) {
|
|
column = buildSearchColumnName(propertyType, field);
|
|
}
|
|
|
|
return {
|
|
column,
|
|
propertyType,
|
|
found: column != null && propertyType != null,
|
|
};
|
|
}
|
|
|
|
async eq(field: string, term: string, isNegatedField: boolean) {
|
|
const { column, found, propertyType } = await this.getColumnForField(field);
|
|
if (!found) {
|
|
return this.NOT_FOUND_QUERY;
|
|
}
|
|
if (propertyType === 'bool') {
|
|
// numeric and boolean fields must be equality matched
|
|
return SqlString.format(`(${column} ${isNegatedField ? '!' : ''}= ?)`, [
|
|
parseInt(term),
|
|
]);
|
|
} else if (propertyType === 'number') {
|
|
return SqlString.format(
|
|
`(${column} ${isNegatedField ? '!' : ''}= CAST(?, 'Float64'))`,
|
|
[term],
|
|
);
|
|
}
|
|
return SqlString.format(`(${column} ${isNegatedField ? '!' : ''}= ?)`, [
|
|
term,
|
|
]);
|
|
}
|
|
|
|
async isNotNull(field: string, isNegatedField: boolean) {
|
|
const customField = this.getCustomFieldOnly(field);
|
|
if (customField.found) {
|
|
if (field === 'duration') {
|
|
// Duration will be negative if there is no end_timestamp
|
|
return `_duration ${isNegatedField ? '<' : '>='} 0`;
|
|
}
|
|
if (customField.propertyType === 'string') {
|
|
// Internal string fields are not nullable as long as they are not empty, they're likely not null
|
|
return `notEmpty(${customField.column}) ${
|
|
isNegatedField ? '!' : ''
|
|
}= 1`;
|
|
} else {
|
|
// We'll just try to check for nulls...
|
|
return `${customField.column} IS ${isNegatedField ? '' : 'NOT '}NULL`;
|
|
}
|
|
}
|
|
|
|
const { propertyType, found } = await this.getColumnForField(field);
|
|
if (!found) {
|
|
return this.NOT_FOUND_QUERY;
|
|
}
|
|
return SqlString.format(`mapContains(_${propertyType}_attributes, ?) = ?`, [
|
|
field,
|
|
isNegatedField ? 0 : 1,
|
|
]);
|
|
}
|
|
|
|
async gte(field: string, term: string) {
|
|
const { column, found } = await this.getColumnForField(field);
|
|
if (!found) {
|
|
return this.NOT_FOUND_QUERY;
|
|
}
|
|
return SqlString.format(`(${column} >= ?)`, [term]);
|
|
}
|
|
|
|
async lte(field: string, term: string) {
|
|
const { column, found } = await this.getColumnForField(field);
|
|
if (!found) {
|
|
return this.NOT_FOUND_QUERY;
|
|
}
|
|
return SqlString.format(`(${column} <= ?)`, [term]);
|
|
}
|
|
|
|
async lt(field: string, term: string) {
|
|
const { column, found } = await this.getColumnForField(field);
|
|
if (!found) {
|
|
return this.NOT_FOUND_QUERY;
|
|
}
|
|
return SqlString.format(`(${column} < ?)`, [term]);
|
|
}
|
|
|
|
async gt(field: string, term: string) {
|
|
const { column, found } = await this.getColumnForField(field);
|
|
if (!found) {
|
|
return this.NOT_FOUND_QUERY;
|
|
}
|
|
return SqlString.format(`(${column} > ?)`, [term]);
|
|
}
|
|
|
|
// TODO: Not sure if SQL really needs this or if it'll coerce itself
|
|
private attemptToParseNumber(term: string): string | number {
|
|
const number = Number.parseFloat(term);
|
|
if (Number.isNaN(number)) {
|
|
return term;
|
|
}
|
|
return number;
|
|
}
|
|
|
|
// Ref: https://clickhouse.com/codebrowser/ClickHouse/src/Functions/HasTokenImpl.h.html#_ZN2DB12HasTokenImpl16isTokenSeparatorEDu
|
|
// Split by anything that's ascii 0-128, that's not a letter or a number
|
|
private tokenizeTerm(term: string): string[] {
|
|
return term.split(/[ -/:-@[-`{-~\t\n\r]+/).filter(t => t.length > 0);
|
|
}
|
|
|
|
private termHasSeperators(term: string): boolean {
|
|
return term.match(/[ -/:-@[-`{-~\t\n\r]+/) != null;
|
|
}
|
|
|
|
async fieldSearch(
|
|
field: string,
|
|
term: string,
|
|
isNegatedField: boolean,
|
|
prefixWildcard: boolean,
|
|
suffixWildcard: boolean,
|
|
) {
|
|
const isImplicitField = field === IMPLICIT_FIELD;
|
|
const { column, propertyType, found } = await this.getColumnForField(field);
|
|
if (!found) {
|
|
return this.NOT_FOUND_QUERY;
|
|
}
|
|
// If it's a string field, we will always try to match with ilike
|
|
|
|
if (propertyType === 'bool') {
|
|
// numeric and boolean fields must be equality matched
|
|
return SqlString.format(`(${column} ${isNegatedField ? '!' : ''}= ?)`, [
|
|
parseInt(term),
|
|
]);
|
|
} else if (propertyType === 'number') {
|
|
return SqlString.format(
|
|
`(${column} ${isNegatedField ? '!' : ''}= CAST(?, 'Float64'))`,
|
|
[term],
|
|
);
|
|
}
|
|
|
|
if (isImplicitField && this.options?.useTokenization) {
|
|
// For the _source column, we'll try to do whole word searches by default
|
|
// to utilize the token bloom filter unless a prefix/sufix wildcard is specified
|
|
if (prefixWildcard || suffixWildcard) {
|
|
return SqlString.format(
|
|
`(lower(${column}) ${isNegatedField ? 'NOT ' : ''}LIKE lower(?))`,
|
|
[`${prefixWildcard ? '%' : ''}${term}${suffixWildcard ? '%' : ''}`],
|
|
);
|
|
} else {
|
|
// We can't search multiple tokens with `hasToken`, so we need to split up the term into tokens
|
|
const hasSeperators = this.termHasSeperators(term);
|
|
if (hasSeperators) {
|
|
const tokens = this.tokenizeTerm(term);
|
|
return `(${isNegatedField ? 'NOT (' : ''}${[
|
|
...tokens.map(token =>
|
|
SqlString.format(`hasTokenCaseInsensitive(${column}, ?)`, [
|
|
token,
|
|
]),
|
|
),
|
|
// If there are symbols in the term, we'll try to match the whole term as well (ex. Scott!)
|
|
SqlString.format(`(lower(${column}) LIKE lower(?))`, [`%${term}%`]),
|
|
].join(' AND ')}${isNegatedField ? ')' : ''})`;
|
|
} else {
|
|
return SqlString.format(
|
|
`(${
|
|
isNegatedField ? 'NOT ' : ''
|
|
}hasTokenCaseInsensitive(${column}, ?))`,
|
|
[term],
|
|
);
|
|
}
|
|
}
|
|
} else {
|
|
const shoudUseTokenBf = isImplicitField && isLikelyTokenTerm(term);
|
|
return SqlString.format(
|
|
`(${column} ${isNegatedField ? 'NOT ' : ''}? ?)`,
|
|
[SqlString.raw(shoudUseTokenBf ? 'LIKE' : 'ILIKE'), `%${term}%`],
|
|
);
|
|
}
|
|
}
|
|
|
|
async range(
|
|
field: string,
|
|
start: string,
|
|
end: string,
|
|
isNegatedField: boolean,
|
|
) {
|
|
const { column, found } = await this.getColumnForField(field);
|
|
if (!found) {
|
|
return this.NOT_FOUND_QUERY;
|
|
}
|
|
return SqlString.format(
|
|
`(${column} ${isNegatedField ? 'NOT ' : ''}BETWEEN ? AND ?)`,
|
|
[this.attemptToParseNumber(start), this.attemptToParseNumber(end)],
|
|
);
|
|
}
|
|
}
|
|
|
|
async function nodeTerm(
|
|
node: lucene.Node,
|
|
serializer: Serializer,
|
|
): Promise<string> {
|
|
const field = node.field[0] === '-' ? node.field.slice(1) : node.field;
|
|
let isNegatedField = node.field[0] === '-';
|
|
const isImplicitField = node.field === IMPLICIT_FIELD;
|
|
|
|
// NodeTerm
|
|
if ((node as lucene.NodeTerm).term != null) {
|
|
const nodeTerm = node as lucene.NodeTerm;
|
|
let term = decodeSpecialTokens(nodeTerm.term);
|
|
// We should only negate the search for negated bare terms (ex. '-5')
|
|
// This meeans the field is implicit and the prefix is -
|
|
if (isImplicitField && nodeTerm.prefix === '-') {
|
|
isNegatedField = true;
|
|
}
|
|
// Otherwise, if we have a negated term for a field (ex. 'level:-5')
|
|
// we should not negate the search, and search for -5
|
|
if (!isImplicitField && nodeTerm.prefix === '-') {
|
|
term = nodeTerm.prefix + decodeSpecialTokens(nodeTerm.term);
|
|
}
|
|
|
|
// TODO: Decide if this is good behavior
|
|
// If the term is quoted, we should search for the exact term in a property (ex. foo:"bar")
|
|
// Implicit field searches should still use substring matching (ex. "foo bar")
|
|
if (nodeTerm.quoted && !isImplicitField) {
|
|
return serializer.eq(field, term, isNegatedField);
|
|
}
|
|
|
|
if (!nodeTerm.quoted && term === '*') {
|
|
return serializer.isNotNull(field, isNegatedField);
|
|
}
|
|
|
|
if (!nodeTerm.quoted && term.substring(0, 2) === '>=') {
|
|
if (isNegatedField) {
|
|
return serializer.lt(field, term.slice(2));
|
|
}
|
|
return serializer.gte(field, term.slice(2));
|
|
}
|
|
if (!nodeTerm.quoted && term.substring(0, 2) === '<=') {
|
|
if (isNegatedField) {
|
|
return serializer.gt(field, term.slice(2));
|
|
}
|
|
return serializer.lte(field, term.slice(2));
|
|
}
|
|
if (!nodeTerm.quoted && term[0] === '>') {
|
|
if (isNegatedField) {
|
|
return serializer.lte(field, term.slice(1));
|
|
}
|
|
return serializer.gt(field, term.slice(1));
|
|
}
|
|
if (!nodeTerm.quoted && term[0] === '<') {
|
|
if (isNegatedField) {
|
|
return serializer.gte(field, term.slice(1));
|
|
}
|
|
return serializer.lt(field, term.slice(1));
|
|
}
|
|
|
|
let prefixWildcard = false;
|
|
let suffixWildcard = false;
|
|
if (!nodeTerm.quoted && term[0] === '*') {
|
|
prefixWildcard = true;
|
|
term = term.slice(1);
|
|
}
|
|
if (!nodeTerm.quoted && term[term.length - 1] === '*') {
|
|
suffixWildcard = true;
|
|
term = term.slice(0, -1);
|
|
}
|
|
|
|
return serializer.fieldSearch(
|
|
field,
|
|
term,
|
|
isNegatedField,
|
|
prefixWildcard,
|
|
suffixWildcard,
|
|
);
|
|
|
|
// TODO: Handle regex, similarity, boost, prefix
|
|
}
|
|
// NodeRangedTerm
|
|
if ((node as lucene.NodeRangedTerm).inclusive != null) {
|
|
const rangedTerm = node as lucene.NodeRangedTerm;
|
|
return serializer.range(
|
|
field,
|
|
rangedTerm.term_min,
|
|
rangedTerm.term_max,
|
|
isNegatedField,
|
|
);
|
|
}
|
|
|
|
throw new Error(`Unexpected Node type. ${node}`);
|
|
}
|
|
|
|
async function serialize(
|
|
ast: lucene.AST | lucene.Node,
|
|
serializer: Serializer,
|
|
): Promise<string> {
|
|
// Node Scenarios:
|
|
// 1. NodeTerm: Single term ex. "foo:bar"
|
|
// 2. NodeRangedTerm: Two terms ex. "foo:[bar TO qux]"
|
|
if ((ast as lucene.NodeTerm).term != null) {
|
|
return await nodeTerm(ast as lucene.NodeTerm, serializer);
|
|
}
|
|
if ((ast as lucene.NodeRangedTerm).inclusive != null) {
|
|
return await nodeTerm(ast as lucene.NodeTerm, serializer);
|
|
}
|
|
|
|
// AST Scenarios:
|
|
// 1. BinaryAST: Two terms ex. "foo:bar AND baz:qux"
|
|
// 2. LeftOnlyAST: Single term ex. "foo:bar"
|
|
if ((ast as lucene.BinaryAST).right != null) {
|
|
const binaryAST = ast as lucene.BinaryAST;
|
|
const operator =
|
|
binaryAST.operator === IMPLICIT_FIELD ? 'AND' : binaryAST.operator;
|
|
const parenthesized = binaryAST.parenthesized;
|
|
return `${parenthesized ? '(' : ''}${await serialize(
|
|
binaryAST.left,
|
|
serializer,
|
|
)} ${operator} ${await serialize(binaryAST.right, serializer)}${
|
|
parenthesized ? ')' : ''
|
|
}`;
|
|
}
|
|
|
|
if ((ast as lucene.LeftOnlyAST).left != null) {
|
|
const leftOnlyAST = ast as lucene.LeftOnlyAST;
|
|
const parenthesized = leftOnlyAST.parenthesized;
|
|
// start is used when ex. "NOT foo:bar"
|
|
return `${parenthesized ? '(' : ''}${
|
|
leftOnlyAST.start != undefined ? `${leftOnlyAST.start} ` : ''
|
|
}${await serialize(leftOnlyAST.left, serializer)}${
|
|
parenthesized ? ')' : ''
|
|
}`;
|
|
}
|
|
|
|
// Blank AST, means no text was parsed
|
|
return '';
|
|
}
|
|
|
|
export async function genWhereSQL(
|
|
ast: lucene.AST,
|
|
propertyTypeMappingsModel: PropertyTypeMappingsModel,
|
|
teamId?: string,
|
|
): Promise<string> {
|
|
const serializer = new SQLSerializer(propertyTypeMappingsModel, {
|
|
useTokenization: true,
|
|
});
|
|
return await serialize(ast, serializer);
|
|
}
|
|
|
|
export class SearchQueryBuilder {
|
|
private readonly searchQ: string;
|
|
|
|
private readonly conditions: string[];
|
|
|
|
private readonly propertyTypeMappingsModel: PropertyTypeMappingsModel;
|
|
|
|
teamId?: string;
|
|
|
|
constructor(
|
|
searchQ: string,
|
|
propertyTypeMappingsModel: PropertyTypeMappingsModel,
|
|
) {
|
|
this.conditions = [];
|
|
this.searchQ = searchQ;
|
|
this.propertyTypeMappingsModel = propertyTypeMappingsModel;
|
|
}
|
|
|
|
private async genSearchQuery() {
|
|
if (!this.searchQ) {
|
|
return '';
|
|
}
|
|
|
|
let querySql = this.searchQ
|
|
.split(/\s+/)
|
|
.map(queryToken =>
|
|
SqlString.format(`lower(_source) LIKE lower(?)`, [`%${queryToken}%`]),
|
|
)
|
|
.join(' AND ');
|
|
|
|
try {
|
|
const parsedQ = parse(this.searchQ);
|
|
|
|
if (parsedQ) {
|
|
querySql = await genWhereSQL(
|
|
parsedQ,
|
|
this.propertyTypeMappingsModel,
|
|
this.teamId,
|
|
);
|
|
}
|
|
} catch (e) {
|
|
console.warn({
|
|
error: serializeError(e),
|
|
message: 'Parse failure',
|
|
query: this.searchQ,
|
|
});
|
|
}
|
|
|
|
return querySql;
|
|
}
|
|
|
|
and(condition: string) {
|
|
if (condition && condition.trim()) {
|
|
this.conditions.push(`(${condition})`);
|
|
}
|
|
return this;
|
|
}
|
|
|
|
removeInternals() {
|
|
this.and(SqlString.format('notEquals(_platform, ?)', [LogPlatform.Rrweb]));
|
|
return this;
|
|
}
|
|
|
|
filterLogsAndSpans() {
|
|
this.and(
|
|
SqlString.format('type = ? OR type = ?', [LogType.Log, LogType.Span]),
|
|
);
|
|
return this;
|
|
}
|
|
|
|
// startTime and endTime are unix in ms
|
|
timestampInBetween(startTime: number, endTime: number) {
|
|
this.and(
|
|
`_timestamp_sort_key >= ${msToBigIntNs(
|
|
startTime,
|
|
)} AND _timestamp_sort_key < ${msToBigIntNs(endTime)}`,
|
|
);
|
|
return this;
|
|
}
|
|
|
|
async build() {
|
|
const searchQuery = await this.genSearchQuery();
|
|
if (this.searchQ) {
|
|
this.and(searchQuery);
|
|
}
|
|
return this.conditions.join(' AND ');
|
|
}
|
|
}
|
|
|
|
// TODO: replace with a proper query builder
|
|
export const buildSearchQueryWhereCondition = async ({
|
|
startTime,
|
|
endTime,
|
|
query,
|
|
propertyTypeMappingsModel,
|
|
teamId,
|
|
}: {
|
|
startTime: number; // unix in ms
|
|
endTime: number; // unix in ms,
|
|
query: string;
|
|
propertyTypeMappingsModel: PropertyTypeMappingsModel;
|
|
teamId?: string;
|
|
}) => {
|
|
const builder = new SearchQueryBuilder(query, propertyTypeMappingsModel);
|
|
builder.teamId = teamId;
|
|
return await builder.timestampInBetween(startTime, endTime).build();
|
|
};
|