feat: Improve search speed by chunking long time range searches into smaller incremental search windows. (#1125)

Resolves HDX-1035
This commit is contained in:
Mike Shi 2025-09-03 05:17:00 -10:00 committed by GitHub
parent 88f3cafb20
commit 64eb638bbe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 151 additions and 28 deletions

View file

@ -0,0 +1,5 @@
---
"@hyperdx/app": minor
---
feat: Improve search speed by chunking long time range searches into smaller incremental search windows.

View file

@ -238,6 +238,7 @@ export const RawLogTable = memo(
error,
columnTypeMap,
dateRange,
loadingDate,
}: {
wrapLines: boolean;
displayedColumns: string[];
@ -266,6 +267,7 @@ export const RawLogTable = memo(
isError?: boolean;
error?: ClickHouseQueryError | Error;
dateRange?: [Date, Date];
loadingDate?: Date;
}) => {
const generateRowMatcher = generateRowId;
@ -770,6 +772,11 @@ export const RawLogTable = memo(
<div className="spin-animate d-inline-block">
<i className="bi bi-arrow-repeat" />
</div>{' '}
{loadingDate != null && (
<>
Searched <FormatTime value={loadingDate} />.{' '}
</>
)}
Loading results
{dateRange?.[0] != null && dateRange?.[1] != null ? (
<>
@ -1174,6 +1181,7 @@ function DBSqlRowTableComponent({
error={error ?? undefined}
columnTypeMap={columnMap}
dateRange={config.dateRange}
loadingDate={data?.window?.startTime}
/>
</>
);

View file

@ -33,18 +33,122 @@ function queryKeyFn(
return [prefix, config, queryTimeout];
}
type TPageParam = number;
// Time window configuration - progressive bucketing strategy
const TIME_WINDOWS_MS = [
6 * 60 * 60 * 1000, // 6h
6 * 60 * 60 * 1000, // 6h
12 * 60 * 60 * 1000, // 12h
24 * 60 * 60 * 1000, // 24h
];
type TimeWindow = {
startTime: Date;
endTime: Date;
windowIndex: number;
};
type TPageParam = {
windowIndex: number;
offset: number;
};
type TQueryFnData = {
data: Record<string, any>[];
meta: ColumnMetaType[];
chSql: ChSql;
window: TimeWindow;
};
type TData = {
pages: TQueryFnData[];
pageParams: TPageParam[];
};
const queryFn: QueryFunction<TQueryFnData, TQueryKey, number> = async ({
// Generate time windows from date range using progressive bucketing
function generateTimeWindows(startDate: Date, endDate: Date): TimeWindow[] {
const windows: TimeWindow[] = [];
let currentEnd = new Date(endDate);
let windowIndex = 0;
while (currentEnd > startDate) {
const windowSize =
TIME_WINDOWS_MS[windowIndex] ||
TIME_WINDOWS_MS[TIME_WINDOWS_MS.length - 1]; // use largest window size
const windowStart = new Date(
Math.max(currentEnd.getTime() - windowSize, startDate.getTime()),
);
windows.push({
endTime: new Date(currentEnd),
startTime: windowStart,
windowIndex,
});
currentEnd = windowStart;
windowIndex++;
}
return windows;
}
// Get time window from page param
function getTimeWindowFromPageParam(
config: ChartConfigWithDateRange,
pageParam: TPageParam,
): TimeWindow {
const [startDate, endDate] = config.dateRange;
const windows = generateTimeWindows(startDate, endDate);
const window = windows[pageParam.windowIndex];
if (window == null) {
throw new Error('Invalid time window for page param');
}
return window;
}
// Calculate next page param based on current results and window
function getNextPageParam(
lastPage: TQueryFnData | null,
allPages: TQueryFnData[],
config: ChartConfigWithDateRange,
): TPageParam | undefined {
if (lastPage == null) {
return undefined;
}
const [startDate, endDate] = config.dateRange;
const windows = generateTimeWindows(startDate, endDate);
const currentWindow = lastPage.window;
// Calculate total results from all pages in current window
const currentWindowPages = allPages.filter(
p => p.window.windowIndex === currentWindow.windowIndex,
);
const currentWindowResults = currentWindowPages.reduce(
(sum, page) => sum + page.data.length,
0,
);
// If we have results in the current window, continue paginating within it
if (lastPage.data.length > 0) {
return {
windowIndex: currentWindow.windowIndex,
offset: currentWindowResults,
};
}
// If no more results in current window, move to next window
const nextWindowIndex = currentWindow.windowIndex + 1;
if (nextWindowIndex < windows.length) {
return {
windowIndex: nextWindowIndex,
offset: 0,
};
}
return undefined;
}
const queryFn: QueryFunction<TQueryFnData, TQueryKey, TPageParam> = async ({
queryKey,
pageParam,
signal,
@ -57,19 +161,27 @@ const queryFn: QueryFunction<TQueryFnData, TQueryKey, number> = async ({
// Only stream incrementally if this is a fresh query with no previous
// response or if it's a paginated query
// otherwise we'll flicker the UI with streaming data
const isStreamingIncrementally = !meta.hasPreviousQueries || pageParam > 0;
const isStreamingIncrementally =
!meta.hasPreviousQueries ||
pageParam.offset > 0 ||
pageParam.windowIndex > 0;
const config = queryKey[1];
const query = await renderChartConfig(
{
...config,
limit: {
limit: config.limit?.limit,
offset: pageParam,
},
// Get the time window for this page
const timeWindow = getTimeWindowFromPageParam(config, pageParam);
// Create config with windowed date range
const windowedConfig = {
...config,
dateRange: [timeWindow.startTime, timeWindow.endTime] as [Date, Date],
limit: {
limit: config.limit?.limit,
offset: pageParam.offset,
},
getMetadata(),
);
};
const query = await renderChartConfig(windowedConfig, getMetadata());
const queryTimeout = queryKey[2];
const clickhouseClient = getClickhouseClient({ queryTimeout });
@ -94,6 +206,7 @@ const queryFn: QueryFunction<TQueryFnData, TQueryKey, number> = async ({
data: [],
meta: [],
chSql: { sql: '', params: {} },
window: timeWindow,
};
if (oldData == null) {
return {
@ -161,7 +274,14 @@ const queryFn: QueryFunction<TQueryFnData, TQueryKey, number> = async ({
queryClient.setQueryData<TData>(queryKey, oldData => {
if (oldData == null) {
return {
pages: [{ data: rowObjs, meta: queryResultMeta, chSql: query }],
pages: [
{
data: rowObjs,
meta: queryResultMeta,
chSql: query,
window: timeWindow,
},
],
pageParams: [pageParam],
};
}
@ -177,6 +297,7 @@ const queryFn: QueryFunction<TQueryFnData, TQueryKey, number> = async ({
data: [...(page.data ?? []), ...rowObjs],
meta: queryResultMeta,
chSql: query,
window: timeWindow,
},
],
pageParams: oldData.pageParams,
@ -215,6 +336,7 @@ const queryFn: QueryFunction<TQueryFnData, TQueryKey, number> = async ({
data: queryResultData,
meta: queryResultMeta,
chSql: query,
window: timeWindow,
};
}
@ -244,6 +366,7 @@ function flattenData(data: TData | undefined): TQueryFnData | null {
meta: data.pages[0].meta,
data: flattenPages(data.pages),
chSql: data.pages[0].chSql,
window: data.pages[data.pages.length - 1].window,
};
}
@ -290,22 +413,9 @@ export default function useOffsetPaginatedQuery(
return isLive ? prev : undefined;
},
enabled,
initialPageParam: 0,
// TODO: Use initialData derived from cache to do a smarter time range fetch
initialPageParam: { windowIndex: 0, offset: 0 } as TPageParam,
getNextPageParam: (lastPage, allPages) => {
if (lastPage == null) {
return undefined;
}
const len = lastPage.data.length;
if (len === 0) {
return undefined;
}
const data = flattenPages(allPages);
// TODO: Need to configure overlap and account for granularity
return data.length;
return getNextPageParam(lastPage, allPages, config);
},
staleTime: Infinity, // TODO: Pick a correct time
meta: {