// hyperdx/packages/app/src/hooks/useMetadata.tsx
// 336 lines · 8.7 KiB · TypeScript

import { useEffect, useRef, useState } from 'react';
import objectHash from 'object-hash';
import {
ColumnMeta,
filterColumnMetaByType,
JSDataType,
} from '@hyperdx/common-utils/dist/clickhouse';
import {
Field,
TableConnection,
TableMetadata,
} from '@hyperdx/common-utils/dist/core/metadata';
import {
ChartConfigWithDateRange,
TSource,
} from '@hyperdx/common-utils/dist/types';
// 2024-11-12 12:53:15 +00:00
import {
keepPreviousData,
useQuery,
useQueryClient,
// 2024-11-12 12:53:15 +00:00
UseQueryOptions,
} from '@tanstack/react-query';
import api from '@/api';
import { IS_LOCAL_MODE } from '@/config';
import { LOCAL_STORE_CONNECTIONS_KEY } from '@/connection';
import { getMetadata } from '@/metadata';
import { useSource, useSources } from '@/source';
import { toArray } from '@/utils';
// 2024-11-12 12:53:15 +00:00
// Hook to get metadata with proper settings applied
export function useMetadataWithSettings() {
const [metadata, setMetadata] = useState(getMetadata());
const { data: me } = api.useMe();
const settingsApplied = useRef(false);
const queryClient = useQueryClient();
// Create a listener that triggers when connections are updated in local mode
useEffect(() => {
const isBrowser =
typeof window !== 'undefined' && typeof window.document !== 'undefined';
if (!isBrowser || !IS_LOCAL_MODE) return;
const createNewMetadata = (event: StorageEvent) => {
if (event.key === LOCAL_STORE_CONNECTIONS_KEY && event.newValue) {
// Create a new metadata instance with a new ClickHouse client,
// since the existing one will not have connection / auth info.
setMetadata(getMetadata());
settingsApplied.current = false;
// Clear react-query cache so that metadata is refetched with
// the new connection info, and error states are cleared.
queryClient.resetQueries();
}
};
window.addEventListener('storage', createNewMetadata);
return () => {
window.removeEventListener('storage', createNewMetadata);
};
}, [queryClient]);
useEffect(() => {
if (me?.team?.metadataMaxRowsToRead && !settingsApplied.current) {
metadata.setClickHouseSettings({
max_rows_to_read: me.team.metadataMaxRowsToRead,
});
settingsApplied.current = true;
}
}, [me?.team?.metadataMaxRowsToRead, metadata]);
return metadata;
}
// 2024-11-12 12:53:15 +00:00
/**
 * Fetches the column metadata of one table via `metadata.getColumns`.
 *
 * By default the query is enabled only when `databaseName`, `tableName` and
 * `connectionId` are all non-empty.
 *
 * @param params Identifies the table (`databaseName`, `tableName`) and the
 *   connection (`connectionId`) to read it through.
 * @param options Extra react-query options. They are spread last, so any
 *   field set here overrides the defaults below.
 * @returns The react-query result; `data` is the table's `ColumnMeta[]`.
 */
export function useColumns(
  {
    databaseName,
    tableName,
    connectionId,
  }: {
    databaseName: string;
    tableName: string;
    connectionId: string;
  },
  options?: Partial<UseQueryOptions<ColumnMeta[]>>,
) {
  const metadata = useMetadataWithSettings();

  return useQuery<ColumnMeta[]>({
    // connectionId is part of the key so that identically named tables on
    // different connections do not share one cache entry.
    queryKey: [
      'useMetadata.useColumns',
      { databaseName, tableName, connectionId },
    ],
    queryFn: async () => {
      return metadata.getColumns({
        databaseName,
        tableName,
        connectionId,
      });
    },
    enabled: !!databaseName && !!tableName && !!connectionId,
    ...options,
  });
}
/**
 * Fetches the names of the JSON-typed columns of a table.
 *
 * @param tableConnection Table to inspect; when it is undefined the query
 *   function resolves to an empty list.
 * @param options Extra react-query options, spread last so they override the
 *   defaults below.
 * @returns The react-query result; `data` is the list of JSON column names.
 */
export function useJsonColumns(
  tableConnection: TableConnection | undefined,
  options?: Partial<UseQueryOptions<string[]>>,
) {
  const metadata = useMetadataWithSettings();

  const loadJsonColumnNames = async (): Promise<string[]> => {
    if (!tableConnection) {
      return [];
    }
    const allColumns = await metadata.getColumns(tableConnection);
    const jsonColumns = filterColumnMetaByType(allColumns, [JSDataType.JSON]);
    return jsonColumns?.map(({ name }) => name) ?? [];
  };

  // Evaluates to `undefined` (not `false`) when tableConnection is missing;
  // the guard inside loadJsonColumnNames covers that case.
  const hasCompleteConnection =
    tableConnection &&
    !!tableConnection.databaseName &&
    !!tableConnection.tableName &&
    !!tableConnection.connectionId;

  return useQuery<string[]>({
    queryKey: ['useMetadata.useJsonColumns', tableConnection],
    queryFn: loadJsonColumnNames,
    enabled: hasCompleteConnection,
    ...options,
  });
}
/**
 * Fetches the fields of several tables and merges them into one list.
 *
 * The query runs only when at least one table connection is given, every
 * connection has a database name, table name and connection id, and the
 * `api.useMe()` query has been fetched. If the team has
 * `fieldMetadataDisabled` set, the result is an empty list.
 *
 * @param tableConnections Tables whose fields should be loaded.
 * @param options Extra react-query options, spread last so they override the
 *   defaults below.
 * @returns The react-query result; `data` is the combined `Field[]`, with
 *   duplicates removed when more than one table was queried.
 */
export function useMultipleAllFields(
  tableConnections: TableConnection[],
  options?: Partial<UseQueryOptions<Field[]>>,
) {
  const metadata = useMetadataWithSettings();
  const { data: me, isFetched } = api.useMe();

  return useQuery<Field[]>({
    queryKey: [
      'useMetadata.useMultipleAllFields',
      ...tableConnections.map(tc => ({ ...tc })),
    ],
    queryFn: async () => {
      const team = me?.team;
      if (team?.fieldMetadataDisabled) {
        return [];
      }

      const fields2d = await Promise.all(
        tableConnections.map(tc => metadata.getAllFields(tc)),
      );

      // skip deduplication if not needed
      if (fields2d.length === 1) return fields2d[0];

      return deduplicate2dArray<Field>(fields2d);
    },
    enabled:
      tableConnections.length > 0 &&
      tableConnections.every(
        tc => !!tc.databaseName && !!tc.tableName && !!tc.connectionId,
      ) &&
      isFetched,
    ...options,
  });
}
/**
 * Single-table convenience wrapper around `useMultipleAllFields`.
 *
 * @param tableConnection Table to load fields for; `undefined` is forwarded
 *   as an empty list of connections.
 * @param options Extra react-query options, forwarded unchanged.
 * @returns Whatever `useMultipleAllFields` returns for that input.
 */
export function useAllFields(
  tableConnection: TableConnection | undefined,
  options?: Partial<UseQueryOptions<Field[]>>,
) {
  const connections = tableConnection ? [tableConnection] : [];
  return useMultipleAllFields(connections, options);
}
/**
 * Fetches table-level metadata via `metadata.getTableMetadata`.
 *
 * By default the query is enabled only when `databaseName`, `tableName` and
 * `connectionId` are all non-empty, and results stay fresh for five minutes.
 *
 * @param params Identifies the table (`databaseName`, `tableName`) and the
 *   connection (`connectionId`) to read it through.
 * @param options Extra react-query options (without `queryKey`), spread last
 *   so they override the defaults below.
 * @returns The react-query result; `data` is the `TableMetadata`.
 */
export function useTableMetadata(
  {
    databaseName,
    tableName,
    connectionId,
  }: {
    databaseName: string;
    tableName: string;
    connectionId: string;
  },
  options?: Omit<UseQueryOptions<any, Error>, 'queryKey'>,
) {
  const metadata = useMetadataWithSettings();

  return useQuery<TableMetadata>({
    // connectionId is part of the key so that identically named tables on
    // different connections do not share one cache entry.
    queryKey: [
      'useMetadata.useTableMetadata',
      { databaseName, tableName, connectionId },
    ],
    queryFn: async () => {
      return await metadata.getTableMetadata({
        databaseName,
        tableName,
        connectionId,
      });
    },
    staleTime: 1000 * 60 * 5, // Cache every 5 min
    enabled: !!databaseName && !!tableName && !!connectionId,
    ...options,
  });
}
/**
 * Fetches the available values for a set of keys across one or more chart
 * configs, e.g. to populate filter options.
 *
 * Each chart config is queried through `metadata.getKeyValuesWithMVs` (which,
 * per the change that introduced it (#1591), reads from materialized views
 * where possible) and the per-config results are flattened into one list.
 * The react-query abort signal is forwarded to each request.
 *
 * The query is enabled only when `options.enabled` is truthy (default true),
 * at least one key is requested, and the sources list has finished loading.
 *
 * @param params.chartConfigs One chart config or an array of them.
 * @param params.keys Keys to fetch values for; only the first 20 are sent.
 * @param params.limit Optional limit forwarded to `getKeyValuesWithMVs`.
 * @param params.disableRowLimit Forwarded to `getKeyValuesWithMVs`.
 * @param options Extra react-query options (without `queryKey`). `enabled`
 *   is combined with the hook's own conditions rather than replacing them.
 * @returns The react-query result, with `isLoading` also true while the
 *   sources list is loading.
 */
export function useMultipleGetKeyValues(
  {
    chartConfigs,
    keys,
    limit,
    disableRowLimit,
  }: {
    chartConfigs: ChartConfigWithDateRange | ChartConfigWithDateRange[];
    keys: string[];
    limit?: number;
    disableRowLimit?: boolean;
  },
  options?: Omit<UseQueryOptions<any, Error>, 'queryKey'>,
) {
  const metadata = useMetadataWithSettings();
  const chartConfigsArr = toArray(chartConfigs);

  const { enabled = true } = options || {};
  const { data: sources, isLoading: isLoadingSources } = useSources();

  const query = useQuery<{ key: string; value: string[] }[]>({
    queryKey: [
      'useMetadata.useGetKeyValues',
      ...chartConfigsArr.map(cc => ({ ...cc })),
      ...keys,
      disableRowLimit,
      // limit changes the result set, so it must be part of the cache key.
      limit,
    ],
    queryFn: async ({ signal }) => {
      // Limit to 20 keys for now, otherwise request fails (max header size)
      const requestedKeys = keys.slice(0, 20);

      return (
        await Promise.all(
          chartConfigsArr.map(chartConfig => {
            const source = chartConfig.source
              ? sources?.find(s => s.id === chartConfig.source)
              : undefined;

            return metadata.getKeyValuesWithMVs({
              chartConfig,
              keys: requestedKeys,
              limit,
              disableRowLimit,
              source,
              signal,
            });
          }),
        )
      ).flatMap(v => v);
    },
    staleTime: 1000 * 60 * 5, // Cache every 5 min
    placeholderData: keepPreviousData,
    ...options,
    // Must come after the spread so options.enabled cannot bypass the guards.
    enabled: !!enabled && !!keys.length && !isLoadingSources,
  });

  return {
    ...query,
    isLoading: query.isLoading || isLoadingSources,
  };
}
/**
 * Fetches the distribution of values for one key via
 * `metadata.getValuesDistribution`.
 *
 * By default the query is enabled once `key` is non-empty and the source
 * referenced by `chartConfig.source` has finished loading. Results never go
 * stale (`staleTime: Infinity`) and failed requests are not retried.
 *
 * @param params.chartConfig Chart config to query against.
 * @param params.key Key whose value distribution is requested.
 * @param params.limit Limit forwarded to `getValuesDistribution`.
 * @param options Extra react-query options (without `queryKey`), spread last
 *   so they override the defaults below.
 * @returns The react-query result; `data` is a `Map<string, number>`.
 */
export function useGetValuesDistribution(
  {
    chartConfig,
    key,
    limit,
  }: {
    chartConfig: ChartConfigWithDateRange;
    key: string;
    limit: number;
  },
  options?: Omit<UseQueryOptions<Map<string, number>, Error>, 'queryKey'>,
) {
  const metadata = useMetadataWithSettings();
  const { data: source, isLoading: isLoadingSource } = useSource({
    id: chartConfig.source,
  });

  return useQuery<Map<string, number>>({
    // limit is part of the key: with staleTime Infinity a changed limit
    // would otherwise keep returning the result cached for the old limit.
    queryKey: ['useMetadata.useGetValuesDistribution', chartConfig, key, limit],
    queryFn: async () => {
      return await metadata.getValuesDistribution({
        chartConfig,
        key,
        limit,
        source,
      });
    },
    staleTime: Infinity,
    enabled: !!key && !isLoadingSource,
    placeholderData: keepPreviousData,
    retry: false,
    ...options,
  });
}
/**
 * Single-config convenience wrapper around `useMultipleGetKeyValues`.
 *
 * @param params.chartConfig Chart config to query; when omitted, an empty
 *   list of configs is forwarded.
 * @param params.keys Keys to fetch values for.
 * @param params.limit Optional limit, forwarded unchanged.
 * @param params.disableRowLimit Forwarded unchanged.
 * @param options Extra react-query options, forwarded unchanged.
 * @returns Whatever `useMultipleGetKeyValues` returns for that input.
 */
export function useGetKeyValues(
  {
    chartConfig,
    keys,
    limit,
    disableRowLimit,
  }: {
    chartConfig?: ChartConfigWithDateRange;
    keys: string[];
    limit?: number;
    disableRowLimit?: boolean;
  },
  options?: Omit<UseQueryOptions<any, Error>, 'queryKey'>,
) {
  const chartConfigs = chartConfig ? [chartConfig] : [];
  return useMultipleGetKeyValues(
    { chartConfigs, keys, limit, disableRowLimit },
    options,
  );
}
/**
 * Removes duplicate objects from a flat array by delegating to
 * `deduplicate2dArray` with the array wrapped as a single row.
 *
 * @param array Objects to deduplicate.
 * @returns The result of `deduplicate2dArray([array])`.
 */
export function deduplicateArray<T extends object>(array: T[]): T[] {
  const singleRow: T[][] = [array];
  return deduplicate2dArray(singleRow);
}
/**
 * Flattens an array of arrays into one array, dropping elements whose
 * `objectHash.sha1` digest was already seen.
 *
 * The first occurrence of each element wins and the relative order of the
 * kept elements follows the row-by-row traversal order.
 *
 * @param array2d Rows of objects to merge.
 * @returns A new flat array of the unique elements.
 */
export function deduplicate2dArray<T extends object>(array2d: T[][]): T[] {
  const seenHashes = new Set<string>();
  const unique: T[] = [];

  array2d.forEach(row => {
    row.forEach(item => {
      const hash = objectHash.sha1(item);
      if (!seenHashes.has(hash)) {
        seenHashes.add(hash);
        unique.push(item);
      }
    });
  });

  return unique;
}