mirror of
https://github.com/hyperdxio/hyperdx
synced 2026-04-21 13:37:15 +00:00
feat: Add percentages to filter values (#1250)
# Summary Closes HDX-1960 This PR adds a button to our search filters component which can be used to show the _approximate_ percentage of rows which have each filter value. https://github.com/user-attachments/assets/2dba1b28-d2b9-4414-986c-0c515d252c89 Notes: - The percentages are based on a sample of 100k rows. The sampling is done similarly to how the EE version samples logs for patterns. - We only fetch the 100 most common values in the sample. All other values are assumed to represent <1% of the data. - The percentages represent the distribution within the dataset after it has been filtered by the selected filters and the where clause. - This is a potentially expensive query, even with sampling, so the percentages are only queried if they're toggled on for a particular filter, and do not refresh in live mode. They do refresh if the search or date range changes (outside of live mode).
This commit is contained in:
parent
13b191c8a0
commit
daffcf3594
8 changed files with 480 additions and 17 deletions
6
.changeset/tricky-brooms-thank.md
Normal file
6
.changeset/tricky-brooms-thank.md
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
"@hyperdx/common-utils": patch
|
||||
"@hyperdx/app": patch
|
||||
---
|
||||
|
||||
feat: Add percentages to filter values
|
||||
|
|
@ -55,7 +55,7 @@ import {
|
|||
useDocumentVisibility,
|
||||
} from '@mantine/hooks';
|
||||
import { notifications } from '@mantine/notifications';
|
||||
import { useIsFetching } from '@tanstack/react-query';
|
||||
import { keepPreviousData, useIsFetching } from '@tanstack/react-query';
|
||||
import { SortingState } from '@tanstack/react-table';
|
||||
import CodeMirror from '@uiw/react-codemirror';
|
||||
|
||||
|
|
@ -1099,7 +1099,10 @@ function DBSearchPage() {
|
|||
}
|
||||
}, [isReady, queryReady, isChartConfigLoading, onSearch]);
|
||||
|
||||
const { data: aliasMap } = useAliasMapFromChartConfig(dbSqlRowTableConfig);
|
||||
const { data: aliasMap } = useAliasMapFromChartConfig(dbSqlRowTableConfig, {
|
||||
placeholderData: keepPreviousData,
|
||||
queryKey: ['aliasMap', dbSqlRowTableConfig, 'withPlaceholder'],
|
||||
});
|
||||
|
||||
const aliasWith = useMemo(
|
||||
() =>
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import { useExplainQuery } from '@/hooks/useExplainQuery';
|
|||
import {
|
||||
useAllFields,
|
||||
useGetKeyValues,
|
||||
useGetValuesDistribution,
|
||||
useJsonColumns,
|
||||
useTableMetadata,
|
||||
} from '@/hooks/useMetadata';
|
||||
|
|
@ -76,6 +77,8 @@ type FilterCheckboxProps = {
|
|||
onClickExclude?: VoidFunction;
|
||||
onClickPin: VoidFunction;
|
||||
className?: string;
|
||||
percentage?: number;
|
||||
isPercentageLoading?: boolean;
|
||||
};
|
||||
|
||||
export const TextButton = ({
|
||||
|
|
@ -105,6 +108,26 @@ export const TextButton = ({
|
|||
);
|
||||
};
|
||||
|
||||
/** Props for the small percentage label rendered next to a filter value. */
type FilterPercentageProps = {
  /** Percentage value to display; formatted into a short label before rendering. */
  percentage: number;
  /** When truthy, the label is rendered with the `effect-pulse` class. */
  isLoading?: boolean;
};

/**
 * Renders a compact percentage label for a filter value.
 *
 * Formatting rules:
 * - anything below 1 is shown as `<1%`
 * - 99.5 and above is shown as `>99%`
 * - everything else is rounded to the nearest integer and prefixed with `~`
 */
const FilterPercentage = ({ percentage, isLoading }: FilterPercentageProps) => {
  let label: string;
  if (percentage < 1) {
    label = `<1%`;
  } else if (percentage >= 99.5) {
    label = `>99%`;
  } else {
    label = `~${Math.round(percentage)}%`;
  }

  const pulseClassName = isLoading ? 'effect-pulse' : '';

  return (
    <Text size="xs" c="gray.3" className={pulseClassName}>
      {label}
    </Text>
  );
};
|
||||
|
||||
const emptyFn = () => {};
|
||||
export const FilterCheckbox = ({
|
||||
value,
|
||||
|
|
@ -115,6 +138,8 @@ export const FilterCheckbox = ({
|
|||
onClickExclude,
|
||||
onClickPin,
|
||||
className,
|
||||
percentage,
|
||||
isPercentageLoading,
|
||||
}: FilterCheckboxProps) => {
|
||||
return (
|
||||
<div
|
||||
|
|
@ -146,15 +171,30 @@ export const FilterCheckbox = ({
|
|||
fz="xxs"
|
||||
color="gray"
|
||||
>
|
||||
<Text
|
||||
size="xs"
|
||||
c={value === 'excluded' ? 'red.4' : 'gray.3'}
|
||||
truncate="end"
|
||||
<Group
|
||||
w="100%"
|
||||
title={label}
|
||||
gap="xs"
|
||||
wrap="nowrap"
|
||||
justify="space-between"
|
||||
pe={'11px'}
|
||||
miw={0}
|
||||
>
|
||||
{label}
|
||||
</Text>
|
||||
<Text
|
||||
size="xs"
|
||||
c={value === 'excluded' ? 'red.4' : 'gray.3'}
|
||||
truncate="end"
|
||||
flex={1}
|
||||
title={label}
|
||||
>
|
||||
{label}
|
||||
</Text>
|
||||
{percentage != null && (
|
||||
<FilterPercentage
|
||||
percentage={percentage}
|
||||
isLoading={isPercentageLoading}
|
||||
/>
|
||||
)}
|
||||
</Group>
|
||||
</Tooltip>
|
||||
</Group>
|
||||
<div className={classes.filterActions}>
|
||||
|
|
@ -208,6 +248,8 @@ export type FilterGroupProps = {
|
|||
hasLoadedMore: boolean;
|
||||
isDefaultExpanded?: boolean;
|
||||
'data-testid'?: string;
|
||||
chartConfig: ChartConfigWithDateRange;
|
||||
isLive?: boolean;
|
||||
};
|
||||
|
||||
const MAX_FILTER_GROUP_ITEMS = 10;
|
||||
|
|
@ -230,6 +272,8 @@ export const FilterGroup = ({
|
|||
hasLoadedMore,
|
||||
isDefaultExpanded,
|
||||
'data-testid': dataTestId,
|
||||
chartConfig,
|
||||
isLive,
|
||||
}: FilterGroupProps) => {
|
||||
const [search, setSearch] = useState('');
|
||||
// "Show More" button when there's lots of options
|
||||
|
|
@ -238,6 +282,26 @@ export const FilterGroup = ({
|
|||
const [isExpanded, setExpanded] = useState(isDefaultExpanded ?? false);
|
||||
// Track recently moved items for highlight animation
|
||||
const [recentlyMoved, setRecentlyMoved] = useState<Set<string>>(new Set());
|
||||
// Show what percentage of the data has each value
|
||||
const [showDistributions, setShowDistributions] = useState(false);
|
||||
// For live searches, don't refresh percentages when date range changes
|
||||
const [dateRange, setDateRange] = useState<[Date, Date]>(
|
||||
chartConfig.dateRange,
|
||||
);
|
||||
|
||||
// Flips the percentage display on or off. When it is being switched on, the
// group is expanded and the date range used for the distribution query is
// re-synchronised with the current chart config.
const toggleShowDistributions = () => {
  const isTurningOn = !showDistributions;
  if (isTurningOn) {
    setExpanded(true);
    setDateRange(chartConfig.dateRange);
  }

  setShowDistributions(prev => !prev);
};
|
||||
|
||||
useEffect(() => {
|
||||
if (!isLive) {
|
||||
setDateRange(chartConfig.dateRange);
|
||||
}
|
||||
}, [chartConfig.dateRange, isLive]);
|
||||
|
||||
useEffect(() => {
|
||||
if (isDefaultExpanded) {
|
||||
|
|
@ -245,6 +309,33 @@ export const FilterGroup = ({
|
|||
}
|
||||
}, [isDefaultExpanded]);
|
||||
|
||||
const {
|
||||
data: distributionData,
|
||||
isFetching: isFetchingDistribution,
|
||||
error: distributionError,
|
||||
} = useGetValuesDistribution(
|
||||
{
|
||||
chartConfig: { ...chartConfig, dateRange },
|
||||
key: name,
|
||||
limit: 100, // The 100 most common values are enough to find any values that are present in at least 1% of rows
|
||||
},
|
||||
{
|
||||
enabled: showDistributions,
|
||||
},
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
if (distributionError) {
|
||||
notifications.show({
|
||||
color: 'red',
|
||||
title: 'Error loading filter distribution',
|
||||
message: distributionError?.message,
|
||||
autoClose: 5000,
|
||||
});
|
||||
setShowDistributions(false);
|
||||
}
|
||||
}, [distributionError]);
|
||||
|
||||
const totalFiltersSize =
|
||||
selectedValues.included.size + selectedValues.excluded.size;
|
||||
|
||||
|
|
@ -292,6 +383,13 @@ export const FilterGroup = ({
|
|||
if (aExcluded && !bExcluded) return -1;
|
||||
if (!aExcluded && bExcluded) return 1;
|
||||
|
||||
// Then sort by estimated percentage of rows with this value, if available
|
||||
const aPercentage = distributionData?.get(a.value) ?? 0;
|
||||
const bPercentage = distributionData?.get(b.value) ?? 0;
|
||||
if (aPercentage !== bPercentage) {
|
||||
return bPercentage - aPercentage;
|
||||
}
|
||||
|
||||
// Finally sort alphabetically/numerically
|
||||
return a.value.localeCompare(b.value, undefined, { numeric: true });
|
||||
});
|
||||
|
|
@ -310,6 +408,7 @@ export const FilterGroup = ({
|
|||
augmentedOptions,
|
||||
selectedValues,
|
||||
totalFiltersSize,
|
||||
distributionData,
|
||||
]);
|
||||
|
||||
// Simple highlight animation when checkbox is checked
|
||||
|
|
@ -402,6 +501,22 @@ export const FilterGroup = ({
|
|||
</Tooltip>
|
||||
</Accordion.Control>
|
||||
<Group gap="xxxs" wrap="nowrap">
|
||||
<ActionIcon
|
||||
size="xs"
|
||||
variant="subtle"
|
||||
color="gray"
|
||||
onClick={toggleShowDistributions}
|
||||
title={
|
||||
showDistributions ? 'Hide distribution' : 'Show distribution'
|
||||
}
|
||||
data-testid={`toggle-distribution-button-${name}`}
|
||||
aria-checked={showDistributions}
|
||||
role="checkbox"
|
||||
>
|
||||
<i
|
||||
className={`bi ${isFetchingDistribution ? 'spinner-border spinner-border-sm' : showDistributions ? 'bi-bar-chart-line-fill' : 'bi-bar-chart-line'}`}
|
||||
/>
|
||||
</ActionIcon>
|
||||
{onFieldPinClick && (
|
||||
<ActionIcon
|
||||
size="xs"
|
||||
|
|
@ -409,6 +524,7 @@ export const FilterGroup = ({
|
|||
color="gray"
|
||||
onClick={onFieldPinClick}
|
||||
title={isFieldPinned ? 'Unpin field' : 'Pin field'}
|
||||
me={'4px'}
|
||||
>
|
||||
<i
|
||||
className={`bi bi-pin-angle${isFieldPinned ? '-fill' : ''}`}
|
||||
|
|
@ -452,6 +568,12 @@ export const FilterGroup = ({
|
|||
onClickOnly={() => onOnlyClick(option.value)}
|
||||
onClickExclude={() => onExcludeClick(option.value)}
|
||||
onClickPin={() => onPinClick(option.value)}
|
||||
isPercentageLoading={isFetchingDistribution}
|
||||
percentage={
|
||||
showDistributions && distributionData
|
||||
? (distributionData.get(option.value) ?? 0)
|
||||
: undefined
|
||||
}
|
||||
/>
|
||||
))}
|
||||
{optionsLoading ? (
|
||||
|
|
@ -900,6 +1022,8 @@ const DBSearchPageFiltersComponent = ({
|
|||
(filterState[facet.key].included.size > 0 ||
|
||||
filterState[facet.key].excluded.size > 0))
|
||||
}
|
||||
chartConfig={chartConfig}
|
||||
isLive={isLive}
|
||||
/>
|
||||
))}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,21 @@
|
|||
import { UseQueryOptions, UseQueryResult } from '@tanstack/react-query';
|
||||
import { screen, within } from '@testing-library/react';
|
||||
import userEvent from '@testing-library/user-event';
|
||||
|
||||
import { useGetValuesDistribution } from '@/hooks/useMetadata';
|
||||
|
||||
import {
|
||||
cleanedFacetName,
|
||||
FilterGroup,
|
||||
type FilterGroupProps,
|
||||
} from '../DBSearchPageFilters';
|
||||
|
||||
jest.mock('@/hooks/useMetadata', () => ({
|
||||
useGetValuesDistribution: jest
|
||||
.fn()
|
||||
.mockReturnValue({ data: undefined, isFetching: false, error: undefined }),
|
||||
}));
|
||||
|
||||
describe('cleanedFacetName', () => {
|
||||
describe('basic functionality', () => {
|
||||
it('should return non-toString strings unchanged', () => {
|
||||
|
|
@ -202,12 +211,24 @@ describe('FilterGroup', () => {
|
|||
loadMoreLoading: false,
|
||||
hasLoadedMore: false,
|
||||
isDefaultExpanded: true,
|
||||
chartConfig: {
|
||||
from: {
|
||||
databaseName: 'test_db',
|
||||
tableName: 'test_table',
|
||||
},
|
||||
select: '',
|
||||
where: '',
|
||||
whereLanguage: 'sql',
|
||||
timestampValueExpression: '',
|
||||
connection: 'test_connection',
|
||||
dateRange: [new Date('2024-01-01'), new Date('2024-01-02')],
|
||||
},
|
||||
};
|
||||
|
||||
it('should sort options alphabetically by default', () => {
|
||||
renderWithMantine(<FilterGroup {...defaultProps} />);
|
||||
|
||||
const options = screen.getAllByRole('checkbox');
|
||||
const options = screen.getAllByTestId(/filter-checkbox-input/g);
|
||||
expect(options).toHaveLength(3);
|
||||
const labels = screen.getAllByText(/apple|banana|zebra/);
|
||||
expect(labels[0]).toHaveTextContent('apple');
|
||||
|
|
@ -226,7 +247,7 @@ describe('FilterGroup', () => {
|
|||
/>,
|
||||
);
|
||||
|
||||
const options = screen.getAllByRole('checkbox');
|
||||
const options = screen.getAllByTestId(/filter-checkbox-input/g);
|
||||
expect(options).toHaveLength(3);
|
||||
const labels = screen.getAllByText(/apple|banana|zebra/);
|
||||
expect(labels[0]).toHaveTextContent('apple');
|
||||
|
|
@ -234,6 +255,68 @@ describe('FilterGroup', () => {
|
|||
expect(labels[2]).toHaveTextContent('banana');
|
||||
});
|
||||
|
||||
// With distribution data returned by the mocked useGetValuesDistribution hook,
// the selected value is expected first and the remaining values are expected
// in descending percentage order.
it('should show selected items first, then sort by counts, if percentages when they are enabled', () => {
  jest.mocked(useGetValuesDistribution).mockReturnValue({
    data: new Map([
      ['apple', 30],
      ['banana', 20],
      ['zebra', 50],
    ]),
    isFetching: false,
    error: null,
  } as UseQueryResult<Map<string, number>>);

  renderWithMantine(
    <FilterGroup
      {...defaultProps}
      selectedValues={{
        included: new Set(['banana']),
        excluded: new Set(),
      }}
    />,
  );

  // No `g` flag on the matcher: a global RegExp keeps `lastIndex` state between
  // `.test()` calls, which makes it unreliable when reused across elements.
  const options = screen.getAllByTestId(/filter-checkbox-input/);
  expect(options).toHaveLength(3);
  const labels = screen.getAllByText(/apple|banana|zebra/);
  expect(labels[0]).toHaveTextContent('banana'); // Selected
  expect(labels[1]).toHaveTextContent('zebra'); // 50%
  expect(labels[2]).toHaveTextContent('apple'); // 30%
});
|
||||
|
||||
// After clicking the distribution toggle, every option should render a
// percentage label; values absent from the distribution map fall back to `<1%`.
it('should show percentages, if enabled', async () => {
  jest.mocked(useGetValuesDistribution).mockReturnValue({
    data: new Map([
      ['apple', 99.2],
      ['zebra', 0.6],
    ]),
    isFetching: false,
    error: null,
  } as UseQueryResult<Map<string, number>>);

  renderWithMantine(
    <FilterGroup
      {...defaultProps}
      selectedValues={{
        included: new Set(),
        excluded: new Set(),
      }}
    />,
  );

  const showPercentages = screen.getByTestId(
    'toggle-distribution-button-Test Filter',
  );
  await userEvent.click(showPercentages);

  // No `g` flag on the matcher: a global RegExp keeps `lastIndex` state between
  // `.test()` calls, which makes it unreliable when reused across elements.
  const options = screen.getAllByTestId(/filter-checkbox-input/);
  expect(options).toHaveLength(3);
  const labels = screen.getAllByText(/%/);
  expect(labels[0]).toHaveTextContent('~99%'); // apple
  expect(labels[1]).toHaveTextContent('<1%'); // zebra
  expect(labels[2]).toHaveTextContent('<1%'); // banana
});
|
||||
|
||||
it('should handle excluded items', () => {
|
||||
renderWithMantine(
|
||||
<FilterGroup
|
||||
|
|
@ -245,7 +328,7 @@ describe('FilterGroup', () => {
|
|||
/>,
|
||||
);
|
||||
|
||||
const options = screen.getAllByRole('checkbox');
|
||||
const options = screen.getAllByTestId(/filter-checkbox-input/g);
|
||||
expect(options).toHaveLength(3);
|
||||
const labels = screen.getAllByText(/apple|banana|zebra/);
|
||||
expect(labels[0]).toHaveTextContent('apple'); // included first
|
||||
|
|
@ -276,7 +359,7 @@ describe('FilterGroup', () => {
|
|||
);
|
||||
|
||||
// Should show MAX_FILTER_GROUP_ITEMS (10) by default
|
||||
let options = screen.getAllByRole('checkbox');
|
||||
let options = screen.getAllByTestId(/filter-checkbox-input/g);
|
||||
expect(options).toHaveLength(10);
|
||||
|
||||
// Selected items should be visible even if they would be beyond MAX_FILTER_GROUP_ITEMS
|
||||
|
|
@ -289,7 +372,7 @@ describe('FilterGroup', () => {
|
|||
await userEvent.click(showMoreButton);
|
||||
|
||||
// Should show all items
|
||||
options = screen.getAllByRole('checkbox');
|
||||
options = screen.getAllByTestId(/filter-checkbox-input/g);
|
||||
expect(options).toHaveLength(15);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -210,6 +210,36 @@ export function useMultipleGetKeyValues(
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * React Query hook that loads the distribution of values for `key` by calling
 * `metadata.getValuesDistribution`.
 *
 * The result is never considered stale (`staleTime: Infinity`), the previous
 * result is kept as placeholder data while a new key is loading, and failed
 * requests are not retried.
 *
 * @param chartConfig - Chart config (including date range) passed through to the metadata call.
 * @param key - Expression whose values are counted; the query is disabled while it is empty.
 * @param limit - Maximum number of distinct values to fetch.
 * @param options - Extra query options; these override the defaults above,
 *   except that an empty `key` always keeps the query disabled.
 * @returns The query result, whose data is a map from value to percentage.
 */
export function useGetValuesDistribution(
  {
    chartConfig,
    key,
    limit,
  }: {
    chartConfig: ChartConfigWithDateRange;
    key: string;
    limit: number;
  },
  options?: Omit<UseQueryOptions<Map<string, number>, Error>, 'queryKey'>,
) {
  const metadata = useMetadataWithSettings();
  return useQuery<Map<string, number>>({
    // `limit` changes what the query function returns, so it has to be part of
    // the key; otherwise results fetched with different limits share a cache entry.
    queryKey: ['useMetadata.useGetValuesDistribution', chartConfig, key, limit],
    queryFn: () =>
      metadata.getValuesDistribution({
        chartConfig,
        key,
        limit,
      }),
    staleTime: Infinity,
    placeholderData: keepPreviousData,
    retry: false,
    ...options,
    // Applied after the spread so a caller-supplied `enabled` cannot bypass the
    // empty-key guard.
    enabled: !!key && (options?.enabled ?? true),
  });
}
|
||||
|
||||
export function useGetKeyValues(
|
||||
{
|
||||
chartConfig,
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@
|
|||
.filterCheckbox {
|
||||
width: 100%;
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 20px;
|
||||
grid-template-columns: 1fr 13px;
|
||||
padding: 2px 6px;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
|
|
@ -81,12 +81,12 @@
|
|||
backdrop-filter: blur(4px);
|
||||
border-radius: 4px;
|
||||
align-items: center;
|
||||
padding: 0 8px;
|
||||
padding: 0 4px;
|
||||
gap: 4px;
|
||||
background-color: $slate-950;
|
||||
|
||||
.textButton {
|
||||
padding: 2px 6px;
|
||||
padding: 2px 4px;
|
||||
border-radius: 3px;
|
||||
|
||||
&:hover {
|
||||
|
|
|
|||
|
|
@ -422,4 +422,139 @@ describe('Metadata', () => {
|
|||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getValuesDistribution', () => {
|
||||
const mockChartConfig: ChartConfigWithDateRange = {
|
||||
from: {
|
||||
databaseName: 'test_db',
|
||||
tableName: 'test_table',
|
||||
},
|
||||
select: '',
|
||||
where: '',
|
||||
whereLanguage: 'sql',
|
||||
timestampValueExpression: '',
|
||||
connection: 'test_connection',
|
||||
dateRange: [new Date('2024-01-01'), new Date('2024-01-02')],
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
(mockClickhouseClient.query as jest.Mock).mockResolvedValue({
|
||||
json: () =>
|
||||
Promise.resolve({
|
||||
data: [
|
||||
{
|
||||
__hdx_value: 'info',
|
||||
__hdx_percentage: '85.9',
|
||||
},
|
||||
{
|
||||
__hdx_value: 'debug',
|
||||
__hdx_percentage: '3.0',
|
||||
},
|
||||
{
|
||||
__hdx_value: 'warn',
|
||||
__hdx_percentage: '6.5',
|
||||
},
|
||||
{
|
||||
__hdx_value: 'error',
|
||||
__hdx_percentage: '4.1',
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
it('should fetch and return values distribution for severity', async () => {
|
||||
const result = await metadata.getValuesDistribution({
|
||||
chartConfig: mockChartConfig,
|
||||
key: 'severity',
|
||||
});
|
||||
|
||||
expect(result).toEqual(
|
||||
new Map([
|
||||
['info', Number(85.9)],
|
||||
['debug', Number(3.0)],
|
||||
['warn', Number(6.5)],
|
||||
['error', Number(4.1)],
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it('should include alias CTEs when provided in the config', async () => {
|
||||
const configWithAliases = {
|
||||
...mockChartConfig,
|
||||
with: [
|
||||
{
|
||||
name: 'service',
|
||||
sql: {
|
||||
sql: 'ServiceName',
|
||||
params: {},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'severity',
|
||||
sql: {
|
||||
sql: 'SeverityText',
|
||||
params: {},
|
||||
},
|
||||
},
|
||||
],
|
||||
where: "severity = 'info'",
|
||||
};
|
||||
|
||||
const renderChartConfigSpy = jest.spyOn(
|
||||
renderChartConfigModule,
|
||||
'renderChartConfig',
|
||||
);
|
||||
|
||||
await metadata.getValuesDistribution({
|
||||
chartConfig: configWithAliases,
|
||||
key: 'severity',
|
||||
});
|
||||
|
||||
const actualConfig = renderChartConfigSpy.mock.calls[0][0];
|
||||
expect(actualConfig.with).toContainEqual({
|
||||
name: 'service',
|
||||
sql: {
|
||||
sql: 'ServiceName',
|
||||
params: {},
|
||||
},
|
||||
});
|
||||
expect(actualConfig.with).toContainEqual({
|
||||
name: 'severity',
|
||||
sql: {
|
||||
sql: 'SeverityText',
|
||||
params: {},
|
||||
},
|
||||
});
|
||||
expect(actualConfig.where).toBe("severity = 'info'");
|
||||
});
|
||||
|
||||
it('should include filters from the config in the query', async () => {
|
||||
const configWithFilters: ChartConfigWithDateRange = {
|
||||
...mockChartConfig,
|
||||
filters: [
|
||||
{
|
||||
type: 'sql',
|
||||
condition: "ServiceName IN ('clickhouse')",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const renderChartConfigSpy = jest.spyOn(
|
||||
renderChartConfigModule,
|
||||
'renderChartConfig',
|
||||
);
|
||||
|
||||
await metadata.getValuesDistribution({
|
||||
chartConfig: configWithFilters,
|
||||
key: 'severity',
|
||||
});
|
||||
|
||||
const actualConfig = renderChartConfigSpy.mock.calls[0][0];
|
||||
expect(actualConfig.filters).toContainEqual({
|
||||
type: 'sql',
|
||||
condition: "ServiceName IN ('clickhouse')",
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { ClickHouseSettings } from '@clickhouse/client-common';
|
||||
import { omit, pick } from 'lodash';
|
||||
|
||||
import {
|
||||
BaseClickhouseClient,
|
||||
|
|
@ -570,6 +571,87 @@ export class Metadata {
|
|||
return tableMetadata;
|
||||
}
|
||||
|
||||
/**
 * Estimates what percentage of rows carries each value of `key`, based on a
 * sample of the rows matched by `chartConfig`.
 *
 * A `tableStats` CTE computes the total row count and a sampling factor
 * derived from `samples`; a sampling filter then restricts the grouped query
 * to roughly that many rows. Results are cached via `this.cache`.
 *
 * @param chartConfig - Chart config (source, date range, filters, where clause, CTEs) to sample from.
 * @param key - Expression whose values are grouped and counted.
 * @param samples - Target number of sampled rows (default 100,000).
 * @param limit - Maximum number of values returned, ordered by descending percentage (default 100).
 * @returns A map from value to its percentage within the sampled rows.
 */
async getValuesDistribution({
  chartConfig,
  key,
  samples = 100_000,
  limit = 100,
}: {
  chartConfig: ChartConfigWithDateRange;
  key: string;
  samples?: number;
  limit?: number;
}) {
  const cacheKeyConfig = pick(chartConfig, [
    'connection',
    'from',
    'dateRange',
    'filters',
    'where',
    'with',
  ]);

  // `samples` and `limit` both change the query that is executed, so they are
  // part of the cache key; otherwise calls that differ only in these values
  // would receive each other's cached results.
  const cacheKey = `${JSON.stringify(cacheKeyConfig)}.${key}.${samples}.${limit}.valuesDistribution`;

  return this.cache.getOrFetch(cacheKey, async () => {
    const config: ChartConfigWithDateRange = {
      ...chartConfig,
      with: [
        ...(chartConfig.with || []),
        // Add CTE to get total row count and sample factor
        {
          name: 'tableStats',
          chartConfig: {
            ...omit(chartConfig, ['with', 'groupBy', 'orderBy', 'limit']),
            select: `count() as total, greatest(CAST(total / ${samples} AS UInt32), 1) as sample_factor`,
          },
        },
      ],
      // Add sampling condition as a filter. The query will still read all rows to evaluate
      // the sampling condition, but will only read values column from selected rows.
      filters: [
        ...(chartConfig.filters || []),
        {
          type: 'sql',
          condition: `cityHash64(${chartConfig.timestampValueExpression}, rand()) % (SELECT sample_factor FROM tableStats) = 0`,
        },
      ],
      select: `${key} AS __hdx_value, count() as __hdx_count, __hdx_count / (sum(__hdx_count) OVER ()) * 100 AS __hdx_percentage`,
      orderBy: '__hdx_percentage DESC',
      groupBy: `__hdx_value`,
      limit: { limit },
    };

    const sql = await renderChartConfig(config, this);

    const response = await this.clickhouseClient.query<'JSON'>({
      query: sql.sql,
      query_params: sql.params,
      connectionId: chartConfig.connection,
      clickhouse_settings: {
        ...this.getClickHouseSettings(),
        // Set max_rows_to_group_by to avoid using too much memory when grouping on high cardinality key columns
        max_rows_to_group_by: `${limit * 10}`,
        group_by_overflow_mode: 'any',
      },
    });

    const json = await response.json<{
      __hdx_value: string;
      __hdx_percentage: string | number;
    }>();

    // The test fixture in this change returns percentages as strings, so
    // normalise every entry to a number.
    return new Map(
      json.data.map(({ __hdx_value, __hdx_percentage }) => [
        __hdx_value,
        Number(__hdx_percentage),
      ]),
    );
  });
}
|
||||
|
||||
async getKeyValues({
|
||||
chartConfig,
|
||||
keys,
|
||||
|
|
|
|||
Loading…
Reference in a new issue