mirror of
https://github.com/graphql-hive/console
synced 2026-05-23 17:18:23 +00:00
Dynamic sampling in GraphQL Hive client (#3331)
This commit is contained in:
parent
c03a58c05e
commit
dad92067ff
10 changed files with 372 additions and 9 deletions
5
.changeset/old-pigs-dress.md
Normal file
5
.changeset/old-pigs-dress.md
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
'@graphql-hive/client': minor
|
||||
---
|
||||
|
||||
Add atLeastOnceSampler
|
||||
5
.changeset/twenty-adults-cough.md
Normal file
5
.changeset/twenty-adults-cough.md
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
'@graphql-hive/client': minor
|
||||
---
|
||||
|
||||
Introduce sampler for dynamic sampling
|
||||
|
|
@ -319,6 +319,89 @@ const server = new ApolloServer({
|
|||
})
|
||||
```
|
||||
|
||||
### Sampling
|
||||
|
||||
#### Basic sampling
|
||||
|
||||
With `sampleRate` option, you're able to control the sampling rate of the usage reporting. Setting
|
||||
it to `0.5` will result in 50% of the operations being sent to Hive. There is no guarantee that
|
||||
every operation will be reported at least once (see `atLeastOnceSampler`).
|
||||
|
||||
Default: `1` (100%)
|
||||
|
||||
```typescript
|
||||
useHive({
|
||||
// ... other options ...
|
||||
usage: {
|
||||
sampleRate: 0.6 // 60% of the operations will be sent to Hive
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
#### Dynamic sampling
|
||||
|
||||
GraphQL Hive client accepts a function that returns a number between 0 and 1. This allows you to
|
||||
implement dynamic sampling based on the operation's context.
|
||||
|
||||
If `sampler` is defined, `sampleRate` is ignored.
|
||||
|
||||
The function must return a sample rate between 0 and 1, or a boolean:
|
||||
|
||||
- `0.0` = 0% chance of being sent
|
||||
- `1.0` = 100% chance of being sent.
|
||||
- `true` = 100%
|
||||
- `false` = 0%
|
||||
|
||||
```typescript
|
||||
useHive({
|
||||
// ... other options ...
|
||||
usage: {
|
||||
sampler(samplingContext) {
|
||||
if (samplingContext.operationName === 'GetUser') {
|
||||
return 0.5 // 50% of GetUser operations will be sent to Hive
|
||||
}
|
||||
|
||||
return 0.7; // 70% of the other operations will be sent to Hive
|
||||
}
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
#### At-least-once sampling
|
||||
|
||||
If you want to make sure that every operation is reported at least once, you can use the
|
||||
`atLeastOnceSampler`. Every operation is reported at least once, and each subsequent occurrence is
|
||||
decided by the sampler.
|
||||
|
||||
```typescript
|
||||
import { useHive, atLeastOnceSampler } from '@graphql-hive/client';
|
||||
|
||||
useHive({
|
||||
// ... other options ...
|
||||
usage: {
|
||||
sampler: atLeastOnceSampler({
|
||||
// Produces a unique key for a given GraphQL request.
|
||||
// This key is used to determine the uniqueness of a GraphQL operation.
|
||||
keyFn(samplingContext) {
|
||||
// Operation name is a good candidate for a key, but not perfect,
|
||||
// as not all operations have names
|
||||
// and some operations may have the same name but different body.
|
||||
return samplingContext.operationName;
|
||||
},
|
||||
sampler(_samplingContext) {
|
||||
const hour = new Date().getHours();
|
||||
|
||||
if (hour >= 9 && hour <= 17) {
|
||||
return 0.3;
|
||||
}
|
||||
|
||||
return 0.8;
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
## Self-Hosting
|
||||
|
||||
To align the client with your own instance of GraphQL Hive, you should use `selfHosting` options in
|
||||
|
|
|
|||
|
|
@ -4,3 +4,4 @@ export { useHive as useYogaHive } from './yoga.js';
|
|||
export { hiveApollo, createSupergraphSDLFetcher, createSupergraphManager } from './apollo.js';
|
||||
export { createSchemaFetcher, createServicesFetcher } from './gateways.js';
|
||||
export { createHive } from './client.js';
|
||||
export { atLeastOnceSampler } from './samplers.js';
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import type { SamplingContext } from './types.js';
|
||||
|
||||
export function randomSampling(sampleRate: number) {
|
||||
if (sampleRate > 1 || sampleRate < 0) {
|
||||
throw new Error(`Expected usage.sampleRate to be 0 <= x <= 1, received ${sampleRate}`);
|
||||
|
|
@ -7,3 +9,21 @@ export function randomSampling(sampleRate: number) {
|
|||
return Math.random() <= sampleRate;
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a sampling predicate around a user-supplied `sampler` callback.
 *
 * The callback is invoked for every sampling decision and may return either a
 * probability within `[0, 1]` or a boolean (`true` = always include,
 * `false` = never include).
 *
 * @param sampler - Computes the sample rate for the given sampling context.
 * @returns A function that decides whether the given operation should be included.
 * @throws Error when the sampler returns anything other than a boolean or a
 * number within `[0, 1]` (this includes `NaN`).
 */
export function dynamicSampling(sampler: (context: SamplingContext) => number | boolean) {
  return function shouldInclude(context: SamplingContext): boolean {
    const result = sampler(context);

    // Booleans are an explicit always/never decision, no randomness involved.
    if (typeof result === 'boolean') {
      return result;
    }

    // The negated range check also rejects NaN, which would otherwise slip through
    // `> 1 || < 0` and silently drop every operation.
    if (typeof result !== 'number' || !(result >= 0 && result <= 1)) {
      throw new Error(
        `Expected usage.sampler to return a sample rate of 0 <= x <= 1, received ${result}`,
      );
    }

    // Math.random() yields values in [0, 1), so a strict comparison guarantees that
    // a rate of 0 never includes and a rate of 1 always includes.
    return Math.random() < result;
  };
}
|
||||
|
|
|
|||
|
|
@ -78,6 +78,18 @@ export interface HiveUsagePluginOptions {
|
|||
* Default: 1.0
|
||||
*/
|
||||
sampleRate?: number;
|
||||
/**
|
||||
* Compute sample rate dynamically.
|
||||
*
|
||||
* If `sampler` is defined, `sampleRate` is ignored.
|
||||
*
|
||||
* @returns A sample rate between 0 and 1.
|
||||
* 0.0 = 0% chance of being sent
|
||||
* 1.0 = 100% chance of being sent.
|
||||
* true = 100%
|
||||
* false = 0%
|
||||
*/
|
||||
sampler?: (context: SamplingContext) => number | boolean;
|
||||
/**
|
||||
* (Experimental) Enables collecting Input fields usage based on the variables passed to the operation.
|
||||
*
|
||||
|
|
@ -86,6 +98,11 @@ export interface HiveUsagePluginOptions {
|
|||
processVariables?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Context object handed to a `sampler` callback for a single sampling decision.
 *
 * Carries the `document`, `contextValue` and `variableValues` fields picked from
 * `ExecutionArgs`, plus the operation name.
 */
export interface SamplingContext
|
||||
extends Pick<ExecutionArgs, 'document' | 'contextValue' | 'variableValues'> {
|
||||
/** Name of the operation; the usage collector passes `'anonymous'` when no name is available. */
operationName: string;
|
||||
}
|
||||
|
||||
export interface HiveReportingPluginOptions {
|
||||
/**
|
||||
* Custom endpoint to collect schema reports
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ import LRU from 'tiny-lru';
|
|||
import { normalizeOperation } from '@graphql-hive/core';
|
||||
import { version } from '../version.js';
|
||||
import { createAgent } from './agent.js';
|
||||
import { randomSampling } from './sampling.js';
|
||||
import { dynamicSampling, randomSampling } from './sampling.js';
|
||||
import type {
|
||||
AbortAction,
|
||||
ClientInfo,
|
||||
|
|
@ -145,7 +145,10 @@ export function createUsage(pluginOptions: HivePluginOptions): UsageCollector {
|
|||
logger.error,
|
||||
);
|
||||
|
||||
const shouldInclude = randomSampling(options.sampleRate ?? 1.0);
|
||||
const shouldInclude =
|
||||
options.sampler && typeof options.sampler === 'function'
|
||||
? dynamicSampling(options.sampler)
|
||||
: randomSampling(options.sampleRate ?? 1.0);
|
||||
|
||||
return {
|
||||
dispose: agent.dispose,
|
||||
|
|
@ -175,7 +178,15 @@ export function createUsage(pluginOptions: HivePluginOptions): UsageCollector {
|
|||
providedOperationName = args.operationName || rootOperation.name?.value;
|
||||
const operationName = providedOperationName || 'anonymous';
|
||||
|
||||
if (!excludeSet.has(operationName) && shouldInclude()) {
|
||||
if (
|
||||
!excludeSet.has(operationName) &&
|
||||
shouldInclude({
|
||||
operationName,
|
||||
document,
|
||||
variableValues: args.variableValues,
|
||||
contextValue: args.contextValue,
|
||||
})
|
||||
) {
|
||||
const errors =
|
||||
result.errors?.map(error => ({
|
||||
message: error.message,
|
||||
|
|
|
|||
28
packages/libraries/client/src/samplers.ts
Normal file
28
packages/libraries/client/src/samplers.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import { dynamicSampling } from './internal/sampling.js';
|
||||
import type { SamplingContext } from './internal/types.js';
|
||||
|
||||
/**
 * Creates a sampler that always reports the first occurrence of every distinct key
 * and delegates the decision for each later occurrence of that key to `config.sampler`.
 */
export function atLeastOnceSampler(config: {
  /**
   * Derives the key that identifies a GraphQL request.
   * Requests yielding the same key are treated as the same GraphQL operation.
   */
  keyFn(context: SamplingContext): string;
  /**
   * Sample rate (number or boolean) applied to keys that have been seen before.
   */
  sampler(context: SamplingContext): number | boolean;
}) {
  // Keys that have already been let through once.
  const seenKeys = new Set<string>();
  const decide = dynamicSampling(config.sampler);

  return function shouldInclude(context: SamplingContext): boolean {
    const operationKey = config.keyFn(context);

    // Known key: fall back to the regular dynamic sampling decision.
    if (seenKeys.has(operationKey)) {
      return decide(context);
    }

    // First sighting of this key: remember it and always include it.
    seenKeys.add(operationKey);
    return true;
  };
}
|
||||
|
|
@ -3,6 +3,7 @@ import { buildSchema, parse } from 'graphql';
|
|||
import nock from 'nock';
|
||||
import { createHive } from '../src/client';
|
||||
import type { Report } from '../src/internal/usage';
|
||||
import { atLeastOnceSampler } from '../src/samplers';
|
||||
import { version } from '../src/version';
|
||||
import { waitFor } from './test-utils';
|
||||
|
||||
|
|
@ -86,6 +87,21 @@ const op = parse(/* GraphQL */ `
|
|||
}
|
||||
`);
|
||||
|
||||
const op2 = parse(/* GraphQL */ `
|
||||
query getProject($selector: ProjectSelectorInput!) {
|
||||
project(selector: $selector) {
|
||||
...ProjectFields
|
||||
}
|
||||
}
|
||||
|
||||
fragment ProjectFields on Project {
|
||||
id
|
||||
cleanId
|
||||
name
|
||||
type
|
||||
}
|
||||
`);
|
||||
|
||||
beforeEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
|
@ -433,3 +449,110 @@ test('sendImmediately should not stop the schedule', async () => {
|
|||
await waitFor(1000);
|
||||
http.done();
|
||||
});
|
||||
|
||||
// Verifies that atLeastOnceSampler lets the first occurrence of each operation through
// even though the wrapped sampler itself rejects everything.
test('should send data to Hive at least once when using atLeastOnceSampler', async () => {
  const logger = {
    error: vi.fn(),
    info: vi.fn(),
  };
  const token = 'Token';

  // Overwritten with the request body once the mocked usage endpoint is hit.
  let report: Report = {
    size: 0,
    map: {},
    operations: [],
  };

  const usageEndpoint = nock('http://localhost')
    .post('/200')
    .matchHeader('Authorization', `Bearer ${token}`)
    .matchHeader('Content-Type', headers['Content-Type'])
    .matchHeader('graphql-client-name', headers['graphql-client-name'])
    .matchHeader('graphql-client-version', headers['graphql-client-version'])
    .once()
    .reply((_, _body) => {
      report = _body as any;
      return [200];
    });

  const hive = createHive({
    enabled: true,
    debug: true,
    agent: {
      timeout: 500,
      maxRetries: 0,
      logger,
    },
    token,
    selfHosting: {
      graphqlEndpoint: 'http://localhost/graphql',
      applicationUrl: 'http://localhost/',
      usageEndpoint: 'http://localhost/200',
    },
    usage: {
      sampler: atLeastOnceSampler({
        keyFn(ctx) {
          return ctx.operationName;
        },
        sampler() {
          // Reject every repeated occurrence, so only first occurrences are reported.
          return 0;
        },
      }),
    },
  });

  const collect = hive.collectUsage();

  await waitFor(2000);

  // Two distinct operations followed by a duplicate of the first one.
  const executions = [
    { document: op, operationName: 'deleteProject' },
    { document: op2, operationName: 'getProject' },
    { document: op, operationName: 'deleteProject' },
  ];
  for (const execution of executions) {
    collect({ schema, ...execution }, {});
  }

  await hive.dispose();
  await waitFor(1000);
  usageEndpoint.done();

  expect(logger.error).not.toHaveBeenCalled();
  expect(logger.info).toHaveBeenCalledWith(`[hive][usage] Sending (queue 2) (attempt 1)`);
  expect(logger.info).toHaveBeenCalledWith(`[hive][usage] Sent!`);

  // Map
  expect(report.size).toEqual(2);
  expect(Object.keys(report.map)).toHaveLength(2);

  const foundRecords: string[] = Object.values(report.map).map(
    record => record.operationName ?? 'anonymous',
  );

  expect(foundRecords).toContainEqual('deleteProject');
  expect(foundRecords).toContainEqual('getProject');

  // The duplicated call must not have produced a third operation.
  expect(report.operations).toHaveLength(2);
});
|
||||
|
|
|
|||
|
|
@ -68,15 +68,85 @@ const config: HivePluginOptions = {
|
|||
|
||||
#### Sampling
|
||||
|
||||
You can pass a custom `sampleRate` array to the `HivePluginOptions` to sample a percentage of the
|
||||
total operations reported. By default, Hive agent reports 100% of the operations (`1.0`).
|
||||
##### Basic sampling
|
||||
|
||||
```ts
|
||||
const config: HivePluginOptions = {
|
||||
With `sampleRate` option, you're able to control the sampling rate of the usage reporting. Setting
|
||||
it to `0.5` will result in 50% of the operations being sent to Hive. There is no guarantee that
|
||||
every operation will be reported at least once (see `atLeastOnceSampler`).
|
||||
|
||||
Default: `1` (100%)
|
||||
|
||||
```typescript
|
||||
useHive({
|
||||
// ... other options ...
|
||||
usage: {
|
||||
sampleRate: 0.1
|
||||
sampleRate: 0.6 // 60% of the operations will be sent to Hive
|
||||
}
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
##### Dynamic sampling
|
||||
|
||||
GraphQL Hive client accepts a function that returns a number between 0 and 1. This allows you to
|
||||
implement dynamic sampling based on the operation's context.
|
||||
|
||||
If `sampler` is defined, `sampleRate` is ignored.
|
||||
|
||||
The function must return a sample rate between 0 and 1, or a boolean:
|
||||
|
||||
- `0.0` = 0% chance of being sent
|
||||
- `1.0` = 100% chance of being sent.
|
||||
- `true` = 100%
|
||||
- `false` = 0%
|
||||
|
||||
```typescript
|
||||
useHive({
|
||||
// ... other options ...
|
||||
usage: {
|
||||
sampler(samplingContext) {
|
||||
if (samplingContext.operationName === 'GetUser') {
|
||||
return 0.5 // 50% of GetUser operations will be sent to Hive
|
||||
}
|
||||
|
||||
return 0.7; // 70% of the other operations will be sent to Hive
|
||||
}
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
##### At-least-once sampling
|
||||
|
||||
If you want to make sure that every operation is reported at least once, you can use the
|
||||
`atLeastOnceSampler`. Every operation is reported at least once, and each subsequent occurrence is
|
||||
decided by the sampler.
|
||||
|
||||
```typescript
|
||||
import { useHive, atLeastOnceSampler } from '@graphql-hive/client';
|
||||
|
||||
useHive({
|
||||
// ... other options ...
|
||||
usage: {
|
||||
sampler: atLeastOnceSampler({
|
||||
// Produces a unique key for a given GraphQL request.
|
||||
// This key is used to determine the uniqueness of a GraphQL operation.
|
||||
keyFn(samplingContext) {
|
||||
// Operation name is a good candidate for a key, but not perfect,
|
||||
// as not all operations have names
|
||||
// and some operations may have the same name but different body.
|
||||
return samplingContext.operationName;
|
||||
},
|
||||
sampler(_samplingContext) {
|
||||
const hour = new Date().getHours();
|
||||
|
||||
if (hour >= 9 && hour <= 17) {
|
||||
return 0.3;
|
||||
}
|
||||
|
||||
return 0.8;
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
#### Custom Integration
|
||||
|
|
|
|||
Loading…
Reference in a new issue