Use a single query to fetch client names per coordinate (#2696)

This commit is contained in:
Kamil Kisiela 2023-08-02 17:48:47 +02:00 committed by GitHub
parent 00cb49dba0
commit e31491fe34
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 112 additions and 164 deletions

View file

@ -696,32 +696,30 @@ export class OperationsManager {
});
}
private clientListForSchemaCoordinateDataLoaderCache = new Map<
private clientNamesPerCoordinateOfTypeDataLoaderCache = new Map<
string,
DataLoader<string, Array<string> | null>
DataLoader<string, Map<string, Set<string>>>
>();
private getClientListForSchemaCoordinateDataLoader(args: { target: string; period: DateRange }) {
private getClientNamesPerCoordinateOfTypeLoader(args: { target: string; period: DateRange }) {
// Stores a DataLoader per target and date range
// A many type names can share the same DataLoader as long as they share the same target and date range.
const cacheKey = [args.target, args.period.from, args.period.to].join('__');
let loader = this.clientListForSchemaCoordinateDataLoaderCache.get(cacheKey);
let loader = this.clientNamesPerCoordinateOfTypeDataLoaderCache.get(cacheKey);
if (loader == null) {
loader = new DataLoader<string, Array<string> | null>(async schemaCoordinates => {
const clientsBySchemaCoordinate = await this.reader.getClientListForSchemaCoordinates({
targetId: args.target,
period: args.period,
schemaCoordinates,
});
return schemaCoordinates.map(schemaCoordinate => {
const clients = clientsBySchemaCoordinate.get(schemaCoordinate);
if (clients == null) {
return null;
}
return Array.from(clients);
});
loader = new DataLoader<string, Map<string, Set<string>>>(typenames => {
return Promise.all(
typenames.map(typename => {
return this.reader.getClientNamesPerCoordinateOfType({
targetId: args.target,
period: args.period,
typename,
});
}),
);
});
this.clientListForSchemaCoordinateDataLoaderCache.set(cacheKey, loader);
this.clientNamesPerCoordinateOfTypeDataLoaderCache.set(cacheKey, loader);
}
return loader;
@ -731,10 +729,11 @@ export class OperationsManager {
* Receive a list of clients that queried a specific schema coordinate.
* Uses DataLoader underneath for batching.
*/
async getClientListForSchemaCoordinate(
async getClientNamesPerCoordinateOfType(
args: {
period: DateRange;
schemaCoordinate: string;
typename: string;
} & TargetSelector,
) {
await this.authManager.ensureTargetAccess({
@ -744,12 +743,14 @@ export class OperationsManager {
scope: TargetAccessScope.REGISTRY_READ,
});
const loader = this.getClientListForSchemaCoordinateDataLoader({
const loader = this.getClientNamesPerCoordinateOfTypeLoader({
target: args.target,
period: args.period,
});
return loader.load(args.schemaCoordinate);
const clientNamesCoordinateMap = await loader.load(args.typename);
return Array.from(clientNamesCoordinateMap.get(args.schemaCoordinate) ?? []);
}
async hasOperationsForOrganization(selector: OrganizationSelector): Promise<boolean> {

View file

@ -9,35 +9,12 @@ import { ClickHouse, RowOf, sql } from './clickhouse-client';
import { calculateTimeWindow } from './helpers';
import { SqlValue } from './sql';
const GetHashesForSchemaCoordinateModel = z
.array(
z.object({
hash: z.string(),
coordinate: z.string(),
}),
)
.transform(data => (data.length === 0 ? null : data));
const GetClientNamesForHashesModel = z
.array(
z.object({
clientName: z.string(),
hash: z.string(),
}),
)
.transform(data => {
const map = new Map<string, Set<string>>();
for (const record of data) {
let clientNames = map.get(record.hash);
if (clientNames == null) {
clientNames = new Set();
map.set(record.hash, clientNames);
}
clientNames.add(record.clientName === '' ? 'unknown' : record.clientName);
}
return map;
});
const CoordinateClientNamesGroupModel = z.array(
z.object({
coordinate: z.string(),
client_names: z.array(z.string()),
}),
);
function formatDate(date: Date): string {
return format(addMinutes(date, date.getTimezoneOffset()), 'yyyy-MM-dd HH:mm:ss');
@ -733,96 +710,53 @@ export class OperationsReader {
});
}
@sentry('OperationsReader.getClientListForSchemaCoordinates')
async getClientListForSchemaCoordinates(args: {
@sentry('OperationsReader.getClientNamesPerCoordinateOfType')
async getClientNamesPerCoordinateOfType(args: {
targetId: string;
period: DateRange;
schemaCoordinates: ReadonlyArray<string>;
typename: string;
}): Promise<Map<string, Set<string>>> {
// 1. Fetch all hashes for schema coordinates
const hashQuery = sql`
SELECT
"hash",
"coordinate"
FROM
"coordinates_daily"
${this.createFilter({
target: args.targetId,
period: args.period,
extra: [sql`coordinate IN (${sql.array(args.schemaCoordinates, 'String')})`],
})}
GROUP BY
"hash",
"coordinate"
`;
// The Explorer page is the only consumer of this method.
// It displays:
// - a list of fields of a given (interface, input object, object) type (in this case we can use Type.*)
// - a list of fields of root types (in this case we can use Query.*, Mutation.*, Subscription.*)
// - enums (in this case we can use Enum.* + Enum)
// - union (in this case we can use Union.* + Union)
// - scalar (in this case we can use Scalar)
// We clearly over-fetch here as we fetch all coordinates of a given type,
// even though some coordinates may no longer be used in the schema.
// But it's a fine tradeoff for the sake of simplicity.
const hashesForSchemaCoordinates = await this.clickHouse
.query({
queryId: 'get_hashes_for_schema_coordinates',
query: hashQuery,
timeout: 10_000,
})
.then(result => GetHashesForSchemaCoordinateModel.parse(result.data));
if (hashesForSchemaCoordinates === null) {
return new Map();
}
// 1. Fetch all client names for hashes
const clientNamesForHashesQuery = sql`
SELECT
"client_name" as "clientName",
"hash"
FROM
"clients_daily"
${this.createFilter({
target: args.targetId,
period: args.period,
extra: [
sql`
"hash" IN (
${sql.array(
hashesForSchemaCoordinates.map(record => record.hash),
'String',
)}
const dbResult = await this.clickHouse.query({
queryId: 'get_hashes_for_schema_coordinates',
query: sql`
SELECT
co.coordinate,
arrayDistinct(groupArray(cl.client_name)) as client_names
FROM coordinates_daily as co
LEFT JOIN clients_daily as cl ON (
co.hash = cl.hash AND
${this.createFilter({
target: args.targetId,
period: args.period,
skipWhere: true,
namespace: 'cl',
})}
)
`,
],
})}
GROUP BY
"client_name",
"hash"
`;
${this.createFilter({
target: args.targetId,
period: args.period,
extra: [sql`coordinate = ${args.typename} OR coordinate LIKE ${args.typename + '.%'}`],
})}
GROUP BY co.hash, co.coordinate
`,
timeout: 15_000,
});
const clientNamesForHashes = await this.clickHouse
.query({
queryId: 'get_client_names_for_hashes',
query: clientNamesForHashesQuery,
timeout: 10_000,
})
.then(result => GetClientNamesForHashesModel.parse(result.data));
// 3. Match hashes to schema coordinates
const clientsBySchemaCoordinate = new Map<string, Set<string>>();
for (const record of hashesForSchemaCoordinates) {
const newItems = clientNamesForHashes.get(record.hash);
if (newItems === undefined) {
continue;
}
let mapping = clientsBySchemaCoordinate.get(record.coordinate);
if (mapping == null) {
mapping = new Set();
clientsBySchemaCoordinate.set(record.coordinate, mapping);
}
for (const item of newItems) {
mapping.add(item);
}
}
return clientsBySchemaCoordinate;
const list = CoordinateClientNamesGroupModel.parse(dbResult.data);
return new Map<string, Set<string>>(
list.map(item => [item.coordinate, new Set(item.client_names)]),
);
}
@sentry('OperationsReader.readUniqueClientNames')
@ -1676,43 +1610,51 @@ export class OperationsReader {
operations,
clients,
extra = [],
skipWhere = false,
namespace,
}: {
target?: string | readonly string[];
period?: DateRange;
operations?: readonly string[];
clients?: readonly string[];
extra?: SqlValue[];
skipWhere?: boolean;
namespace?: string;
}): SqlValue {
const where: SqlValue[] = [];
const columnPrefix = sql.raw(namespace ? `${namespace}.` : '');
if (target) {
if (Array.isArray(target)) {
where.push(sql`target IN (${sql.array(target, 'String')})`);
where.push(sql`${columnPrefix}target IN (${sql.array(target, 'String')})`);
} else {
where.push(sql`target = ${target as string}`);
where.push(sql`${columnPrefix}target = ${target as string}`);
}
}
if (period) {
where.push(
sql`timestamp >= toDateTime(${formatDate(period.from)}, 'UTC')`,
sql`timestamp <= toDateTime(${formatDate(period.to)}, 'UTC')`,
sql`${columnPrefix}timestamp >= toDateTime(${formatDate(period.from)}, 'UTC')`,
sql`${columnPrefix}timestamp <= toDateTime(${formatDate(period.to)}, 'UTC')`,
);
}
if (operations?.length) {
where.push(sql`(hash) IN (${sql.array(operations, 'String')})`);
where.push(sql`(${columnPrefix}hash) IN (${sql.array(operations, 'String')})`);
}
if (clients?.length) {
where.push(sql`"client_name" IN (${sql.array(clients, 'String')})`);
where.push(sql`${sql.raw(namespace ?? '')}client_name IN (${sql.array(clients, 'String')})`);
}
if (extra.length) {
where.push(...extra);
}
const statement = where.length ? sql` PREWHERE ${sql.join(where, ' AND ')} ` : sql``;
const statement = where.length
? sql` ${sql.raw(skipWhere ? '' : 'PREWHERE')} ${sql.join(where, ' AND ')} `
: sql``;
return statement;
}

View file

@ -1093,6 +1093,7 @@ export const resolvers: SchemaModule.Resolvers = {
selector: source.usage,
period: source.usage.period,
operationsManager,
typename: entity.name,
}),
);
@ -1198,7 +1199,7 @@ export const resolvers: SchemaModule.Resolvers = {
const typeMap = schema.getTypeMap();
const operationsManager = injector.get(OperationsManager);
async function getStats() {
async function getStats(typename: string) {
const stats = await operationsManager.countCoordinatesOfTarget({
target: usage.target,
organization: usage.organization,
@ -1210,6 +1211,7 @@ export const resolvers: SchemaModule.Resolvers = {
selector: usage,
period: usage.period,
operationsManager,
typename,
});
}
@ -1224,7 +1226,7 @@ export const resolvers: SchemaModule.Resolvers = {
types.push({
entity,
get usage() {
return getStats();
return getStats(entity.name);
},
supergraph: supergraph
? {
@ -1240,7 +1242,7 @@ export const resolvers: SchemaModule.Resolvers = {
types.push({
entity,
get usage() {
return getStats();
return getStats(entity.name);
},
supergraph: supergraph
? {
@ -1256,7 +1258,7 @@ export const resolvers: SchemaModule.Resolvers = {
types.push({
entity,
get usage() {
return getStats();
return getStats(entity.name);
},
supergraph: supergraph
? {
@ -1272,7 +1274,7 @@ export const resolvers: SchemaModule.Resolvers = {
types.push({
entity,
get usage() {
return getStats();
return getStats(entity.name);
},
supergraph: supergraph
? {
@ -1287,7 +1289,7 @@ export const resolvers: SchemaModule.Resolvers = {
types.push({
entity,
get usage() {
return getStats();
return getStats(entity.name);
},
supergraph: supergraph
? {
@ -1304,7 +1306,7 @@ export const resolvers: SchemaModule.Resolvers = {
types.push({
entity,
get usage() {
return getStats();
return getStats(entity.name);
},
supergraph: supergraph
? {
@ -1344,6 +1346,7 @@ export const resolvers: SchemaModule.Resolvers = {
selector: usage,
period: usage.period,
operationsManager,
typename: entity.name,
}),
);
},
@ -1382,6 +1385,7 @@ export const resolvers: SchemaModule.Resolvers = {
selector: usage,
period: usage.period,
operationsManager,
typename: entity.name,
}),
);
},
@ -1421,6 +1425,7 @@ export const resolvers: SchemaModule.Resolvers = {
selector: usage,
period: usage.period,
operationsManager,
typename: entity.name,
}),
);
},
@ -1750,6 +1755,7 @@ function withUsedByClients<
selector: TargetSelector;
operationsManager: OperationsManager;
period: DateRange;
typename: string;
},
): Record<string, T & { usedByClients: PromiseOrValue<Array<string> | null> }> {
return Object.fromEntries(
@ -1762,16 +1768,13 @@ function withUsedByClients<
return null;
}
return deps.operationsManager
.getClientListForSchemaCoordinate({
...deps.selector,
period: deps.period,
schemaCoordinate,
})
.catch(err => {
console.error(err);
return null;
});
// It's using DataLoader under the hood so it's safe to call it multiple times for different coordinates
return deps.operationsManager.getClientNamesPerCoordinateOfType({
...deps.selector,
period: deps.period,
typename: deps.typename,
schemaCoordinate,
});
},
},
]),

View file

@ -116,6 +116,7 @@ async function graphql(req: NextApiRequest, res: NextApiResponse) {
const code = (error as Record<string, unknown | undefined>)?.['code'] ?? '';
const message = (error as Record<string, unknown | undefined>)?.['message'] ?? '';
res.setHeader('x-request-id', requestId);
res.status(status).json({
code,
error: message,
@ -128,7 +129,7 @@ export default wrapApiHandlerWithSentry(graphql, 'api/proxy');
export const config = {
api: {
bodyParser: {
sizeLimit: '6mb',
sizeLimit: '10mb',
},
externalResolver: true,
},

View file

@ -18,13 +18,14 @@ type Period = {
to: string;
};
function floorDate(date: Date): Date {
const time = 1000 * 60;
return new Date(Math.floor(date.getTime() / time) * time);
function toStartOfToday(): Date {
const today = new Date();
today.setHours(0, 0, 0, 0);
return today;
}
function createPeriod(option: PeriodOption): Period {
const now = floorDate(new Date());
const now = toStartOfToday();
const value = parseInt(option.replace('d', ''), 10);
return {