From e31491fe34f2df671b3f2e9dbeca10d3cd23ffd5 Mon Sep 17 00:00:00 2001 From: Kamil Kisiela Date: Wed, 2 Aug 2023 17:48:47 +0200 Subject: [PATCH] Use a single query to fetch client names per coordinate (#2696) --- .../providers/operations-manager.ts | 45 ++--- .../operations/providers/operations-reader.ts | 182 ++++++------------ .../api/src/modules/schema/resolvers.ts | 37 ++-- packages/web/app/pages/api/proxy.ts | 3 +- .../components/target/explorer/provider.tsx | 9 +- 5 files changed, 112 insertions(+), 164 deletions(-) diff --git a/packages/services/api/src/modules/operations/providers/operations-manager.ts b/packages/services/api/src/modules/operations/providers/operations-manager.ts index bce7c6770..1b47c636c 100644 --- a/packages/services/api/src/modules/operations/providers/operations-manager.ts +++ b/packages/services/api/src/modules/operations/providers/operations-manager.ts @@ -696,32 +696,30 @@ export class OperationsManager { }); } - private clientListForSchemaCoordinateDataLoaderCache = new Map< + private clientNamesPerCoordinateOfTypeDataLoaderCache = new Map< string, - DataLoader | null> + DataLoader>> >(); - private getClientListForSchemaCoordinateDataLoader(args: { target: string; period: DateRange }) { + private getClientNamesPerCoordinateOfTypeLoader(args: { target: string; period: DateRange }) { + // Stores a DataLoader per target and date range + // A many type names can share the same DataLoader as long as they share the same target and date range. const cacheKey = [args.target, args.period.from, args.period.to].join('__'); - let loader = this.clientListForSchemaCoordinateDataLoaderCache.get(cacheKey); + let loader = this.clientNamesPerCoordinateOfTypeDataLoaderCache.get(cacheKey); if (loader == null) { - loader = new DataLoader | null>(async schemaCoordinates => { - const clientsBySchemaCoordinate = await this.reader.getClientListForSchemaCoordinates({ - targetId: args.target, - period: args.period, - schemaCoordinates, - }); - - return schemaCoordinates.map(schemaCoordinate => { - const clients = clientsBySchemaCoordinate.get(schemaCoordinate); - if (clients == null) { - return null; - } - return Array.from(clients); - }); + loader = new DataLoader>>(typenames => { + return Promise.all( + typenames.map(typename => { + return this.reader.getClientNamesPerCoordinateOfType({ + targetId: args.target, + period: args.period, + typename, + }); + }), + ); }); - this.clientListForSchemaCoordinateDataLoaderCache.set(cacheKey, loader); + this.clientNamesPerCoordinateOfTypeDataLoaderCache.set(cacheKey, loader); } return loader; @@ -731,10 +729,11 @@ export class OperationsManager { * Receive a list of clients that queried a specific schema coordinate. * Uses DataLoader underneath for batching. */ - async getClientListForSchemaCoordinate( + async getClientNamesPerCoordinateOfType( args: { period: DateRange; schemaCoordinate: string; + typename: string; } & TargetSelector, ) { await this.authManager.ensureTargetAccess({ @@ -744,12 +743,14 @@ export class OperationsManager { scope: TargetAccessScope.REGISTRY_READ, }); - const loader = this.getClientListForSchemaCoordinateDataLoader({ + const loader = this.getClientNamesPerCoordinateOfTypeLoader({ target: args.target, period: args.period, }); - return loader.load(args.schemaCoordinate); + const clientNamesCoordinateMap = await loader.load(args.typename); + + return Array.from(clientNamesCoordinateMap.get(args.schemaCoordinate) ?? []); } async hasOperationsForOrganization(selector: OrganizationSelector): Promise { diff --git a/packages/services/api/src/modules/operations/providers/operations-reader.ts b/packages/services/api/src/modules/operations/providers/operations-reader.ts index 484629b05..abb67ac84 100644 --- a/packages/services/api/src/modules/operations/providers/operations-reader.ts +++ b/packages/services/api/src/modules/operations/providers/operations-reader.ts @@ -9,35 +9,12 @@ import { ClickHouse, RowOf, sql } from './clickhouse-client'; import { calculateTimeWindow } from './helpers'; import { SqlValue } from './sql'; -const GetHashesForSchemaCoordinateModel = z - .array( - z.object({ - hash: z.string(), - coordinate: z.string(), - }), - ) - .transform(data => (data.length === 0 ? null : data)); - -const GetClientNamesForHashesModel = z - .array( - z.object({ - clientName: z.string(), - hash: z.string(), - }), - ) - .transform(data => { - const map = new Map>(); - for (const record of data) { - let clientNames = map.get(record.hash); - if (clientNames == null) { - clientNames = new Set(); - map.set(record.hash, clientNames); - } - clientNames.add(record.clientName === '' ? 'unknown' : record.clientName); - } - - return map; - }); +const CoordinateClientNamesGroupModel = z.array( + z.object({ + coordinate: z.string(), + client_names: z.array(z.string()), + }), +); function formatDate(date: Date): string { return format(addMinutes(date, date.getTimezoneOffset()), 'yyyy-MM-dd HH:mm:ss'); @@ -733,96 +710,53 @@ export class OperationsReader { }); } - @sentry('OperationsReader.getClientListForSchemaCoordinates') - async getClientListForSchemaCoordinates(args: { + @sentry('OperationsReader.getClientNamesPerCoordinateOfType') + async getClientNamesPerCoordinateOfType(args: { targetId: string; period: DateRange; - schemaCoordinates: ReadonlyArray; + typename: string; }): Promise>> { - // 1. Fetch all hashes for schema coordinates - const hashQuery = sql` - SELECT - "hash", - "coordinate" - FROM - "coordinates_daily" - ${this.createFilter({ - target: args.targetId, - period: args.period, - extra: [sql`coordinate IN (${sql.array(args.schemaCoordinates, 'String')})`], - })} - GROUP BY - "hash", - "coordinate" - `; + // The Explorer page is the only consumer of this method. + // It displays: + // - a list of fields of a given (interface, input object, object) type (in this case we can use Type.*) + // - a list of fields of root types (in this case we can use Query.*, Mutation.*, Subscription.*) + // - enums (in this case we can use Enum.* + Enum) + // - union (in this case we can use Union.* + Union) + // - scalar (in this case we can use Scalar) + // We clearly over-fetch here as we fetch all coordinates of a given type, + // even though some coordinates may no longer be used in the schema. + // But it's a fine tradeoff for the sake of simplicity. - const hashesForSchemaCoordinates = await this.clickHouse - .query({ - queryId: 'get_hashes_for_schema_coordinates', - query: hashQuery, - timeout: 10_000, - }) - .then(result => GetHashesForSchemaCoordinateModel.parse(result.data)); - - if (hashesForSchemaCoordinates === null) { - return new Map(); - } - - // 1. Fetch all client names for hashes - const clientNamesForHashesQuery = sql` - SELECT - "client_name" as "clientName", - "hash" - FROM - "clients_daily" - ${this.createFilter({ - target: args.targetId, - period: args.period, - extra: [ - sql` - "hash" IN ( - ${sql.array( - hashesForSchemaCoordinates.map(record => record.hash), - 'String', - )} + const dbResult = await this.clickHouse.query({ + queryId: 'get_hashes_for_schema_coordinates', + query: sql` + SELECT + co.coordinate, + arrayDistinct(groupArray(cl.client_name)) as client_names + FROM coordinates_daily as co + LEFT JOIN clients_daily as cl ON ( + co.hash = cl.hash AND + ${this.createFilter({ + target: args.targetId, + period: args.period, + skipWhere: true, + namespace: 'cl', + })} ) - `, - ], - })} - GROUP BY - "client_name", - "hash" - `; + ${this.createFilter({ + target: args.targetId, + period: args.period, + extra: [sql`coordinate = ${args.typename} OR coordinate LIKE ${args.typename + '.%'}`], + })} + GROUP BY co.hash, co.coordinate + `, + timeout: 15_000, + }); - const clientNamesForHashes = await this.clickHouse - .query({ - queryId: 'get_client_names_for_hashes', - query: clientNamesForHashesQuery, - timeout: 10_000, - }) - .then(result => GetClientNamesForHashesModel.parse(result.data)); - - // 3. Match hashes to schema coordinates - const clientsBySchemaCoordinate = new Map>(); - - for (const record of hashesForSchemaCoordinates) { - const newItems = clientNamesForHashes.get(record.hash); - if (newItems === undefined) { - continue; - } - - let mapping = clientsBySchemaCoordinate.get(record.coordinate); - if (mapping == null) { - mapping = new Set(); - clientsBySchemaCoordinate.set(record.coordinate, mapping); - } - - for (const item of newItems) { - mapping.add(item); - } - } - - return clientsBySchemaCoordinate; + const list = CoordinateClientNamesGroupModel.parse(dbResult.data); + return new Map>( + list.map(item => [item.coordinate, new Set(item.client_names)]), + ); } @sentry('OperationsReader.readUniqueClientNames') @@ -1676,43 +1610,51 @@ export class OperationsReader { operations, clients, extra = [], + skipWhere = false, + namespace, }: { target?: string | readonly string[]; period?: DateRange; operations?: readonly string[]; clients?: readonly string[]; extra?: SqlValue[]; + skipWhere?: boolean; + namespace?: string; }): SqlValue { const where: SqlValue[] = []; + const columnPrefix = sql.raw(namespace ? `${namespace}.` : ''); + if (target) { if (Array.isArray(target)) { - where.push(sql`target IN (${sql.array(target, 'String')})`); + where.push(sql`${columnPrefix}target IN (${sql.array(target, 'String')})`); } else { - where.push(sql`target = ${target as string}`); + where.push(sql`${columnPrefix}target = ${target as string}`); } } if (period) { where.push( - sql`timestamp >= toDateTime(${formatDate(period.from)}, 'UTC')`, - sql`timestamp <= toDateTime(${formatDate(period.to)}, 'UTC')`, + sql`${columnPrefix}timestamp >= toDateTime(${formatDate(period.from)}, 'UTC')`, + sql`${columnPrefix}timestamp <= toDateTime(${formatDate(period.to)}, 'UTC')`, ); } if (operations?.length) { - where.push(sql`(hash) IN (${sql.array(operations, 'String')})`); + where.push(sql`(${columnPrefix}hash) IN (${sql.array(operations, 'String')})`); } if (clients?.length) { - where.push(sql`"client_name" IN (${sql.array(clients, 'String')})`); + where.push(sql`${sql.raw(namespace ?? '')}client_name IN (${sql.array(clients, 'String')})`); } if (extra.length) { where.push(...extra); } - const statement = where.length ? sql` PREWHERE ${sql.join(where, ' AND ')} ` : sql``; + const statement = where.length + ? sql` ${sql.raw(skipWhere ? '' : 'PREWHERE')} ${sql.join(where, ' AND ')} ` + : sql``; return statement; } diff --git a/packages/services/api/src/modules/schema/resolvers.ts b/packages/services/api/src/modules/schema/resolvers.ts index 73e419b5c..b1bb00f64 100644 --- a/packages/services/api/src/modules/schema/resolvers.ts +++ b/packages/services/api/src/modules/schema/resolvers.ts @@ -1093,6 +1093,7 @@ export const resolvers: SchemaModule.Resolvers = { selector: source.usage, period: source.usage.period, operationsManager, + typename: entity.name, }), ); @@ -1198,7 +1199,7 @@ export const resolvers: SchemaModule.Resolvers = { const typeMap = schema.getTypeMap(); const operationsManager = injector.get(OperationsManager); - async function getStats() { + async function getStats(typename: string) { const stats = await operationsManager.countCoordinatesOfTarget({ target: usage.target, organization: usage.organization, @@ -1210,6 +1211,7 @@ export const resolvers: SchemaModule.Resolvers = { selector: usage, period: usage.period, operationsManager, + typename, }); } @@ -1224,7 +1226,7 @@ export const resolvers: SchemaModule.Resolvers = { types.push({ entity, get usage() { - return getStats(); + return getStats(entity.name); }, supergraph: supergraph ? { @@ -1240,7 +1242,7 @@ export const resolvers: SchemaModule.Resolvers = { types.push({ entity, get usage() { - return getStats(); + return getStats(entity.name); }, supergraph: supergraph ? { @@ -1256,7 +1258,7 @@ export const resolvers: SchemaModule.Resolvers = { types.push({ entity, get usage() { - return getStats(); + return getStats(entity.name); }, supergraph: supergraph ? { @@ -1272,7 +1274,7 @@ export const resolvers: SchemaModule.Resolvers = { types.push({ entity, get usage() { - return getStats(); + return getStats(entity.name); }, supergraph: supergraph ? { @@ -1287,7 +1289,7 @@ export const resolvers: SchemaModule.Resolvers = { types.push({ entity, get usage() { - return getStats(); + return getStats(entity.name); }, supergraph: supergraph ? { @@ -1304,7 +1306,7 @@ export const resolvers: SchemaModule.Resolvers = { types.push({ entity, get usage() { - return getStats(); + return getStats(entity.name); }, supergraph: supergraph ? { @@ -1344,6 +1346,7 @@ export const resolvers: SchemaModule.Resolvers = { selector: usage, period: usage.period, operationsManager, + typename: entity.name, }), ); }, @@ -1382,6 +1385,7 @@ export const resolvers: SchemaModule.Resolvers = { selector: usage, period: usage.period, operationsManager, + typename: entity.name, }), ); }, @@ -1421,6 +1425,7 @@ export const resolvers: SchemaModule.Resolvers = { selector: usage, period: usage.period, operationsManager, + typename: entity.name, }), ); }, @@ -1750,6 +1755,7 @@ function withUsedByClients< selector: TargetSelector; operationsManager: OperationsManager; period: DateRange; + typename: string; }, ): Record | null> }> { return Object.fromEntries( @@ -1762,16 +1768,13 @@ function withUsedByClients< return null; } - return deps.operationsManager - .getClientListForSchemaCoordinate({ - ...deps.selector, - period: deps.period, - schemaCoordinate, - }) - .catch(err => { - console.error(err); - return null; - }); + // It's using DataLoader under the hood so it's safe to call it multiple times for different coordinates + return deps.operationsManager.getClientNamesPerCoordinateOfType({ + ...deps.selector, + period: deps.period, + typename: deps.typename, + schemaCoordinate, + }); }, }, ]), diff --git a/packages/web/app/pages/api/proxy.ts b/packages/web/app/pages/api/proxy.ts index 73022ab3a..a819f76a9 100644 --- a/packages/web/app/pages/api/proxy.ts +++ b/packages/web/app/pages/api/proxy.ts @@ -116,6 +116,7 @@ async function graphql(req: NextApiRequest, res: NextApiResponse) { const code = (error as Record)?.['code'] ?? ''; const message = (error as Record)?.['message'] ?? ''; + res.setHeader('x-request-id', requestId); res.status(status).json({ code, error: message, @@ -128,7 +129,7 @@ export default wrapApiHandlerWithSentry(graphql, 'api/proxy'); export const config = { api: { bodyParser: { - sizeLimit: '6mb', + sizeLimit: '10mb', }, externalResolver: true, }, diff --git a/packages/web/app/src/components/target/explorer/provider.tsx b/packages/web/app/src/components/target/explorer/provider.tsx index 86b15afd9..570e68df9 100644 --- a/packages/web/app/src/components/target/explorer/provider.tsx +++ b/packages/web/app/src/components/target/explorer/provider.tsx @@ -18,13 +18,14 @@ type Period = { to: string; }; -function floorDate(date: Date): Date { - const time = 1000 * 60; - return new Date(Math.floor(date.getTime() / time) * time); +function toStartOfToday(): Date { + const today = new Date(); + today.setHours(0, 0, 0, 0); + return today; } function createPeriod(option: PeriodOption): Period { - const now = floorDate(new Date()); + const now = toStartOfToday(); const value = parseInt(option.replace('d', ''), 10); return {