feat(test): add large conversation scenario for performance test (#25331)

This commit is contained in:
cynthialong0-0 2026-04-15 11:08:43 -07:00 committed by GitHub
parent 166845d933
commit 5333e5ab20
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 457 additions and 21 deletions

View file

@ -17,13 +17,18 @@ import {
_resetGlobalMemoryMonitorForTests,
} from './memory-monitor.js';
import type { Config } from '../config/config.js';
import { recordMemoryUsage, isPerformanceMonitoringActive } from './metrics.js';
import {
recordMemoryUsage,
recordCpuUsage,
isPerformanceMonitoringActive,
} from './metrics.js';
import { HighWaterMarkTracker } from './high-water-mark-tracker.js';
import { RateLimiter } from './rate-limiter.js';
// Mock dependencies
vi.mock('./metrics.js', () => ({
recordMemoryUsage: vi.fn(),
recordCpuUsage: vi.fn(),
isPerformanceMonitoringActive: vi.fn(),
MemoryMetricType: {
HEAP_USED: 'heap_used',
@ -50,6 +55,7 @@ vi.mock('node:process', () => ({
}));
const mockRecordMemoryUsage = vi.mocked(recordMemoryUsage);
const mockRecordCpuUsage = vi.mocked(recordCpuUsage);
const mockIsPerformanceMonitoringActive = vi.mocked(
isPerformanceMonitoringActive,
);
@ -192,6 +198,13 @@ describe('MemoryMonitor', () => {
component: 'test_context',
},
);
expect(mockRecordCpuUsage).toHaveBeenCalledWith(
mockConfig,
expect.any(Number),
{
component: 'test_context',
},
);
});
it('should not record metrics when performance monitoring is inactive', () => {

View file

@ -12,6 +12,7 @@ import { isUserActive } from './activity-detector.js';
import { HighWaterMarkTracker } from './high-water-mark-tracker.js';
import {
recordMemoryUsage,
recordCpuUsage,
MemoryMetricType,
isPerformanceMonitoringActive,
} from './metrics.js';
@ -37,6 +38,7 @@ export class MemoryMonitor {
private intervalId: NodeJS.Timeout | null = null;
private isRunning = false;
private lastSnapshot: MemorySnapshot | null = null;
private lastCpuUsage: NodeJS.CpuUsage | null = null;
private monitoringInterval: number = 10000;
private highWaterMarkTracker: HighWaterMarkTracker;
private rateLimiter: RateLimiter;
@ -191,6 +193,13 @@ export class MemoryMonitor {
memory_type: MemoryMetricType.RSS,
component: context,
});
// Record delta CPU usage (in microseconds)
const cpuUsage = process.cpuUsage(this.lastCpuUsage ?? undefined);
this.lastCpuUsage = process.cpuUsage();
recordCpuUsage(config, cpuUsage.user + cpuUsage.system, {
component: context,
});
}
this.lastSnapshot = snapshot;

View file

@ -147,7 +147,9 @@ export class PerfTestHarness {
throw new Error(`No active timer found for label "${label}"`);
}
const wallClockMs = performance.now() - timer.startTime;
// Round wall-clock time to nearest 0.1 ms
const wallClockMs =
Math.round((performance.now() - timer.startTime) * 10) / 10;
const cpuDelta = process.cpuUsage(timer.startCpuUsage);
this.activeTimers.delete(label);

View file

@ -193,6 +193,28 @@ export function checkModelOutputContent(
return isValid;
}
/**
 * One data point of a metric parsed from the telemetry log.
 *
 * All fields are optional because the logged shape varies by metric type;
 * e.g. percentile histogram points carry a `percentile` attribute
 * (see the `p50`/`p95` lookups in the perf tests).
 */
export interface MetricDataPoint {
  /** Point attributes, e.g. `{ percentile: 'p50' }` on event-loop delay points. */
  attributes?: Record<string, unknown>;
  /** Aggregated values; which fields appear depends on the aggregation. */
  value?: {
    sum?: number;
    min?: number;
    max?: number;
    count?: number;
  };
  /** Collection-window start as a [seconds, nanoseconds] pair — presumably hrtime-style; TODO confirm against the log writer. */
  startTime?: [number, number];
  /** Collection-window end. NOTE(review): typed string while startTime is a tuple — confirm the log really mixes formats. */
  endTime?: string;
}
/**
 * A single metric entry as it appears inside a log record's
 * `scopeMetrics[].metrics` array in the parsed telemetry log.
 */
export interface TelemetryMetric {
  /** Metric identity and metadata. */
  descriptor: {
    name: string;
    type?: string;
    description?: string;
    unit?: string;
  };
  /** Recorded data points for this metric. */
  dataPoints: MetricDataPoint[];
}
export interface ParsedLog {
attributes?: {
'event.name'?: string;
@ -213,11 +235,7 @@ export interface ParsedLog {
prompt_id?: string;
};
scopeMetrics?: {
metrics: {
descriptor: {
name: string;
};
}[];
metrics: TelemetryMetric[];
}[];
}
@ -1297,6 +1315,10 @@ export class TestRig {
return logs;
}
/**
 * Returns all parsed telemetry log entries for this rig.
 *
 * Public wrapper around the private parser so test code no longer has to
 * reach into private members (previously done via an `as any` cast).
 */
readTelemetryLogs(): ParsedLog[] {
  return this._readAndParseTelemetryLog();
}
private _readAndParseTelemetryLog(): ParsedLog[] {
// Telemetry is always written to the test directory
const logFilePath = join(this.homeDir!, 'telemetry.log');
@ -1450,7 +1472,7 @@ export class TestRig {
);
}
readMetric(metricName: string): Record<string, unknown> | null {
readMetric(metricName: string): TelemetryMetric | null {
const logs = this._readAndParseTelemetryLog();
for (const logData of logs) {
if (logData.scopeMetrics) {

View file

@ -1,14 +1,14 @@
{
"version": 1,
"updatedAt": "2026-04-09T02:30:22.000Z",
"updatedAt": "2026-04-14T14:04:02.662Z",
"scenarios": {
"cold-startup-time": {
"wallClockMs": 927.553249999999,
"wallClockMs": 927.6,
"cpuTotalUs": 1470,
"timestamp": "2026-04-08T22:27:54.871Z"
},
"idle-cpu-usage": {
"wallClockMs": 5000.460750000002,
"wallClockMs": 5000.5,
"cpuTotalUs": 12157,
"timestamp": "2026-04-08T22:28:19.098Z"
},
@ -18,7 +18,7 @@
"timestamp": "2026-04-14T15:22:56.133Z"
},
"skill-loading-time": {
"wallClockMs": 930.0920409999962,
"wallClockMs": 930.1,
"cpuTotalUs": 1323,
"timestamp": "2026-04-08T22:28:23.290Z"
},
@ -26,6 +26,31 @@
"wallClockMs": 1119.9,
"cpuTotalUs": 2100,
"timestamp": "2026-04-09T02:30:22.000Z"
},
"long-conversation-resume": {
"wallClockMs": 4212.5,
"cpuTotalUs": 351393,
"timestamp": "2026-04-14T14:02:53.268Z"
},
"long-conversation-typing": {
"wallClockMs": 113.7,
"cpuTotalUs": 3304,
"timestamp": "2026-04-14T14:03:12.525Z"
},
"long-conversation-execution": {
"wallClockMs": 248.7,
"cpuTotalUs": 3825,
"timestamp": "2026-04-14T14:03:28.575Z"
},
"long-conversation-terminal-scrolling": {
"wallClockMs": 362.4,
"cpuTotalUs": 12755860,
"timestamp": "2026-04-14T14:03:45.687Z"
},
"long-conversation-alternate-scrolling": {
"wallClockMs": 362.4,
"cpuTotalUs": 12755860,
"timestamp": "2026-04-14T14:04:02.662Z"
}
}
}

View file

@ -5,10 +5,20 @@
*/
import { describe, it, beforeAll, afterAll } from 'vitest';
import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils';
import {
TestRig,
PerfTestHarness,
type PerfSnapshot,
} from '@google/gemini-cli-test-utils';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { existsSync, readFileSync } from 'node:fs';
import {
existsSync,
readFileSync,
mkdirSync,
copyFileSync,
writeFileSync,
} from 'node:fs';
const __dirname = dirname(fileURLToPath(import.meta.url));
const BASELINES_PATH = join(__dirname, 'baselines.json');
@ -195,7 +205,7 @@ describe('CPU Performance Tests', () => {
const snapshot = await harness.measureWithEventLoop(
'high-volume-output',
async () => {
const runResult = await rig.run({
await rig.run({
args: ['Generate 1M lines of output'],
timeout: 120000,
env: {
@ -206,7 +216,6 @@ describe('CPU Performance Tests', () => {
DEBUG: 'true',
},
});
console.log(` Child Process Output:`, runResult);
},
);
@ -246,8 +255,7 @@ describe('CPU Performance Tests', () => {
JSON.stringify(toolLatencyMetric),
);
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const logs = (rig as any)._readAndParseTelemetryLog();
const logs = rig.readTelemetryLogs();
console.log(` Total telemetry log entries: ${logs.length}`);
for (const logData of logs) {
if (logData.scopeMetrics) {
@ -272,10 +280,9 @@ describe('CPU Performance Tests', () => {
const findValue = (percentile: string) => {
const dp = eventLoopMetric.dataPoints.find(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(p: any) => p.attributes.percentile === percentile,
(p) => p.attributes?.['percentile'] === percentile,
);
return dp ? dp.value.min : undefined;
return dp?.value?.min;
};
snapshot.childEventLoopDelayP50Ms = findValue('p50');
@ -296,4 +303,358 @@ describe('CPU Performance Tests', () => {
harness.assertWithinBaseline(result);
}
});
describe('long-conversation', () => {
  let rig: TestRig;
  const identifier = 'perf-long-conversation';
  const SESSION_ID =
    'anonymous_unique_id_577296e0eee5afecdcec05d11838e0cd1a851cd97a28119a4a876b11';
  const LARGE_CHAT_SOURCE = join(
    __dirname,
    '..',
    'memory-tests',
    'large-chat-session.json',
  );

  // Environment shared by every interactive run in this suite: fake auth,
  // telemetry on, fast memory sampling, event-loop monitoring, debug logs.
  // Previously this object was duplicated verbatim in all five tests.
  const PERF_ENV = {
    GEMINI_API_KEY: 'fake-perf-test-key',
    GEMINI_TELEMETRY_ENABLED: 'true',
    GEMINI_MEMORY_MONITOR_INTERVAL: '500',
    GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
    DEBUG: 'true',
  };

  /** Records a new baseline when UPDATE_BASELINES is set, otherwise asserts. */
  const recordOrAssert = (
    result: Parameters<typeof harness.assertWithinBaseline>[0],
  ) => {
    if (UPDATE_BASELINES) {
      harness.updateScenarioBaseline(result);
    } else {
      harness.assertWithinBaseline(result);
    }
  };

  /**
   * Shared measurement body for the two scrolling scenarios (previously two
   * ~85-line verbatim copies). Resumes the large session with the given `ui`
   * settings, pages through the scrollback (PageUp x5, Home, PageDown x5),
   * then derives a PerfSnapshot from the child process's telemetry metrics.
   */
  const measureScrollingScenario = async (
    ui: Record<string, boolean>,
  ): Promise<PerfSnapshot> => {
    const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json');
    writeFileSync(
      settingsPath,
      JSON.stringify({
        security: { folderTrust: { enabled: false } },
        ui,
      }),
    );
    const run = await rig.runInteractive({
      args: ['--resume', 'latest'],
      env: PERF_ENV,
    });
    await run.expectText('Type your message');
    for (let i = 0; i < 5; i++) {
      await run.sendKeys('\u001b[5~'); // PageUp
    }
    // Scroll to the very top
    await run.sendKeys('\u001b[H'); // Home
    // Verify top line of chat is visible.
    await run.expectText('Authenticated with');
    for (let i = 0; i < 5; i++) {
      await run.sendKeys('\u001b[6~'); // PageDown
    }
    await rig.waitForTelemetryReady();
    await run.kill();

    const eventLoopMetric = rig.readMetric('event_loop.delay');
    const cpuMetric = rig.readMetric('cpu.usage');

    // Event-loop delay sum for a percentile; 0 when the data point is
    // missing or its sum is falsy (matches the original falsy guard).
    const percentileSum = (percentile: string): number => {
      const dp = eventLoopMetric?.dataPoints.find(
        (p) => p.attributes?.['percentile'] === percentile,
      );
      return dp?.value?.sum || 0;
    };
    const p50Ms = percentileSum('p50');
    const p95Ms = percentileSum('p95');
    const maxMs = percentileSum('max');

    // Total CPU time (microseconds): sum of all positive samples.
    let cpuTotalUs = 0;
    for (const dp of cpuMetric?.dataPoints ?? []) {
      if (dp.value?.sum && dp.value.sum > 0) {
        cpuTotalUs += dp.value.sum;
      }
    }

    // The telemetry metric does not split user vs. system time, so all of
    // it is attributed to user time.
    return {
      timestamp: Date.now(),
      label: 'scrolling',
      wallClockMs: Math.round(p50Ms * 10) / 10,
      cpuTotalUs,
      cpuUserUs: cpuTotalUs,
      cpuSystemUs: 0,
      eventLoopDelayP50Ms: p50Ms,
      eventLoopDelayP95Ms: p95Ms,
      eventLoopDelayMaxMs: maxMs,
    };
  };

  beforeAll(async () => {
    if (!existsSync(LARGE_CHAT_SOURCE)) {
      throw new Error(
        `Performance test fixture missing: ${LARGE_CHAT_SOURCE}.`,
      );
    }
    rig = new TestRig();
    // NOTE(review): setup() is not awaited — confirm it is synchronous.
    rig.setup(identifier, {
      fakeResponsesPath: join(__dirname, 'perf.long-chat.responses'),
    });
    // Pre-seed the rig's home directory so `--resume latest` can find the
    // large chat fixture: project mapping, project-root marker, chat file.
    const geminiDir = join(rig.homeDir!, '.gemini');
    const projectTempDir = join(geminiDir, 'tmp', identifier);
    const targetChatsDir = join(projectTempDir, 'chats');
    mkdirSync(targetChatsDir, { recursive: true });
    writeFileSync(
      join(geminiDir, 'projects.json'),
      JSON.stringify({
        projects: { [rig.testDir!]: identifier },
      }),
    );
    writeFileSync(join(projectTempDir, '.project_root'), rig.testDir!);
    copyFileSync(
      LARGE_CHAT_SOURCE,
      join(targetChatsDir, `session-${SESSION_ID}.json`),
    );
  });

  afterAll(async () => {
    await rig.cleanup();
  });

  it('session-load: resume a 60MB chat history', async () => {
    const result = await harness.runScenario(
      'long-conversation-resume',
      async () =>
        harness.measureWithEventLoop('resume', async () => {
          const run = await rig.runInteractive({
            args: ['--resume', 'latest'],
            env: PERF_ENV,
          });
          await run.kill();
        }),
    );
    recordOrAssert(result);
  });

  it('typing: latency when typing into a large session', async () => {
    const result = await harness.runScenario(
      'long-conversation-typing',
      async () => {
        const run = await rig.runInteractive({
          args: ['--resume', 'latest'],
          env: PERF_ENV,
        });
        const snapshot = await harness.measureWithEventLoop(
          'typing',
          async () => {
            // On average, the expected latency per key is under 30ms.
            for (const char of 'Hello') {
              await run.type(char);
            }
          },
        );
        await run.kill();
        return snapshot;
      },
    );
    recordOrAssert(result);
  });

  it('execution: response latency for a simple shell command', async () => {
    const result = await harness.runScenario(
      'long-conversation-execution',
      async () => {
        const run = await rig.runInteractive({
          args: ['--resume', 'latest'],
          env: PERF_ENV,
        });
        await run.expectText('Type your message');
        const snapshot = await harness.measureWithEventLoop(
          'execution',
          async () => {
            await run.sendKeys('!echo hi\r');
            await run.expectText('hi');
          },
        );
        await run.kill();
        return snapshot;
      },
    );
    recordOrAssert(result);
  });

  it('terminal-scrolling: latency when scrolling a large terminal buffer', async () => {
    // Enable terminalBuffer to intentionally test CLI scrolling logic
    const result = await harness.runScenario(
      'long-conversation-terminal-scrolling',
      () => measureScrollingScenario({ terminalBuffer: true }),
    );
    recordOrAssert(result);
  });

  it('alternate-scrolling: latency when scrolling a large alternate buffer', async () => {
    // Enable useAlternateBuffer to intentionally test CLI scrolling logic
    const result = await harness.runScenario(
      'long-conversation-alternate-scrolling',
      () => measureScrollingScenario({ useAlternateBuffer: true }),
    );
    recordOrAssert(result);
  });
});
});

View file

@ -0,0 +1,4 @@
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"complexity_reasoning\":\"simple\",\"complexity_score\":1}"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I am a large conversation model response."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"candidatesTokenCount":10,"promptTokenCount":20,"totalTokenCount":30}}]}
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"originalSummary\":\"large chat summary\",\"events\":[]}"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"countTokens","response":{"totalTokens":100}}