feat(test): add large conversation scenario for performance test (#25331)

This commit is contained in:
cynthialong0-0 2026-04-15 11:08:43 -07:00 committed by GitHub
parent 166845d933
commit 5333e5ab20
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 457 additions and 21 deletions

View file

@ -17,13 +17,18 @@ import {
_resetGlobalMemoryMonitorForTests,
} from './memory-monitor.js';
import type { Config } from '../config/config.js';
import { recordMemoryUsage, isPerformanceMonitoringActive } from './metrics.js';
import {
recordMemoryUsage,
recordCpuUsage,
isPerformanceMonitoringActive,
} from './metrics.js';
import { HighWaterMarkTracker } from './high-water-mark-tracker.js';
import { RateLimiter } from './rate-limiter.js';
// Mock dependencies
vi.mock('./metrics.js', () => ({
recordMemoryUsage: vi.fn(),
recordCpuUsage: vi.fn(),
isPerformanceMonitoringActive: vi.fn(),
MemoryMetricType: {
HEAP_USED: 'heap_used',
@ -50,6 +55,7 @@ vi.mock('node:process', () => ({
}));
const mockRecordMemoryUsage = vi.mocked(recordMemoryUsage);
const mockRecordCpuUsage = vi.mocked(recordCpuUsage);
const mockIsPerformanceMonitoringActive = vi.mocked(
isPerformanceMonitoringActive,
);
@ -192,6 +198,13 @@ describe('MemoryMonitor', () => {
component: 'test_context',
},
);
expect(mockRecordCpuUsage).toHaveBeenCalledWith(
mockConfig,
expect.any(Number),
{
component: 'test_context',
},
);
});
it('should not record metrics when performance monitoring is inactive', () => {

View file

@ -12,6 +12,7 @@ import { isUserActive } from './activity-detector.js';
import { HighWaterMarkTracker } from './high-water-mark-tracker.js';
import {
recordMemoryUsage,
recordCpuUsage,
MemoryMetricType,
isPerformanceMonitoringActive,
} from './metrics.js';
@ -37,6 +38,7 @@ export class MemoryMonitor {
private intervalId: NodeJS.Timeout | null = null;
private isRunning = false;
private lastSnapshot: MemorySnapshot | null = null;
private lastCpuUsage: NodeJS.CpuUsage | null = null;
private monitoringInterval: number = 10000;
private highWaterMarkTracker: HighWaterMarkTracker;
private rateLimiter: RateLimiter;
@ -191,6 +193,13 @@ export class MemoryMonitor {
memory_type: MemoryMetricType.RSS,
component: context,
});
// Record delta CPU usage (in microseconds)
const cpuUsage = process.cpuUsage(this.lastCpuUsage ?? undefined);
this.lastCpuUsage = process.cpuUsage();
recordCpuUsage(config, cpuUsage.user + cpuUsage.system, {
component: context,
});
}
this.lastSnapshot = snapshot;

View file

@ -147,7 +147,9 @@ export class PerfTestHarness {
throw new Error(`No active timer found for label "${label}"`);
}
const wallClockMs = performance.now() - timer.startTime;
// Round wall-clock time to nearest 0.1 ms
const wallClockMs =
Math.round((performance.now() - timer.startTime) * 10) / 10;
const cpuDelta = process.cpuUsage(timer.startCpuUsage);
this.activeTimers.delete(label);

View file

@ -193,6 +193,28 @@ export function checkModelOutputContent(
return isValid;
}
/**
 * One data point of a metric parsed from the telemetry log.
 *
 * All fields are optional because the logged shape varies by metric type;
 * e.g. percentile histogram points carry a `percentile` attribute
 * (see the `p50`/`p95` lookups in the perf tests).
 */
export interface MetricDataPoint {
  /** Point attributes, e.g. `{ percentile: 'p50' }` on event-loop delay points. */
  attributes?: Record<string, unknown>;
  /** Aggregated values; which fields appear depends on the aggregation. */
  value?: {
    sum?: number;
    min?: number;
    max?: number;
    count?: number;
  };
  /** Collection-window start as a [seconds, nanoseconds] pair — presumably hrtime-style; TODO confirm against the log writer. */
  startTime?: [number, number];
  /** Collection-window end. NOTE(review): typed string while startTime is a tuple — confirm the log really mixes formats. */
  endTime?: string;
}
/**
 * A single metric entry as it appears inside a log record's
 * `scopeMetrics[].metrics` array in the parsed telemetry log.
 */
export interface TelemetryMetric {
  /** Metric identity and metadata. */
  descriptor: {
    name: string;
    type?: string;
    description?: string;
    unit?: string;
  };
  /** Recorded data points for this metric. */
  dataPoints: MetricDataPoint[];
}
export interface ParsedLog {
attributes?: {
'event.name'?: string;
@ -213,11 +235,7 @@ export interface ParsedLog {
prompt_id?: string;
};
scopeMetrics?: {
metrics: {
descriptor: {
name: string;
};
}[];
metrics: TelemetryMetric[];
}[];
}
@ -1297,6 +1315,10 @@ export class TestRig {
return logs;
}
/**
 * Returns all parsed telemetry log entries for this rig.
 *
 * Public wrapper around the private parser so test code no longer has to
 * reach into private members (previously done via an `as any` cast).
 */
readTelemetryLogs(): ParsedLog[] {
  return this._readAndParseTelemetryLog();
}
private _readAndParseTelemetryLog(): ParsedLog[] {
// Telemetry is always written to the test directory
const logFilePath = join(this.homeDir!, 'telemetry.log');
@ -1450,7 +1472,7 @@ export class TestRig {
);
}
readMetric(metricName: string): Record<string, unknown> | null {
readMetric(metricName: string): TelemetryMetric | null {
const logs = this._readAndParseTelemetryLog();
for (const logData of logs) {
if (logData.scopeMetrics) {

View file

@ -1,14 +1,14 @@
{
"version": 1,
"updatedAt": "2026-04-09T02:30:22.000Z",
"updatedAt": "2026-04-14T14:04:02.662Z",
"scenarios": {
"cold-startup-time": {
"wallClockMs": 927.553249999999,
"wallClockMs": 927.6,
"cpuTotalUs": 1470,
"timestamp": "2026-04-08T22:27:54.871Z"
},
"idle-cpu-usage": {
"wallClockMs": 5000.460750000002,
"wallClockMs": 5000.5,
"cpuTotalUs": 12157,
"timestamp": "2026-04-08T22:28:19.098Z"
},
@ -18,7 +18,7 @@
"timestamp": "2026-04-14T15:22:56.133Z"
},
"skill-loading-time": {
"wallClockMs": 930.0920409999962,
"wallClockMs": 930.1,
"cpuTotalUs": 1323,
"timestamp": "2026-04-08T22:28:23.290Z"
},
@ -26,6 +26,31 @@
"wallClockMs": 1119.9,
"cpuTotalUs": 2100,
"timestamp": "2026-04-09T02:30:22.000Z"
},
"long-conversation-resume": {
"wallClockMs": 4212.5,
"cpuTotalUs": 351393,
"timestamp": "2026-04-14T14:02:53.268Z"
},
"long-conversation-typing": {
"wallClockMs": 113.7,
"cpuTotalUs": 3304,
"timestamp": "2026-04-14T14:03:12.525Z"
},
"long-conversation-execution": {
"wallClockMs": 248.7,
"cpuTotalUs": 3825,
"timestamp": "2026-04-14T14:03:28.575Z"
},
"long-conversation-terminal-scrolling": {
"wallClockMs": 362.4,
"cpuTotalUs": 12755860,
"timestamp": "2026-04-14T14:03:45.687Z"
},
"long-conversation-alternate-scrolling": {
"wallClockMs": 362.4,
"cpuTotalUs": 12755860,
"timestamp": "2026-04-14T14:04:02.662Z"
}
}
}

View file

@ -5,10 +5,20 @@
*/
import { describe, it, beforeAll, afterAll } from 'vitest';
import { TestRig, PerfTestHarness } from '@google/gemini-cli-test-utils';
import {
TestRig,
PerfTestHarness,
type PerfSnapshot,
} from '@google/gemini-cli-test-utils';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { existsSync, readFileSync } from 'node:fs';
import {
existsSync,
readFileSync,
mkdirSync,
copyFileSync,
writeFileSync,
} from 'node:fs';
const __dirname = dirname(fileURLToPath(import.meta.url));
const BASELINES_PATH = join(__dirname, 'baselines.json');
@ -195,7 +205,7 @@ describe('CPU Performance Tests', () => {
const snapshot = await harness.measureWithEventLoop(
'high-volume-output',
async () => {
const runResult = await rig.run({
await rig.run({
args: ['Generate 1M lines of output'],
timeout: 120000,
env: {
@ -206,7 +216,6 @@ describe('CPU Performance Tests', () => {
DEBUG: 'true',
},
});
console.log(` Child Process Output:`, runResult);
},
);
@ -246,8 +255,7 @@ describe('CPU Performance Tests', () => {
JSON.stringify(toolLatencyMetric),
);
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const logs = (rig as any)._readAndParseTelemetryLog();
const logs = rig.readTelemetryLogs();
console.log(` Total telemetry log entries: ${logs.length}`);
for (const logData of logs) {
if (logData.scopeMetrics) {
@ -272,10 +280,9 @@ describe('CPU Performance Tests', () => {
const findValue = (percentile: string) => {
const dp = eventLoopMetric.dataPoints.find(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(p: any) => p.attributes.percentile === percentile,
(p) => p.attributes?.['percentile'] === percentile,
);
return dp ? dp.value.min : undefined;
return dp?.value?.min;
};
snapshot.childEventLoopDelayP50Ms = findValue('p50');
@ -296,4 +303,358 @@ describe('CPU Performance Tests', () => {
harness.assertWithinBaseline(result);
}
});
describe('long-conversation', () => {
  let rig: TestRig;
  const identifier = 'perf-long-conversation';
  const SESSION_ID =
    'anonymous_unique_id_577296e0eee5afecdcec05d11838e0cd1a851cd97a28119a4a876b11';
  const LARGE_CHAT_SOURCE = join(
    __dirname,
    '..',
    'memory-tests',
    'large-chat-session.json',
  );

  // Environment shared by every interactive run in this suite: fake auth,
  // telemetry on, fast memory sampling, event-loop monitoring, debug logs.
  // Previously this object was duplicated verbatim in all five tests.
  const PERF_ENV = {
    GEMINI_API_KEY: 'fake-perf-test-key',
    GEMINI_TELEMETRY_ENABLED: 'true',
    GEMINI_MEMORY_MONITOR_INTERVAL: '500',
    GEMINI_EVENT_LOOP_MONITOR_ENABLED: 'true',
    DEBUG: 'true',
  };

  /** Records a new baseline when UPDATE_BASELINES is set, otherwise asserts. */
  const recordOrAssert = (
    result: Parameters<typeof harness.assertWithinBaseline>[0],
  ) => {
    if (UPDATE_BASELINES) {
      harness.updateScenarioBaseline(result);
    } else {
      harness.assertWithinBaseline(result);
    }
  };

  /**
   * Shared measurement body for the two scrolling scenarios (previously two
   * ~85-line verbatim copies). Resumes the large session with the given `ui`
   * settings, pages through the scrollback (PageUp x5, Home, PageDown x5),
   * then derives a PerfSnapshot from the child process's telemetry metrics.
   */
  const measureScrollingScenario = async (
    ui: Record<string, boolean>,
  ): Promise<PerfSnapshot> => {
    const settingsPath = join(rig.homeDir!, '.gemini', 'settings.json');
    writeFileSync(
      settingsPath,
      JSON.stringify({
        security: { folderTrust: { enabled: false } },
        ui,
      }),
    );
    const run = await rig.runInteractive({
      args: ['--resume', 'latest'],
      env: PERF_ENV,
    });
    await run.expectText('Type your message');
    for (let i = 0; i < 5; i++) {
      await run.sendKeys('\u001b[5~'); // PageUp
    }
    // Scroll to the very top
    await run.sendKeys('\u001b[H'); // Home
    // Verify top line of chat is visible.
    await run.expectText('Authenticated with');
    for (let i = 0; i < 5; i++) {
      await run.sendKeys('\u001b[6~'); // PageDown
    }
    await rig.waitForTelemetryReady();
    await run.kill();

    const eventLoopMetric = rig.readMetric('event_loop.delay');
    const cpuMetric = rig.readMetric('cpu.usage');

    // Event-loop delay sum for a percentile; 0 when the data point is
    // missing or its sum is falsy (matches the original falsy guard).
    const percentileSum = (percentile: string): number => {
      const dp = eventLoopMetric?.dataPoints.find(
        (p) => p.attributes?.['percentile'] === percentile,
      );
      return dp?.value?.sum || 0;
    };
    const p50Ms = percentileSum('p50');
    const p95Ms = percentileSum('p95');
    const maxMs = percentileSum('max');

    // Total CPU time (microseconds): sum of all positive samples.
    let cpuTotalUs = 0;
    for (const dp of cpuMetric?.dataPoints ?? []) {
      if (dp.value?.sum && dp.value.sum > 0) {
        cpuTotalUs += dp.value.sum;
      }
    }

    // The telemetry metric does not split user vs. system time, so all of
    // it is attributed to user time.
    return {
      timestamp: Date.now(),
      label: 'scrolling',
      wallClockMs: Math.round(p50Ms * 10) / 10,
      cpuTotalUs,
      cpuUserUs: cpuTotalUs,
      cpuSystemUs: 0,
      eventLoopDelayP50Ms: p50Ms,
      eventLoopDelayP95Ms: p95Ms,
      eventLoopDelayMaxMs: maxMs,
    };
  };

  beforeAll(async () => {
    if (!existsSync(LARGE_CHAT_SOURCE)) {
      throw new Error(
        `Performance test fixture missing: ${LARGE_CHAT_SOURCE}.`,
      );
    }
    rig = new TestRig();
    // NOTE(review): setup() is not awaited — confirm it is synchronous.
    rig.setup(identifier, {
      fakeResponsesPath: join(__dirname, 'perf.long-chat.responses'),
    });
    // Pre-seed the rig's home directory so `--resume latest` can find the
    // large chat fixture: project mapping, project-root marker, chat file.
    const geminiDir = join(rig.homeDir!, '.gemini');
    const projectTempDir = join(geminiDir, 'tmp', identifier);
    const targetChatsDir = join(projectTempDir, 'chats');
    mkdirSync(targetChatsDir, { recursive: true });
    writeFileSync(
      join(geminiDir, 'projects.json'),
      JSON.stringify({
        projects: { [rig.testDir!]: identifier },
      }),
    );
    writeFileSync(join(projectTempDir, '.project_root'), rig.testDir!);
    copyFileSync(
      LARGE_CHAT_SOURCE,
      join(targetChatsDir, `session-${SESSION_ID}.json`),
    );
  });

  afterAll(async () => {
    await rig.cleanup();
  });

  it('session-load: resume a 60MB chat history', async () => {
    const result = await harness.runScenario(
      'long-conversation-resume',
      async () =>
        harness.measureWithEventLoop('resume', async () => {
          const run = await rig.runInteractive({
            args: ['--resume', 'latest'],
            env: PERF_ENV,
          });
          await run.kill();
        }),
    );
    recordOrAssert(result);
  });

  it('typing: latency when typing into a large session', async () => {
    const result = await harness.runScenario(
      'long-conversation-typing',
      async () => {
        const run = await rig.runInteractive({
          args: ['--resume', 'latest'],
          env: PERF_ENV,
        });
        const snapshot = await harness.measureWithEventLoop(
          'typing',
          async () => {
            // On average, the expected latency per key is under 30ms.
            for (const char of 'Hello') {
              await run.type(char);
            }
          },
        );
        await run.kill();
        return snapshot;
      },
    );
    recordOrAssert(result);
  });

  it('execution: response latency for a simple shell command', async () => {
    const result = await harness.runScenario(
      'long-conversation-execution',
      async () => {
        const run = await rig.runInteractive({
          args: ['--resume', 'latest'],
          env: PERF_ENV,
        });
        await run.expectText('Type your message');
        const snapshot = await harness.measureWithEventLoop(
          'execution',
          async () => {
            await run.sendKeys('!echo hi\r');
            await run.expectText('hi');
          },
        );
        await run.kill();
        return snapshot;
      },
    );
    recordOrAssert(result);
  });

  it('terminal-scrolling: latency when scrolling a large terminal buffer', async () => {
    // Enable terminalBuffer to intentionally test CLI scrolling logic
    const result = await harness.runScenario(
      'long-conversation-terminal-scrolling',
      () => measureScrollingScenario({ terminalBuffer: true }),
    );
    recordOrAssert(result);
  });

  it('alternate-scrolling: latency when scrolling a large alternate buffer', async () => {
    // Enable useAlternateBuffer to intentionally test CLI scrolling logic
    const result = await harness.runScenario(
      'long-conversation-alternate-scrolling',
      () => measureScrollingScenario({ useAlternateBuffer: true }),
    );
    recordOrAssert(result);
  });
});
});

View file

@ -0,0 +1,4 @@
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"complexity_reasoning\":\"simple\",\"complexity_score\":1}"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I am a large conversation model response."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"candidatesTokenCount":10,"promptTokenCount":20,"totalTokenCount":30}}]}
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"{\"originalSummary\":\"large chat summary\",\"events\":[]}"}],"role":"model"},"finishReason":"STOP","index":0}]}}
{"method":"countTokens","response":{"totalTokens":100}}