feat(test-utils): add memory usage integration test harness (#24876)

2026-04-21 13:37:17 +00:00 · 2026-04-08 10:42:18 -07:00 · 2026-04-08 10:42:18 -07:00 · 4ebc43bc66
commit 4ebc43bc66
parent 34b4f1c6e4
18 changed files with 1021 additions and 3 deletions
--- a/.github/workflows/memory-nightly.yml
+++ b/.github/workflows/memory-nightly.yml
@ -0,0 +1,33 @@
+name: 'Memory Tests: Nightly'
+
+on:
+  schedule:
+    - cron: '0 2 * * *' # Runs at 02:00 UTC every day
+  workflow_dispatch: # Allow manual trigger
+
+permissions:
+  contents: 'read'
+
+jobs:
+  memory-test:
+    name: 'Run Memory Usage Tests'
+    runs-on: 'gemini-cli-ubuntu-16-core'
+    if: "github.repository == 'google-gemini/gemini-cli'"
+    steps:
+      - name: 'Checkout'
+        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5
+
+      - name: 'Set up Node.js'
+        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
+        with:
+          node-version-file: '.nvmrc'
+          cache: 'npm'
+
+      - name: 'Install dependencies'
+        run: 'npm ci'
+
+      - name: 'Build project'
+        run: 'npm run build'
+
+      - name: 'Run Memory Tests'
+        run: 'npm run test:memory'
--- a/GEMINI.md
+++ b/GEMINI.md
@ -44,6 +44,8 @@ powerful tool for developers.
 - **Test Commands:**
  - **Unit (All):** `npm run test`
  - **Integration (E2E):** `npm run test:e2e`
+  - **Memory (Nightly):** `npm run test:memory` (Runs memory regression tests
+    against baselines. Excluded from `preflight`, run nightly.)
  - **Workspace-Specific:** `npm test -w <pkg> -- <path>` (Note: `<path>` must
    be relative to the workspace root, e.g.,
    `-w @google/gemini-cli-core -- src/routing/modelRouterService.test.ts`)
--- a/docs/integration-tests.md
+++ b/docs/integration-tests.md
@ -117,6 +117,46 @@ npm run test:integration:sandbox:docker
 npm run test:integration:sandbox:podman
 ```

+## Memory regression tests
+
+Memory regression tests are designed to detect heap growth and leaks across key
+CLI scenarios. They are located in the `memory-tests` directory.
+
+These tests are distinct from standard integration tests because they measure
+memory usage and compare it against committed baselines.
+
+### Running memory tests
+
+Memory tests are not run as part of the default `npm run test` or
+`npm run test:e2e` commands. They are run nightly in CI but can be run manually:
+
+```bash
+npm run test:memory
+```
+
+### Updating baselines
+
+If you intentionally change behavior that affects memory usage, you may need to
+update the baselines. Run `npm run test:memory:update-baselines`, or set the
+`UPDATE_MEMORY_BASELINES` environment variable to `true` yourself:
+
+```bash
+UPDATE_MEMORY_BASELINES=true npm run test:memory
+```
+
+This will run the tests, take median snapshots, and overwrite
+`memory-tests/baselines.json`. You should review the changes and commit the
+updated baseline file.
+
+### How it works
+
+The harness (`MemoryTestHarness` in `packages/test-utils`):
+
+- Forces garbage collection multiple times to reduce noise.
+- Takes median snapshots to filter spikes.
+- Compares against baselines with a 10% tolerance.
+- Can analyze sustained leaks across 3 snapshots using `analyzeSnapshots()`.
+
 ## Diagnostics

 The integration test runner provides several options for diagnostics to help
--- a/memory-tests/baselines.json
+++ b/memory-tests/baselines.json
@ -0,0 +1,30 @@
+{
+  "version": 1,
+  "updatedAt": "2026-04-08T01:21:58.770Z",
+  "scenarios": {
+    "multi-turn-conversation": {
+      "heapUsedBytes": 120082704,
+      "heapTotalBytes": 177586176,
+      "rssBytes": 269172736,
+      "timestamp": "2026-04-08T01:21:57.127Z"
+    },
+    "multi-function-call-repo-search": {
+      "heapUsedBytes": 104644984,
+      "heapTotalBytes": 111575040,
+      "rssBytes": 204079104,
+      "timestamp": "2026-04-08T01:21:58.770Z"
+    },
+    "idle-session-startup": {
+      "heapUsedBytes": 119813672,
+      "heapTotalBytes": 177061888,
+      "rssBytes": 267943936,
+      "timestamp": "2026-04-08T01:21:53.855Z"
+    },
+    "simple-prompt-response": {
+      "heapUsedBytes": 119722064,
+      "heapTotalBytes": 177324032,
+      "rssBytes": 268812288,
+      "timestamp": "2026-04-08T01:21:55.491Z"
+    }
+  }
+}
--- a/memory-tests/globalSetup.ts
+++ b/memory-tests/globalSetup.ts
@ -0,0 +1,71 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { mkdir, readdir, rm } from 'node:fs/promises';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js';
+
+// ES modules have no built-in __dirname, so derive it from this module's URL.
+const __dirname = dirname(fileURLToPath(import.meta.url));
+// Parent of the directory that contains this file.
+const rootDir = join(__dirname, '..');
+// Holds one timestamp-named sub-directory per memory test run.
+const memoryTestsDir = join(rootDir, '.memory-tests');
+// Directory of the current run; assigned by setup() and removed by teardown().
+let runDir = '';
+
+/**
+ * Vitest global setup: creates a fresh, timestamp-named run directory,
+ * redirects home/config locations into it, checks that ripgrep is usable,
+ * prunes old run directories and exports the environment variables the
+ * tests read.
+ *
+ * @throws Error when `canUseRipgrep()` reports that ripgrep is unavailable.
+ */
+export async function setup() {
+  const env = process.env;
+
+  runDir = join(memoryTestsDir, String(Date.now()));
+  await mkdir(runDir, { recursive: true });
+
+  // Redirect the home directory into the run directory so the tests never
+  // touch the user's local config.
+  env['HOME'] = runDir;
+  if (process.platform === 'win32') {
+    env['USERPROFILE'] = runDir;
+  }
+  env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini');
+
+  // Resolve ripgrep once, up front, to avoid race conditions later on.
+  if (!(await canUseRipgrep())) {
+    throw new Error('Failed to download ripgrep binary');
+  }
+
+  // Prune old runs, keeping the three newest for debugging. Failures here are
+  // logged but do not abort the run.
+  try {
+    const entries = await readdir(memoryTestsDir);
+    const excess = entries.length - 3;
+    if (excess > 0) {
+      // Names are timestamps, so sorting puts the oldest runs first.
+      const stale = entries.sort().slice(0, excess);
+      const removals = stale.map((name) =>
+        rm(join(memoryTestsDir, name), {
+          recursive: true,
+          force: true,
+        }),
+      );
+      await Promise.all(removals);
+    }
+  } catch (e) {
+    console.error('Error cleaning up old memory test runs:', e);
+  }
+
+  env['INTEGRATION_TEST_FILE_DIR'] = runDir;
+  env['GEMINI_CLI_INTEGRATION_TEST'] = 'true';
+  env['GEMINI_FORCE_FILE_STORAGE'] = 'true';
+  env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log');
+  // Respect a caller-provided VERBOSE value; default to 'false' otherwise.
+  env['VERBOSE'] ??= 'false';
+
+  console.log(`\nMemory test output directory: ${runDir}`);
+}
+
+/**
+ * Vitest global teardown: removes the current run directory.
+ *
+ * Nothing is removed when `KEEP_OUTPUT` is exactly `'true'` or when `setup()`
+ * never assigned a run directory. Removal failures are logged as warnings
+ * rather than thrown.
+ */
+export async function teardown() {
+  if (process.env['KEEP_OUTPUT'] === 'true' || !runDir) {
+    return;
+  }
+
+  try {
+    await rm(runDir, { recursive: true, force: true });
+  } catch (e) {
+    console.warn('Failed to clean up memory test directory:', e);
+  }
+}
--- a/memory-tests/memory-usage.test.ts
+++ b/memory-tests/memory-usage.test.ts
@ -0,0 +1,185 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, beforeAll, afterAll, afterEach } from 'vitest';
+import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+// ES modules have no built-in __dirname, so derive it from this module's URL.
+const __dirname = dirname(fileURLToPath(import.meta.url));
+// Committed baselines live next to this test file.
+const BASELINES_PATH = join(__dirname, 'baselines.json');
+// When UPDATE_MEMORY_BASELINES is exactly 'true', tests rewrite baselines
+// instead of asserting against them.
+const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
+// Passed to the harness as its default tolerance, in percent.
+const TOLERANCE_PERCENT = 10;
+
+// Fake API key for tests using fake responses
+const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
+
+describe('Memory Usage Tests', () => {
+  /** Value resolved by `MemoryTestHarness.runScenario`. */
+  type ScenarioResult = Awaited<ReturnType<MemoryTestHarness['runScenario']>>;
+
+  /** Timeout, in milliseconds, applied to every CLI invocation below. */
+  const RUN_TIMEOUT_MS = 120000;
+
+  let harness: MemoryTestHarness;
+  let rig: TestRig;
+
+  /**
+   * Shared tail of every scenario: either records `result` as the new
+   * baseline (when UPDATE_MEMORY_BASELINES=true) or asserts that it is within
+   * the committed baseline.
+   *
+   * @param scenarioName Name used in the "Updated baseline" log line.
+   * @param result Result returned by `harness.runScenario` for that scenario.
+   */
+  function checkOrUpdateBaseline(
+    scenarioName: string,
+    result: ScenarioResult,
+  ): void {
+    if (UPDATE_BASELINES) {
+      harness.updateScenarioBaseline(result);
+      console.log(
+        `Updated baseline for ${scenarioName}: ${(result.finalHeapUsed / (1024 * 1024)).toFixed(1)} MB`,
+      );
+    } else {
+      harness.assertWithinBaseline(result);
+    }
+  }
+
+  beforeAll(() => {
+    harness = new MemoryTestHarness({
+      baselinesPath: BASELINES_PATH,
+      defaultTolerancePercent: TOLERANCE_PERCENT,
+      gcCycles: 3,
+      gcDelayMs: 100,
+      sampleCount: 3,
+    });
+  });
+
+  // Every test assigns a fresh rig as its first statement; clean it up here.
+  afterEach(async () => {
+    await rig.cleanup();
+  });
+
+  afterAll(async () => {
+    // Generate the summary report after all tests
+    await harness.generateReport();
+  });
+
+  it('idle-session-startup: memory usage within baseline', async () => {
+    const scenarioName = 'idle-session-startup';
+    rig = new TestRig();
+    rig.setup('memory-idle-startup', {
+      fakeResponsesPath: join(__dirname, 'memory.idle-startup.responses'),
+    });
+
+    const result = await harness.runScenario(
+      scenarioName,
+      async (recordSnapshot) => {
+        await rig.run({
+          args: ['hello'],
+          timeout: RUN_TIMEOUT_MS,
+          env: TEST_ENV,
+        });
+
+        await recordSnapshot('after-startup');
+      },
+    );
+
+    checkOrUpdateBaseline(scenarioName, result);
+  });
+
+  it('simple-prompt-response: memory usage within baseline', async () => {
+    const scenarioName = 'simple-prompt-response';
+    rig = new TestRig();
+    rig.setup('memory-simple-prompt', {
+      fakeResponsesPath: join(__dirname, 'memory.simple-prompt.responses'),
+    });
+
+    const result = await harness.runScenario(
+      scenarioName,
+      async (recordSnapshot) => {
+        await rig.run({
+          args: ['What is the capital of France?'],
+          timeout: RUN_TIMEOUT_MS,
+          env: TEST_ENV,
+        });
+
+        await recordSnapshot('after-response');
+      },
+    );
+
+    checkOrUpdateBaseline(scenarioName, result);
+  });
+
+  it('multi-turn-conversation: memory remains stable over turns', async () => {
+    const scenarioName = 'multi-turn-conversation';
+    rig = new TestRig();
+    rig.setup('memory-multi-turn', {
+      fakeResponsesPath: join(__dirname, 'memory.multi-turn.responses'),
+    });
+
+    const prompts = [
+      'Hello, what can you help me with?',
+      'Tell me about JavaScript',
+      'How is TypeScript different?',
+      'Can you write a simple TypeScript function?',
+      'What are some TypeScript best practices?',
+    ];
+
+    const result = await harness.runScenario(
+      scenarioName,
+      async (recordSnapshot) => {
+        // Run through all turns as a piped sequence
+        const stdinContent = prompts.join('\n');
+        await rig.run({
+          stdin: stdinContent,
+          timeout: RUN_TIMEOUT_MS,
+          env: TEST_ENV,
+        });
+
+        // Take snapshots after the conversation completes
+        await recordSnapshot('after-all-turns');
+      },
+    );
+
+    checkOrUpdateBaseline(scenarioName, result);
+  });
+
+  it('multi-function-call-repo-search: memory after tool use', async () => {
+    const scenarioName = 'multi-function-call-repo-search';
+    rig = new TestRig();
+    rig.setup('memory-multi-func-call', {
+      fakeResponsesPath: join(
+        __dirname,
+        'memory.multi-function-call.responses',
+      ),
+    });
+
+    // Create directories first, then files in the workspace so the tools have targets
+    rig.mkdir('packages/core/src/telemetry');
+    rig.createFile(
+      'packages/core/src/telemetry/memory-monitor.ts',
+      'export class MemoryMonitor { constructor() {} }',
+    );
+    rig.createFile(
+      'packages/core/src/telemetry/metrics.ts',
+      'export function recordMemoryUsage() {}',
+    );
+
+    const result = await harness.runScenario(
+      scenarioName,
+      async (recordSnapshot) => {
+        await rig.run({
+          args: [
+            'Search this repository for MemoryMonitor and tell me what it does',
+          ],
+          timeout: RUN_TIMEOUT_MS,
+          env: TEST_ENV,
+        });
+
+        await recordSnapshot('after-tool-calls');
+      },
+    );
+
+    checkOrUpdateBaseline(scenarioName, result);
+  });
+});
--- a/memory-tests/memory.idle-startup.responses
+++ b/memory-tests/memory.idle-startup.responses
@ -0,0 +1,2 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
--- a/memory-tests/memory.multi-function-call.responses
+++ b/memory-tests/memory.multi-function-call.responses
@ -0,0 +1,4 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll search for MemoryMonitor in the repository and analyze what it does."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":15,"totalTokenCount":45,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"grep_search","args":{"pattern":"MemoryMonitor","path":".","include_pattern":"*.ts"}}},{"functionCall":{"name":"list_directory","args":{"path":"packages/core/src/telemetry"}}},{"functionCall":{"name":"read_file","args":{"file_path":"packages/core/src/telemetry/memory-monitor.ts"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":80,"totalTokenCount":110,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I found the memory monitoring code. Here's a summary:\n\nThe `MemoryMonitor` class in `packages/core/src/telemetry/memory-monitor.ts` provides:\n\n1. **Continuous monitoring** via `start()`/`stop()` with configurable intervals\n2. **V8 heap snapshots** using `v8.getHeapStatistics()` and `process.memoryUsage()`\n3. **High-water mark tracking** to detect significant memory growth\n4. **Rate-limited recording** to avoid metric flood\n5. **Activity detection** — only records when user is active\n\nThe class uses a singleton pattern via `initializeMemoryMonitor()` for global access."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":500,"candidatesTokenCount":120,"totalTokenCount":620,"promptTokensDetails":[{"modality":"TEXT","tokenCount":500}]}}]}
--- a/memory-tests/memory.multi-turn.responses
+++ b/memory-tests/memory.multi-turn.responses
@ -0,0 +1,10 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help you with your coding tasks. What would you like to work on today?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":18,"totalTokenCount":23,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"JavaScript is a high-level, interpreted programming language. It was originally designed for adding interactivity to web pages."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":25,"candidatesTokenCount":60,"totalTokenCount":85,"promptTokensDetails":[{"modality":"TEXT","tokenCount":25}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"TypeScript is a typed superset of JavaScript developed by Microsoft. The main differences from JavaScript are static typing and better tooling."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":45,"candidatesTokenCount":80,"totalTokenCount":125,"promptTokensDetails":[{"modality":"TEXT","tokenCount":45}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here is a simple TypeScript function:\n\nfunction greet(name: string): string { return `Hello, ${name}!`; }"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":60,"candidatesTokenCount":55,"totalTokenCount":115,"promptTokensDetails":[{"modality":"TEXT","tokenCount":60}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here are 5 key TypeScript best practices: Enable strict mode, prefer interfaces, use union types, leverage type inference, and use readonly."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":75,"candidatesTokenCount":70,"totalTokenCount":145,"promptTokensDetails":[{"modality":"TEXT","tokenCount":75}]}}]}
--- a/memory-tests/memory.simple-prompt.responses
+++ b/memory-tests/memory.simple-prompt.responses
@ -0,0 +1,2 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The capital of France is Paris. It has been the capital since the 10th century and is known for iconic landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral. Paris is also the most populous city in France, with a metropolitan area population of over 12 million people."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7,"candidatesTokenCount":55,"totalTokenCount":62,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7}]}}]}
--- a/memory-tests/tsconfig.json
+++ b/memory-tests/tsconfig.json
@ -0,0 +1,12 @@
+{
+  "extends": "../tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true,
+    "allowJs": true
+  },
+  "include": ["**/*.ts"],
+  "references": [
+    { "path": "../packages/core" },
+    { "path": "../packages/test-utils" }
+  ]
+}
--- a/memory-tests/vitest.config.ts
+++ b/memory-tests/vitest.config.ts
@ -0,0 +1,28 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { defineConfig } from 'vitest/config';
+
+// Options for the forked worker process that executes the memory tests.
+const forkOptions = {
+  singleFork: true, // Single process for accurate per-test memory readings
+  execArgv: ['--expose-gc'], // Enable global.gc() for forced GC
+};
+
+// Vitest configuration for the memory regression suite.
+export default defineConfig({
+  test: {
+    include: ['**/*.test.ts'],
+    globalSetup: './globalSetup.ts',
+    reporters: ['default'],
+    testTimeout: 600000, // 10 minutes — memory profiling is slow
+    retry: 0, // No retries for memory tests — noise is handled by tolerance
+    fileParallelism: false, // Must run serially to avoid memory interference
+    pool: 'forks', // Use forks pool for --expose-gc support
+    poolOptions: {
+      forks: forkOptions,
+    },
+    env: {
+      GEMINI_TEST_TYPE: 'memory',
+    },
+  },
+});
--- a/package-lock.json
+++ b/package-lock.json
@ -446,7 +446,8 @@
      "version": "2.11.0",
      "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz",
      "integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==",
-      "license": "(Apache-2.0 AND BSD-3-Clause)"
+      "license": "(Apache-2.0 AND BSD-3-Clause)",
+      "peer": true
    },
    "node_modules/@bundled-es-modules/cookie": {
      "version": "2.0.1",
@ -1449,6 +1450,7 @@
      "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz",
      "integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@grpc/proto-loader": "^0.7.13",
        "@js-sdsl/ordered-map": "^4.4.2"
@ -2155,6 +2157,7 @@
      "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
      "dev": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "@octokit/auth-token": "^6.0.0",
        "@octokit/graphql": "^9.0.2",
@ -2335,6 +2338,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
      "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
      "license": "Apache-2.0",
+      "peer": true,
      "engines": {
        "node": ">=8.0.0"
      }
@ -2384,6 +2388,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz",
      "integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@opentelemetry/semantic-conventions": "^1.29.0"
      },
@ -2758,6 +2763,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz",
      "integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@opentelemetry/core": "2.5.0",
        "@opentelemetry/semantic-conventions": "^1.29.0"
@ -2791,6 +2797,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz",
      "integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@opentelemetry/core": "2.5.0",
        "@opentelemetry/resources": "2.5.0"
@ -2845,6 +2852,7 @@
      "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz",
      "integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@opentelemetry/core": "2.5.0",
        "@opentelemetry/resources": "2.5.0",
@ -4081,6 +4089,7 @@
      "integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==",
      "devOptional": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "csstype": "^3.0.2"
      }
@ -4355,6 +4364,7 @@
      "integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==",
      "dev": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "@typescript-eslint/scope-manager": "8.35.0",
        "@typescript-eslint/types": "8.35.0",
@ -5228,6 +5238,7 @@
      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
      "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
      "license": "MIT",
+      "peer": true,
      "bin": {
        "acorn": "bin/acorn"
      },
@ -5569,6 +5580,12 @@
      "dev": true,
      "license": "MIT"
    },
+    "node_modules/asciichart": {
+      "version": "1.5.25",
+      "resolved": "https://registry.npmjs.org/asciichart/-/asciichart-1.5.25.tgz",
+      "integrity": "sha512-PNxzXIPPOtWq8T7bgzBtk9cI2lgS4SJZthUHEiQ1aoIc3lNzGfUvIvo9LiAnq26TACo9t1/4qP6KTGAUbzX9Xg==",
+      "license": "MIT"
+    },
    "node_modules/assertion-error": {
      "version": "2.0.1",
      "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz",
@ -7362,7 +7379,8 @@
      "version": "0.0.1581282",
      "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz",
      "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==",
-      "license": "BSD-3-Clause"
+      "license": "BSD-3-Clause",
+      "peer": true
    },
    "node_modules/dezalgo": {
      "version": "1.0.4",
@ -7946,6 +7964,7 @@
      "integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==",
      "dev": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "@eslint-community/eslint-utils": "^4.2.0",
        "@eslint-community/regexpp": "^4.12.1",
@ -8463,6 +8482,7 @@
      "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
      "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "accepts": "^2.0.0",
        "body-parser": "^2.2.1",
@ -9775,6 +9795,7 @@
      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.7.tgz",
      "integrity": "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=16.9.0"
      }
@ -10053,6 +10074,7 @@
      "resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.7.tgz",
      "integrity": "sha512-bDzQLpLzK/dn9Ur/Ku88ZZR9totVcMGrGYAgPHidsAAbe9NKztU1fggj/iu0wRp5g1kBeALb3cfagFGdDxAU1w==",
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "ansi-escapes": "^7.0.0",
        "ansi-styles": "^6.2.3",
@ -13826,6 +13848,7 @@
      "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz",
      "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=0.10.0"
      }
@ -13836,6 +13859,7 @@
      "integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==",
      "devOptional": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "shell-quote": "^1.6.1",
        "ws": "^7"
@ -15985,6 +16009,7 @@
      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
      "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=12"
      },
@ -16207,7 +16232,8 @@
      "version": "2.8.1",
      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
      "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
-      "license": "0BSD"
+      "license": "0BSD",
+      "peer": true
    },
    "node_modules/tsx": {
      "version": "4.20.3",
@ -16215,6 +16241,7 @@
      "integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
      "devOptional": true,
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "esbuild": "~0.25.0",
        "get-tsconfig": "^4.7.5"
@ -16380,6 +16407,7 @@
      "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
      "devOptional": true,
      "license": "Apache-2.0",
+      "peer": true,
      "bin": {
        "tsc": "bin/tsc",
        "tsserver": "bin/tsserver"
@ -16602,6 +16630,7 @@
      "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz",
      "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "esbuild": "^0.25.0",
        "fdir": "^6.5.0",
@ -16715,6 +16744,7 @@
      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
      "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=12"
      },
@ -16727,6 +16757,7 @@
      "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
      "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "@types/chai": "^5.2.2",
        "@vitest/expect": "3.2.4",
@ -17374,6 +17405,7 @@
      "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
      "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
      "license": "MIT",
+      "peer": true,
      "funding": {
        "url": "https://github.com/sponsors/colinhacks"
      }
@ -17817,6 +17849,7 @@
      "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz",
      "integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "@grpc/proto-loader": "^0.8.0",
        "@js-sdsl/ordered-map": "^4.4.2"
@ -17920,6 +17953,7 @@
      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
      "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
      "license": "MIT",
+      "peer": true,
      "engines": {
        "node": ">=12"
      },
@ -17979,6 +18013,7 @@
      "dependencies": {
        "@google/gemini-cli-core": "file:../core",
        "@lydell/node-pty": "1.1.0",
+        "asciichart": "^1.5.25",
        "strip-ansi": "^7.1.2",
        "vitest": "^3.2.4"
      },
--- a/package.json
+++ b/package.json
@ -51,6 +51,8 @@
    "test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman",
    "test:integration:flaky": "cross-env RUN_FLAKY_INTEGRATION=1 npm run test:integration:sandbox:none",
    "test:integration:sandbox:none": "cross-env GEMINI_SANDBOX=false vitest run --root ./integration-tests",
+    "test:memory": "vitest run --root ./memory-tests",
+    "test:memory:update-baselines": "cross-env UPDATE_MEMORY_BASELINES=true vitest run --root ./memory-tests",
    "test:integration:sandbox:docker": "cross-env GEMINI_SANDBOX=docker npm run build:sandbox && cross-env GEMINI_SANDBOX=docker vitest run --root ./integration-tests",
    "test:integration:sandbox:podman": "cross-env GEMINI_SANDBOX=podman vitest run --root ./integration-tests",
    "lint": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" eslint . --cache --max-warnings 0",
--- a/packages/test-utils/package.json
+++ b/packages/test-utils/package.json
@ -12,6 +12,7 @@
  "dependencies": {
    "@google/gemini-cli-core": "file:../core",
    "@lydell/node-pty": "1.1.0",
+    "asciichart": "^1.5.25",
    "strip-ansi": "^7.1.2",
    "vitest": "^3.2.4"
  },
--- a/packages/test-utils/src/index.ts
+++ b/packages/test-utils/src/index.ts
@ -6,6 +6,8 @@

 export * from './file-system-test-helpers.js';
 export * from './fixtures/agents.js';
+export * from './memory-baselines.js';
+export * from './memory-test-harness.js';
 export * from './mock-utils.js';
 export * from './test-mcp-server.js';
 export * from './test-rig.js';
--- a/packages/test-utils/src/memory-baselines.ts
+++ b/packages/test-utils/src/memory-baselines.ts
@ -0,0 +1,76 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { readFileSync, writeFileSync, existsSync } from 'node:fs';
+
+/**
+ * Baseline entry for a single memory test scenario.
+ *
+ * One entry is stored per scenario name in `MemoryBaselineFile.scenarios`
+ * and is (re)written by `updateBaseline`.
+ */
+export interface MemoryBaseline {
+  /** Heap bytes in use recorded for the scenario. */
+  heapUsedBytes: number;
+  /** Total heap bytes recorded for the scenario. */
+  heapTotalBytes: number;
+  /** Resident set size, in bytes, recorded for the scenario. */
+  rssBytes: number;
+  /** ISO 8601 time at which this entry was written. */
+  timestamp: string;
+}
+
+/**
+ * Top-level structure of the baselines JSON file.
+ */
+export interface MemoryBaselineFile {
+  /** Format version; `loadBaselines` uses 1 when the file does not exist. */
+  version: number;
+  /** ISO 8601 time of the last save; refreshed by `saveBaselines`. */
+  updatedAt: string;
+  /** Recorded baselines, keyed by scenario name. */
+  scenarios: Record<string, MemoryBaseline>;
+}
+
+/**
+ * Load baselines from a JSON file.
+ *
+ * Returns an empty baseline file if the file does not exist yet. When the
+ * file exists, its top-level shape is validated so that callers can rely on
+ * `scenarios` always being an object: a missing `version`, `updatedAt` or
+ * `scenarios` field is filled with the same default used for a new file, and
+ * any additional top-level fields are passed through untouched.
+ *
+ * @param path - Location of the baselines JSON file.
+ * @returns The parsed (and normalised) baseline file.
+ * @throws SyntaxError if the file is not valid JSON (message includes `path`).
+ * @throws TypeError if the JSON root or its `scenarios` field is not a plain
+ *   object.
+ */
+export function loadBaselines(path: string): MemoryBaselineFile {
+  if (!existsSync(path)) {
+    return {
+      version: 1,
+      updatedAt: new Date().toISOString(),
+      scenarios: {},
+    };
+  }
+
+  const content = readFileSync(path, 'utf-8');
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(content);
+  } catch (error: unknown) {
+    // Re-throw as the same error type, but say which file is broken.
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new SyntaxError(
+      `Invalid JSON in memory baselines file "${path}": ${reason}`,
+    );
+  }
+
+  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+    throw new TypeError(
+      `Memory baselines file "${path}" must contain a JSON object.`,
+    );
+  }
+
+  // Safe after the runtime check above: every field is still treated as
+  // optional and verified individually below.
+  const file = parsed as Partial<MemoryBaselineFile>;
+  const scenarios = file.scenarios;
+
+  if (
+    scenarios != null &&
+    (typeof scenarios !== 'object' || Array.isArray(scenarios))
+  ) {
+    throw new TypeError(
+      `"scenarios" in memory baselines file "${path}" must be an object.`,
+    );
+  }
+
+  return {
+    ...file,
+    version: typeof file.version === 'number' ? file.version : 1,
+    updatedAt:
+      typeof file.updatedAt === 'string'
+        ? file.updatedAt
+        : new Date().toISOString(),
+    scenarios: scenarios ?? {},
+  };
+}
+
+/**
+ * Persist a baselines object to disk as pretty-printed JSON.
+ *
+ * The passed object's `updatedAt` is overwritten with the current time
+ * before serialising, and the written file ends with a newline.
+ *
+ * @param path - Destination file.
+ * @param baselines - Baselines to write; `updatedAt` is mutated in place.
+ */
+export function saveBaselines(
+  path: string,
+  baselines: MemoryBaselineFile,
+): void {
+  // Stamp first so the serialised text carries the fresh time.
+  baselines.updatedAt = new Date().toISOString();
+  const serialized = `${JSON.stringify(baselines, null, 2)}\n`;
+  writeFileSync(path, serialized);
+}
+
+/**
+ * Record the measured values for one scenario in the baselines file.
+ *
+ * Loads the current file (or an empty one if it does not exist), replaces or
+ * adds the entry for `scenarioName` with a fresh timestamp, and saves the
+ * file back to `path`.
+ *
+ * @param path - Location of the baselines JSON file.
+ * @param scenarioName - Key of the scenario entry to create or replace.
+ * @param measured - Byte counts to store for the scenario.
+ */
+export function updateBaseline(
+  path: string,
+  scenarioName: string,
+  measured: {
+    heapUsedBytes: number;
+    heapTotalBytes: number;
+    rssBytes: number;
+  },
+): void {
+  const file = loadBaselines(path);
+  const { heapUsedBytes, heapTotalBytes, rssBytes } = measured;
+
+  const entry: MemoryBaseline = {
+    heapUsedBytes,
+    heapTotalBytes,
+    rssBytes,
+    timestamp: new Date().toISOString(),
+  };
+  file.scenarios[scenarioName] = entry;
+
+  saveBaselines(path, file);
+}
--- a/packages/test-utils/src/memory-test-harness.ts
+++ b/packages/test-utils/src/memory-test-harness.ts
@ -0,0 +1,483 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import v8 from 'node:v8';
+import { setTimeout as sleep } from 'node:timers/promises';
+import { loadBaselines, updateBaseline } from './memory-baselines.js';
+import type { MemoryBaseline, MemoryBaselineFile } from './memory-baselines.js';
+
+/**
+ * Configuration for asciichart plot function.
+ * Declares only the options this module passes when rendering a chart.
+ */
+interface PlotConfig {
+  /** Chart height option (the report passes 10). */
+  height?: number;
+  /** Formatter turning a numeric value into its label text. */
+  format?: (x: number) => string;
+}
+
+/**
+ * Type for the asciichart plot function: takes a numeric series plus
+ * optional config and returns the rendered chart as a string.
+ */
+type PlotFn = (series: number[], config?: PlotConfig) => string;
+
+/**
+ * A single memory snapshot at a point in time.
+ *
+ * Populated by `MemoryTestHarness.takeSnapshot` from `process.memoryUsage()`
+ * and the `node:v8` statistics functions.
+ */
+export interface MemorySnapshot {
+  /** `Date.now()` value (epoch milliseconds) when the snapshot was built. */
+  timestamp: number;
+  /** Caller-supplied name identifying the snapshot in reports. */
+  label: string;
+  /** `heapUsed` from `process.memoryUsage()`. */
+  heapUsed: number;
+  /** `heapTotal` from `process.memoryUsage()`. */
+  heapTotal: number;
+  /** `rss` from `process.memoryUsage()`. */
+  rss: number;
+  /** `external` from `process.memoryUsage()`. */
+  external: number;
+  /** `arrayBuffers` from `process.memoryUsage()`. */
+  arrayBuffers: number;
+  /** `heap_size_limit` from `v8.getHeapStatistics()`. */
+  heapSizeLimit: number;
+  /**
+   * Result of `v8.getHeapSpaceStatistics()`. `analyzeSnapshots` reads
+   * `space_name` and `space_used_size` from each entry.
+   * NOTE(review): typed as `any[]`; a concrete element type would let the
+   * compiler check those property reads — confirm no caller builds
+   * snapshots with a different shape before tightening.
+   */
+  heapSpaces: any[];
+}
+
+/**
+ * Result from running a memory test scenario.
+ *
+ * Produced by `MemoryTestHarness.runScenario`.
+ */
+export interface MemoryTestResult {
+  /** Scenario name; also the key used to look up the baseline. */
+  scenarioName: string;
+  /** 'before' snapshot, any intermediate snapshots, then 'after'. */
+  snapshots: MemorySnapshot[];
+  /** Largest `heapUsed` across `snapshots`. */
+  peakHeapUsed: number;
+  /** Largest `rss` across `snapshots`. */
+  peakRss: number;
+  /** `heapUsed` of the 'after' snapshot. */
+  finalHeapUsed: number;
+  /** `rss` of the 'after' snapshot. */
+  finalRss: number;
+  /** Stored baseline for the scenario, or undefined if none exists. */
+  baseline: MemoryBaseline | undefined;
+  /** True when `deltaPercent` is at most the tolerance, or no baseline. */
+  withinTolerance: boolean;
+  /**
+   * Percentage difference of `finalHeapUsed` relative to the baseline's
+   * `heapUsedBytes`; 0 when there is no baseline.
+   */
+  deltaPercent: number;
+}
+
+/**
+ * Options for the MemoryTestHarness.
+ *
+ * Only `baselinesPath` is required; the constructor substitutes the
+ * documented default for every omitted optional field.
+ */
+export interface MemoryTestHarnessOptions {
+  /** Path to the baselines JSON file (loaded when the harness is created) */
+  baselinesPath: string;
+  /** Default tolerance percentage (0-100). Default: 10 */
+  defaultTolerancePercent?: number;
+  /** Number of GC cycles to run before each snapshot. Default: 3 */
+  gcCycles?: number;
+  /** Delay in ms between GC cycles. Default: 100 */
+  gcDelayMs?: number;
+  /** Number of samples to take for median calculation. Default: 3 */
+  sampleCount?: number;
+  /** Pause in ms between samples. Default: 50 */
+  samplePauseMs?: number;
+}
+
+/**
+ * MemoryTestHarness provides infrastructure for running memory usage tests.
+ *
+ * It handles:
+ * - Forcing V8 garbage collection to reduce noise
+ * - Sampling `process.memoryUsage()` and the `node:v8` heap statistics
+ *   (counter reads, not heap-snapshot dumps) for memory measurement
+ * - Comparing against baselines with configurable tolerance
+ * - Generating ASCII chart reports of memory trends
+ */
+export class MemoryTestHarness {
+  private baselines: MemoryBaselineFile;
+  private readonly baselinesPath: string;
+  private readonly defaultTolerancePercent: number;
+  private readonly gcCycles: number;
+  private readonly gcDelayMs: number;
+  private readonly sampleCount: number;
+  private readonly samplePauseMs: number;
+  private allResults: MemoryTestResult[] = [];
+  /**
+   * Tolerance `runScenario` applied to each result it produced, so that
+   * `assertWithinBaseline` judges a result by the same threshold that set
+   * its `withinTolerance` flag.
+   */
+  private readonly scenarioTolerances = new WeakMap<
+    MemoryTestResult,
+    number
+  >();
+
+  /**
+   * Create a harness and load the baselines file named in the options.
+   *
+   * @param options - Harness configuration; omitted optional fields fall
+   *   back to the defaults documented on `MemoryTestHarnessOptions`.
+   */
+  constructor(options: MemoryTestHarnessOptions) {
+    this.baselinesPath = options.baselinesPath;
+    this.defaultTolerancePercent = options.defaultTolerancePercent ?? 10;
+    this.gcCycles = options.gcCycles ?? 3;
+    this.gcDelayMs = options.gcDelayMs ?? 100;
+    this.sampleCount = options.sampleCount ?? 3;
+    this.samplePauseMs = options.samplePauseMs ?? 50;
+    this.baselines = loadBaselines(this.baselinesPath);
+  }
+
+  /**
+   * Force garbage collection and record the current memory counters.
+   * GC is forced multiple times with delays to allow weak references and
+   * FinalizationRegistry callbacks to run, reducing measurement noise.
+   *
+   * @param label - Name stored on the returned snapshot.
+   * @returns Values from `process.memoryUsage()` plus V8 heap statistics.
+   * @throws Error if `gc` is not exposed (Node must run with --expose-gc).
+   */
+  async takeSnapshot(label: string = 'snapshot'): Promise<MemorySnapshot> {
+    await this.forceGC();
+
+    const memUsage = process.memoryUsage();
+    const heapStats = v8.getHeapStatistics();
+
+    return {
+      timestamp: Date.now(),
+      label,
+      heapUsed: memUsage.heapUsed,
+      heapTotal: memUsage.heapTotal,
+      rss: memUsage.rss,
+      external: memUsage.external,
+      arrayBuffers: memUsage.arrayBuffers,
+      heapSizeLimit: heapStats.heap_size_limit,
+      heapSpaces: v8.getHeapSpaceStatistics(),
+    };
+  }
+
+  /**
+   * Take multiple snapshot samples and return the median to reduce noise.
+   *
+   * The median is chosen by `heapUsed`; for an even number of samples the
+   * upper of the two middle samples is used. At least one sample is always
+   * taken, even if the requested count is zero, negative or not finite.
+   *
+   * @param label - Label for the returned snapshot (samples get a suffix).
+   * @param count - Number of samples; defaults to the harness `sampleCount`.
+   * @returns The median sample, relabelled and re-timestamped.
+   */
+  async takeMedianSnapshot(
+    label: string = 'median',
+    count?: number,
+  ): Promise<MemorySnapshot> {
+    const requested = count ?? this.sampleCount;
+    // Without this clamp a count of 0 would yield a "snapshot" that has a
+    // label and timestamp but no memory fields at all.
+    const numSamples = Number.isFinite(requested)
+      ? Math.max(1, Math.ceil(requested))
+      : 1;
+
+    const samples: MemorySnapshot[] = [];
+    for (let i = 0; i < numSamples; i++) {
+      samples.push(await this.takeSnapshot(`${label}_sample_${i}`));
+      if (i < numSamples - 1) {
+        await sleep(this.samplePauseMs);
+      }
+    }
+
+    // Sort by heapUsed and take the median
+    samples.sort((a, b) => a.heapUsed - b.heapUsed);
+    const median = samples[Math.floor(samples.length / 2)];
+    if (!median) {
+      // Unreachable because numSamples >= 1; keeps the type checker honest.
+      throw new Error('No memory samples were collected');
+    }
+
+    return {
+      ...median,
+      label,
+      timestamp: Date.now(),
+    };
+  }
+
+  /**
+   * Run a memory test scenario.
+   *
+   * Takes before/after snapshots around the scenario function, collects
+   * intermediate snapshots if the scenario provides them, and compares
+   * the result against the stored baseline.
+   *
+   * @param name - Scenario name (must match baseline key)
+   * @param fn - Async function that executes the scenario. Receives a
+   *   `recordSnapshot` callback for recording intermediate snapshots.
+   * @param tolerancePercent - Override default tolerance for this scenario
+   * @returns The collected snapshots and the comparison with the baseline.
+   */
+  async runScenario(
+    name: string,
+    fn: (
+      recordSnapshot: (label: string) => Promise<MemorySnapshot>,
+    ) => Promise<void>,
+    tolerancePercent?: number,
+  ): Promise<MemoryTestResult> {
+    const tolerance = tolerancePercent ?? this.defaultTolerancePercent;
+    const snapshots: MemorySnapshot[] = [];
+
+    // Record a callback for intermediate snapshots
+    const recordSnapshot = async (label: string): Promise<MemorySnapshot> => {
+      const snap = await this.takeMedianSnapshot(label);
+      snapshots.push(snap);
+      return snap;
+    };
+
+    // Before snapshot
+    const beforeSnap = await this.takeMedianSnapshot('before');
+    snapshots.push(beforeSnap);
+
+    // Run the scenario
+    await fn(recordSnapshot);
+
+    // After snapshot (median of multiple samples)
+    const afterSnap = await this.takeMedianSnapshot('after');
+    snapshots.push(afterSnap);
+
+    // Calculate peak values
+    const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed));
+    const peakRss = Math.max(...snapshots.map((s) => s.rss));
+
+    // Get baseline
+    const baseline = this.baselines.scenarios[name];
+
+    // Without a baseline there is nothing to compare against, so the
+    // scenario is reported as within tolerance with a zero delta.
+    const deltaPercent = baseline
+      ? MemoryTestHarness.percentOverBaseline(
+          afterSnap.heapUsed,
+          baseline.heapUsedBytes,
+        )
+      : 0;
+    const withinTolerance = deltaPercent <= tolerance;
+
+    const result: MemoryTestResult = {
+      scenarioName: name,
+      snapshots,
+      peakHeapUsed,
+      peakRss,
+      finalHeapUsed: afterSnap.heapUsed,
+      finalRss: afterSnap.rss,
+      baseline,
+      withinTolerance,
+      deltaPercent,
+    };
+
+    this.scenarioTolerances.set(result, tolerance);
+    this.allResults.push(result);
+    return result;
+  }
+
+  /**
+   * Assert that a scenario result is within the baseline tolerance.
+   * Throws an assertion error with details if it exceeds the threshold.
+   * A result without a baseline only logs a warning and never fails.
+   *
+   * @param result - Result to check.
+   * @param tolerancePercent - Explicit tolerance. When omitted, the
+   *   tolerance `runScenario` used for this result applies, falling back
+   *   to the harness default for results not produced by this harness.
+   * @throws Error when the measured heap exceeds baseline plus tolerance.
+   */
+  assertWithinBaseline(
+    result: MemoryTestResult,
+    tolerancePercent?: number,
+  ): void {
+    const tolerance =
+      tolerancePercent ??
+      this.scenarioTolerances.get(result) ??
+      this.defaultTolerancePercent;
+
+    if (!result.baseline) {
+      console.warn(
+        `⚠ No baseline found for "${result.scenarioName}". ` +
+          `Run with UPDATE_MEMORY_BASELINES=true to create one. ` +
+          `Measured: ${formatMB(result.finalHeapUsed)} heap used.`,
+      );
+      return; // Don't fail if no baseline exists yet
+    }
+
+    const deltaPercent = MemoryTestHarness.percentOverBaseline(
+      result.finalHeapUsed,
+      result.baseline.heapUsedBytes,
+    );
+
+    if (deltaPercent > tolerance) {
+      throw new Error(
+        `Memory regression detected for "${result.scenarioName}"!\n` +
+          `  Measured:  ${formatMB(result.finalHeapUsed)} heap used\n` +
+          `  Baseline:  ${formatMB(result.baseline.heapUsedBytes)} heap used\n` +
+          `  Delta:     ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
+          `  Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
+          `  Peak RSS:  ${formatMB(result.peakRss)}`,
+      );
+    }
+  }
+
+  /**
+   * Update the baseline for a scenario with the current measured values.
+   * The in-memory baselines are reloaded from disk afterwards.
+   *
+   * @param result - Result whose final values become the new baseline.
+   */
+  updateScenarioBaseline(result: MemoryTestResult): void {
+    updateBaseline(this.baselinesPath, result.scenarioName, {
+      heapUsedBytes: result.finalHeapUsed,
+      heapTotalBytes:
+        result.snapshots[result.snapshots.length - 1]?.heapTotal ?? 0,
+      rssBytes: result.finalRss,
+    });
+    // Reload baselines after update
+    this.baselines = loadBaselines(this.baselinesPath);
+  }
+
+  /**
+   * Analyze the last 3 snapshots to detect sustained growth.
+   * Growth is flagged if any heap space grows by more than the threshold
+   * in both phases (first to second, and second to third snapshot).
+   *
+   * @param snapshots - Snapshots in chronological order.
+   * @param thresholdBytes - Minimum per-phase growth to count. Default: 1 MB
+   * @returns Whether sustained growth was seen, plus a readable summary.
+   */
+  analyzeSnapshots(
+    snapshots: MemorySnapshot[],
+    thresholdBytes: number = 1024 * 1024, // 1 MB
+  ): { leaked: boolean; message: string } {
+    if (snapshots.length < 3) {
+      return { leaked: false, message: 'Not enough snapshots to analyze' };
+    }
+
+    const [snap1, snap2, snap3] = snapshots.slice(-3);
+
+    if (!snap1 || !snap2 || !snap3) {
+      return { leaked: false, message: 'Missing snapshots' };
+    }
+
+    // Index each snapshot once instead of searching its list per space.
+    const usedBySpace = (snap: MemorySnapshot): Map<string, number> => {
+      const sizes = new Map<string, number>();
+      for (const space of snap.heapSpaces) {
+        // First entry wins if a name were ever repeated.
+        if (!sizes.has(space.space_name)) {
+          sizes.set(space.space_name, space.space_used_size);
+        }
+      }
+      return sizes;
+    };
+
+    const sizes1 = usedBySpace(snap1);
+    const sizes2 = usedBySpace(snap2);
+    const sizes3 = usedBySpace(snap3);
+    const spaceNames = new Set<string>([
+      ...sizes1.keys(),
+      ...sizes2.keys(),
+      ...sizes3.keys(),
+    ]);
+
+    const growthDetails: string[] = [];
+
+    for (const name of spaceNames) {
+      // A space absent from a snapshot counts as 0 bytes used.
+      const size1 = sizes1.get(name) ?? 0;
+      const size2 = sizes2.get(name) ?? 0;
+      const size3 = sizes3.get(name) ?? 0;
+
+      const growth1 = size2 - size1;
+      const growth2 = size3 - size2;
+
+      if (growth1 > thresholdBytes && growth2 > thresholdBytes) {
+        growthDetails.push(
+          `${name}: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`,
+        );
+      }
+    }
+
+    const hasSustainedGrowth = growthDetails.length > 0;
+    const message = hasSustainedGrowth
+      ? `Memory bloat detected in heap spaces:\n  ` +
+        growthDetails.join('\n  ')
+      : `No sustained growth detected in any heap space above threshold.`;
+
+    return { leaked: hasSustainedGrowth, message };
+  }
+
+  /**
+   * Assert that memory returns to a baseline level after a peak.
+   * Useful for verifying that large tool outputs are not retained.
+   *
+   * The first snapshot is the reference; the last one must not exceed it
+   * by more than `tolerancePercent` of the reference's `heapUsed`.
+   *
+   * @param snapshots - At least 3 snapshots in chronological order.
+   * @param tolerancePercent - Allowed growth over the first snapshot.
+   * @throws Error if fewer than 3 snapshots are given or memory stays high.
+   */
+  assertMemoryReturnsToBaseline(
+    snapshots: MemorySnapshot[],
+    tolerancePercent: number = 10,
+  ): void {
+    if (snapshots.length < 3) {
+      throw new Error('Need at least 3 snapshots to check return to baseline');
+    }
+
+    const baseline = snapshots[0]; // Assume first is baseline
+    const final = snapshots[snapshots.length - 1];
+
+    if (!baseline || !final) {
+      throw new Error('Missing snapshots for return to baseline check');
+    }
+
+    // Seeded with the verified baseline so the accumulator is never
+    // undefined; the peak is only used for the error message.
+    const peak = snapshots.reduce<MemorySnapshot>(
+      (max, s) => (s.heapUsed > max.heapUsed ? s : max),
+      baseline,
+    );
+
+    const tolerance = baseline.heapUsed * (tolerancePercent / 100);
+    const delta = final.heapUsed - baseline.heapUsed;
+
+    if (delta > tolerance) {
+      throw new Error(
+        `Memory did not return to baseline!\n` +
+          `  Baseline: ${formatMB(baseline.heapUsed)}\n` +
+          `  Peak:     ${formatMB(peak.heapUsed)}\n` +
+          `  Final:    ${formatMB(final.heapUsed)}\n` +
+          `  Delta:    ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`,
+      );
+    }
+  }
+
+  /**
+   * Generate a report with ASCII charts and summary table.
+   * Uses the `asciichart` library for terminal visualization.
+   *
+   * The report is printed with `console.log` and also returned. If
+   * `asciichart` cannot be imported the trend section is replaced by an
+   * install hint; a chart that fails to render is noted on its own line
+   * without affecting the rest of the report.
+   *
+   * @param results - Results to report; defaults to every result recorded
+   *   by `runScenario` on this harness.
+   * @returns The full report text.
+   */
+  async generateReport(results?: MemoryTestResult[]): Promise<string> {
+    const resultsToReport = results ?? this.allResults;
+    const lines: string[] = [];
+
+    lines.push('');
+    lines.push('═══════════════════════════════════════════════════');
+    lines.push('         MEMORY USAGE TEST REPORT');
+    lines.push('═══════════════════════════════════════════════════');
+    lines.push('');
+
+    for (const result of resultsToReport) {
+      const measured = formatMB(result.finalHeapUsed);
+      const baseline = result.baseline
+        ? formatMB(result.baseline.heapUsedBytes)
+        : 'N/A';
+      const delta = result.baseline
+        ? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%`
+        : 'N/A';
+      const status = !result.baseline
+        ? 'NEW'
+        : result.withinTolerance
+          ? '✅'
+          : '❌';
+
+      lines.push(
+        `${result.scenarioName}: ${measured} (Baseline: ${baseline}, Delta: ${delta}) ${status}`,
+      );
+    }
+    lines.push('');
+
+    // Only the dynamic import is guarded here, so that an unrelated failure
+    // while drawing a chart is not misreported as a missing dependency.
+    let plot: PlotFn | undefined;
+    let chartLibraryLoaded = true;
+    try {
+      // @ts-expect-error - asciichart may not have types
+      const asciichart = (await import('asciichart')) as {
+        default?: { plot?: PlotFn };
+        plot?: PlotFn;
+      };
+      // The plot function may sit on the default export or the namespace.
+      plot = asciichart.default?.plot ?? asciichart.plot;
+    } catch {
+      chartLibraryLoaded = false;
+      lines.push(
+        '(asciichart not available — install with: npm install --save-dev asciichart)',
+      );
+      lines.push('');
+    }
+
+    // Generate ASCII chart for each scenario with multiple snapshots
+    if (chartLibraryLoaded) {
+      for (const result of resultsToReport) {
+        if (result.snapshots.length <= 2) {
+          continue;
+        }
+
+        lines.push(`📈 Memory trend: ${result.scenarioName}`);
+        lines.push('─'.repeat(60));
+
+        if (plot) {
+          const heapDataMB = result.snapshots.map(
+            (s) => s.heapUsed / (1024 * 1024),
+          );
+
+          try {
+            lines.push(
+              plot(heapDataMB, {
+                height: 10,
+                format: (x: number) => `${x.toFixed(1)} MB`.padStart(10),
+              }),
+            );
+          } catch (error: unknown) {
+            const reason =
+              error instanceof Error ? error.message : String(error);
+            lines.push(`(chart rendering failed: ${reason})`);
+          }
+        }
+
+        // Label the x-axis with snapshot labels
+        const labels = result.snapshots.map((s) => s.label);
+        lines.push('  ' + labels.join(' → '));
+        lines.push('');
+      }
+    }
+
+    lines.push('═══════════════════════════════════════════════════');
+    lines.push('');
+
+    const report = lines.join('\n');
+    console.log(report);
+    return report;
+  }
+
+  /**
+   * Percentage by which `measuredBytes` differs from `baselineBytes`.
+   *
+   * A zero baseline cannot be divided by: it yields 0 when the measurement
+   * is also zero and positive infinity otherwise, so the comparison never
+   * produces NaN (which would make `<=` and `>` checks disagree).
+   */
+  private static percentOverBaseline(
+    measuredBytes: number,
+    baselineBytes: number,
+  ): number {
+    if (baselineBytes === 0) {
+      return measuredBytes === 0 ? 0 : Number.POSITIVE_INFINITY;
+    }
+    return ((measuredBytes - baselineBytes) / baselineBytes) * 100;
+  }
+
+  /**
+   * Force V8 garbage collection.
+   * Runs multiple GC cycles with delays to allow weak references
+   * and FinalizationRegistry callbacks to run.
+   *
+   * @throws Error if `globalThis.gc` is not a function.
+   */
+  private async forceGC(): Promise<void> {
+    if (typeof globalThis.gc !== 'function') {
+      throw new Error(
+        'global.gc() not available. Run with --expose-gc for accurate measurements.',
+      );
+    }
+
+    for (let i = 0; i < this.gcCycles; i++) {
+      globalThis.gc();
+      // No delay after the last cycle; the caller measures immediately.
+      if (i < this.gcCycles - 1) {
+        await sleep(this.gcDelayMs);
+      }
+    }
+  }
+}
+
+/**
+ * Render a byte count as mebibytes with one decimal place, e.g. "1.5 MB".
+ *
+ * @param bytes - Size in bytes (may be negative for deltas).
+ * @returns The size divided by 1024 * 1024, suffixed with " MB".
+ */
+function formatMB(bytes: number): string {
+  const megabytes = bytes / (1024 * 1024);
+  return megabytes.toFixed(1) + ' MB';
+}