mirror of
https://github.com/google-gemini/gemini-cli
synced 2026-04-21 13:37:17 +00:00
feat(test-utils): add memory usage integration test harness (#24876)
This commit is contained in:
parent
34b4f1c6e4
commit
4ebc43bc66
18 changed files with 1021 additions and 3 deletions
33
.github/workflows/memory-nightly.yml
vendored
Normal file
33
.github/workflows/memory-nightly.yml
vendored
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
name: 'Memory Tests: Nightly'
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 2 * * *' # Runs at 02:00 UTC every day
|
||||
workflow_dispatch: # Allow manual trigger
|
||||
|
||||
permissions:
|
||||
contents: 'read'
|
||||
|
||||
jobs:
|
||||
memory-test:
|
||||
name: 'Run Memory Usage Tests'
|
||||
runs-on: 'gemini-cli-ubuntu-16-core'
|
||||
if: "github.repository == 'google-gemini/gemini-cli'"
|
||||
steps:
|
||||
- name: 'Checkout'
|
||||
uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5
|
||||
|
||||
- name: 'Set up Node.js'
|
||||
uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
|
||||
with:
|
||||
node-version-file: '.nvmrc'
|
||||
cache: 'npm'
|
||||
|
||||
- name: 'Install dependencies'
|
||||
run: 'npm ci'
|
||||
|
||||
- name: 'Build project'
|
||||
run: 'npm run build'
|
||||
|
||||
- name: 'Run Memory Tests'
|
||||
run: 'npm run test:memory'
|
||||
|
|
@ -44,6 +44,8 @@ powerful tool for developers.
|
|||
- **Test Commands:**
|
||||
- **Unit (All):** `npm run test`
|
||||
- **Integration (E2E):** `npm run test:e2e`
|
||||
- **Memory (Nightly):** `npm run test:memory` (Runs memory regression tests
|
||||
against baselines. Excluded from `preflight`, run nightly.)
|
||||
- **Workspace-Specific:** `npm test -w <pkg> -- <path>` (Note: `<path>` must
|
||||
be relative to the workspace root, e.g.,
|
||||
`-w @google/gemini-cli-core -- src/routing/modelRouterService.test.ts`)
|
||||
|
|
|
|||
|
|
@ -117,6 +117,46 @@ npm run test:integration:sandbox:docker
|
|||
npm run test:integration:sandbox:podman
|
||||
```
|
||||
|
||||
## Memory regression tests
|
||||
|
||||
Memory regression tests are designed to detect heap growth and leaks across key
|
||||
CLI scenarios. They are located in the `memory-tests` directory.
|
||||
|
||||
These tests are distinct from standard integration tests because they measure
|
||||
memory usage and compare it against committed baselines.
|
||||
|
||||
### Running memory tests
|
||||
|
||||
Memory tests are not run as part of the default `npm run test` or
|
||||
`npm run test:e2e` commands. They are run nightly in CI but can be run manually:
|
||||
|
||||
```bash
|
||||
npm run test:memory
|
||||
```
|
||||
|
||||
### Updating baselines
|
||||
|
||||
If you intentionally change behavior that affects memory usage, you may need to
|
||||
update the baselines. Set the `UPDATE_MEMORY_BASELINES` environment variable to
|
||||
`true`:
|
||||
|
||||
```bash
|
||||
UPDATE_MEMORY_BASELINES=true npm run test:memory
|
||||
```
|
||||
|
||||
This will run the tests, take median snapshots, and overwrite
|
||||
`memory-tests/baselines.json`. You should review the changes and commit the
|
||||
updated baseline file.
|
||||
|
||||
### How it works
|
||||
|
||||
The harness (`MemoryTestHarness` in `packages/test-utils`):
|
||||
|
||||
- Forces garbage collection multiple times to reduce noise.
|
||||
- Takes median snapshots to filter spikes.
|
||||
- Compares against baselines with a 10% tolerance.
|
||||
- Can analyze sustained leaks across 3 snapshots using `analyzeSnapshots()`.
|
||||
|
||||
## Diagnostics
|
||||
|
||||
The integration test runner provides several options for diagnostics to help
|
||||
|
|
|
|||
30
memory-tests/baselines.json
Normal file
30
memory-tests/baselines.json
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
{
|
||||
"version": 1,
|
||||
"updatedAt": "2026-04-08T01:21:58.770Z",
|
||||
"scenarios": {
|
||||
"multi-turn-conversation": {
|
||||
"heapUsedBytes": 120082704,
|
||||
"heapTotalBytes": 177586176,
|
||||
"rssBytes": 269172736,
|
||||
"timestamp": "2026-04-08T01:21:57.127Z"
|
||||
},
|
||||
"multi-function-call-repo-search": {
|
||||
"heapUsedBytes": 104644984,
|
||||
"heapTotalBytes": 111575040,
|
||||
"rssBytes": 204079104,
|
||||
"timestamp": "2026-04-08T01:21:58.770Z"
|
||||
},
|
||||
"idle-session-startup": {
|
||||
"heapUsedBytes": 119813672,
|
||||
"heapTotalBytes": 177061888,
|
||||
"rssBytes": 267943936,
|
||||
"timestamp": "2026-04-08T01:21:53.855Z"
|
||||
},
|
||||
"simple-prompt-response": {
|
||||
"heapUsedBytes": 119722064,
|
||||
"heapTotalBytes": 177324032,
|
||||
"rssBytes": 268812288,
|
||||
"timestamp": "2026-04-08T01:21:55.491Z"
|
||||
}
|
||||
}
|
||||
}
|
||||
71
memory-tests/globalSetup.ts
Normal file
71
memory-tests/globalSetup.ts
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { mkdir, readdir, rm } from 'node:fs/promises';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js';
|
||||
|
||||
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
// Parent of the directory that contains this file.
const rootDir = join(__dirname, '..');
|
||||
// Folder under rootDir in which every run creates its own timestamped subdirectory.
const memoryTestsDir = join(rootDir, '.memory-tests');
|
||||
// Output directory of the current run; assigned in setup() and removed in
// teardown() unless KEEP_OUTPUT is 'true'. Empty until setup() has run.
let runDir = '';
|
||||
|
||||
/**
 * Vitest global setup for the memory tests.
 *
 * Creates a fresh, timestamp-named run directory under `.memory-tests`,
 * redirects the home/config environment variables into it, verifies that
 * ripgrep is usable, prunes all but the three newest run directories and
 * finally exports the environment variables the tests read.
 *
 * @throws Error when `canUseRipgrep()` resolves to a falsy value.
 */
export async function setup() {
  const runName = `${Date.now()}`;
  runDir = join(memoryTestsDir, runName);
  await mkdir(runDir, { recursive: true });

  // Point the home directory (and the Gemini config dir) at the run directory
  // so the tests never touch the user's local configuration.
  const env = process.env;
  env['HOME'] = runDir;
  if (process.platform === 'win32') {
    env['USERPROFILE'] = runDir;
  }
  env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini');

  // Resolve ripgrep once, up front, to avoid race conditions later on.
  const ripgrepReady = await canUseRipgrep();
  if (!ripgrepReady) {
    throw new Error('Failed to download ripgrep binary');
  }

  // Best-effort pruning of old runs: keep the three entries that sort last
  // (run directories are named after their creation timestamp).
  const removeRun = (name: string) =>
    rm(join(memoryTestsDir, name), { recursive: true, force: true });
  try {
    const entries = await readdir(memoryTestsDir);
    if (entries.length > 3) {
      entries.sort();
      const stale = entries.slice(0, entries.length - 3);
      await Promise.all(stale.map(removeRun));
    }
  } catch (e) {
    // A failed clean-up must not abort the test run.
    console.error('Error cleaning up old memory test runs:', e);
  }

  env['INTEGRATION_TEST_FILE_DIR'] = runDir;
  env['GEMINI_CLI_INTEGRATION_TEST'] = 'true';
  env['GEMINI_FORCE_FILE_STORAGE'] = 'true';
  env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log');
  // Respect a caller-provided VERBOSE value; default to 'false' otherwise.
  env['VERBOSE'] = env['VERBOSE'] ?? 'false';

  console.log(`\nMemory test output directory: ${runDir}`);
}
|
||||
|
||||
/**
 * Vitest global teardown for the memory tests.
 *
 * Deletes the current run directory. Nothing is deleted when the
 * `KEEP_OUTPUT` environment variable equals `'true'` or when no run
 * directory was ever assigned. Deletion failures are only logged.
 */
export async function teardown() {
  const keepOutput = process.env['KEEP_OUTPUT'] === 'true';
  if (keepOutput || !runDir) {
    return;
  }

  try {
    await rm(runDir, { recursive: true, force: true });
  } catch (e) {
    // Clean-up is best effort; never fail the run because of it.
    console.warn('Failed to clean up memory test directory:', e);
  }
}
|
||||
185
memory-tests/memory-usage.test.ts
Normal file
185
memory-tests/memory-usage.test.ts
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, beforeAll, afterAll, afterEach } from 'vitest';
|
||||
import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
// Baseline file located next to this test file.
const BASELINES_PATH = join(__dirname, 'baselines.json');
|
||||
// True only when UPDATE_MEMORY_BASELINES is exactly 'true'; the tests then
// update the stored baselines instead of asserting against them.
const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true';
|
||||
// Handed to the harness as `defaultTolerancePercent`.
const TOLERANCE_PERCENT = 10;
|
||||
|
||||
// Fake API key for tests using fake responses; passed as `env` to each CLI run.
|
||||
const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' };
|
||||
|
||||
/**
 * Memory regression scenarios.
 *
 * Every test creates its own `TestRig`, runs one CLI scenario through the
 * shared `MemoryTestHarness`, and then either asserts the result against the
 * stored baseline or, when `UPDATE_BASELINES` is set, updates that baseline.
 */
describe('Memory Usage Tests', () => {
  /** Value resolved by `MemoryTestHarness.runScenario`. */
  type ScenarioResult = Awaited<ReturnType<MemoryTestHarness['runScenario']>>;

  // `timeout` (in milliseconds) passed to every `rig.run()` call.
  const RUN_TIMEOUT_MS = 120000;

  let harness: MemoryTestHarness;
  // Rig owned by the test that is currently running; undefined between tests
  // so that `afterEach` never cleans up a missing or already-cleaned rig.
  let rig: TestRig | undefined;

  /**
   * Creates the rig for the current test and sets it up with a fake-responses
   * file that lives next to this test file.
   *
   * The rig is registered for clean-up before `setup()` is called, so it is
   * cleaned up even when `setup()` throws.
   */
  function startRig(testName: string, responsesFile: string): TestRig {
    const created = new TestRig();
    rig = created;
    created.setup(testName, {
      fakeResponsesPath: join(__dirname, responsesFile),
    });
    return created;
  }

  /**
   * Updates the stored baseline (and logs the final heap size in MB) when
   * `UPDATE_BASELINES` is set; otherwise asserts the result against the
   * stored baseline.
   */
  function checkBaseline(scenarioName: string, result: ScenarioResult): void {
    if (UPDATE_BASELINES) {
      harness.updateScenarioBaseline(result);
      const heapMb = (result.finalHeapUsed / (1024 * 1024)).toFixed(1);
      console.log(`Updated baseline for ${scenarioName}: ${heapMb} MB`);
    } else {
      harness.assertWithinBaseline(result);
    }
  }

  beforeAll(() => {
    harness = new MemoryTestHarness({
      baselinesPath: BASELINES_PATH,
      defaultTolerancePercent: TOLERANCE_PERCENT,
      gcCycles: 3,
      gcDelayMs: 100,
      sampleCount: 3,
    });
  });

  afterEach(async () => {
    // Take ownership first so a failing cleanup cannot leave a stale rig
    // behind for the next test's afterEach.
    const finished = rig;
    rig = undefined;
    await finished?.cleanup();
  });

  afterAll(async () => {
    // Generate the summary report after all tests
    await harness.generateReport();
  });

  it('idle-session-startup: memory usage within baseline', async () => {
    const scenario = 'idle-session-startup';
    const testRig = startRig(
      'memory-idle-startup',
      'memory.idle-startup.responses',
    );

    const result = await harness.runScenario(
      scenario,
      async (recordSnapshot) => {
        await testRig.run({
          args: ['hello'],
          timeout: RUN_TIMEOUT_MS,
          env: TEST_ENV,
        });

        await recordSnapshot('after-startup');
      },
    );

    checkBaseline(scenario, result);
  });

  it('simple-prompt-response: memory usage within baseline', async () => {
    const scenario = 'simple-prompt-response';
    const testRig = startRig(
      'memory-simple-prompt',
      'memory.simple-prompt.responses',
    );

    const result = await harness.runScenario(
      scenario,
      async (recordSnapshot) => {
        await testRig.run({
          args: ['What is the capital of France?'],
          timeout: RUN_TIMEOUT_MS,
          env: TEST_ENV,
        });

        await recordSnapshot('after-response');
      },
    );

    checkBaseline(scenario, result);
  });

  it('multi-turn-conversation: memory remains stable over turns', async () => {
    const scenario = 'multi-turn-conversation';
    const testRig = startRig('memory-multi-turn', 'memory.multi-turn.responses');

    const prompts = [
      'Hello, what can you help me with?',
      'Tell me about JavaScript',
      'How is TypeScript different?',
      'Can you write a simple TypeScript function?',
      'What are some TypeScript best practices?',
    ];

    const result = await harness.runScenario(
      scenario,
      async (recordSnapshot) => {
        // Run through all turns as a piped sequence
        const stdinContent = prompts.join('\n');
        await testRig.run({
          stdin: stdinContent,
          timeout: RUN_TIMEOUT_MS,
          env: TEST_ENV,
        });

        // Take snapshots after the conversation completes
        await recordSnapshot('after-all-turns');
      },
    );

    checkBaseline(scenario, result);
  });

  it('multi-function-call-repo-search: memory after tool use', async () => {
    const scenario = 'multi-function-call-repo-search';
    const testRig = startRig(
      'memory-multi-func-call',
      'memory.multi-function-call.responses',
    );

    // Create directories first, then files in the workspace so the tools have targets
    testRig.mkdir('packages/core/src/telemetry');
    testRig.createFile(
      'packages/core/src/telemetry/memory-monitor.ts',
      'export class MemoryMonitor { constructor() {} }',
    );
    testRig.createFile(
      'packages/core/src/telemetry/metrics.ts',
      'export function recordMemoryUsage() {}',
    );

    const result = await harness.runScenario(
      scenario,
      async (recordSnapshot) => {
        await testRig.run({
          args: [
            'Search this repository for MemoryMonitor and tell me what it does',
          ],
          timeout: RUN_TIMEOUT_MS,
          env: TEST_ENV,
        });

        await recordSnapshot('after-tool-calls');
      },
    );

    checkBaseline(scenario, result);
  });
});
|
||||
2
memory-tests/memory.idle-startup.responses
Normal file
2
memory-tests/memory.idle-startup.responses
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
|
||||
4
memory-tests/memory.multi-function-call.responses
Normal file
4
memory-tests/memory.multi-function-call.responses
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll search for MemoryMonitor in the repository and analyze what it does."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":15,"totalTokenCount":45,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"grep_search","args":{"pattern":"MemoryMonitor","path":".","include_pattern":"*.ts"}}},{"functionCall":{"name":"list_directory","args":{"path":"packages/core/src/telemetry"}}},{"functionCall":{"name":"read_file","args":{"file_path":"packages/core/src/telemetry/memory-monitor.ts"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":80,"totalTokenCount":110,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I found the memory monitoring code. Here's a summary:\n\nThe `MemoryMonitor` class in `packages/core/src/telemetry/memory-monitor.ts` provides:\n\n1. **Continuous monitoring** via `start()`/`stop()` with configurable intervals\n2. **V8 heap snapshots** using `v8.getHeapStatistics()` and `process.memoryUsage()`\n3. **High-water mark tracking** to detect significant memory growth\n4. **Rate-limited recording** to avoid metric flood\n5. **Activity detection** — only records when user is active\n\nThe class uses a singleton pattern via `initializeMemoryMonitor()` for global access."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":500,"candidatesTokenCount":120,"totalTokenCount":620,"promptTokensDetails":[{"modality":"TEXT","tokenCount":500}]}}]}
|
||||
10
memory-tests/memory.multi-turn.responses
Normal file
10
memory-tests/memory.multi-turn.responses
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help you with your coding tasks. What would you like to work on today?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":18,"totalTokenCount":23,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"JavaScript is a high-level, interpreted programming language. It was originally designed for adding interactivity to web pages."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":25,"candidatesTokenCount":60,"totalTokenCount":85,"promptTokensDetails":[{"modality":"TEXT","tokenCount":25}]}}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"TypeScript is a typed superset of JavaScript developed by Microsoft. The main differences from JavaScript are static typing and better tooling."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":45,"candidatesTokenCount":80,"totalTokenCount":125,"promptTokensDetails":[{"modality":"TEXT","tokenCount":45}]}}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here is a simple TypeScript function:\n\nfunction greet(name: string): string { return `Hello, ${name}!`; }"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":60,"candidatesTokenCount":55,"totalTokenCount":115,"promptTokensDetails":[{"modality":"TEXT","tokenCount":60}]}}]}
|
||||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here are 5 key TypeScript best practices: Enable strict mode, prefer interfaces, use union types, leverage type inference, and use readonly."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":75,"candidatesTokenCount":70,"totalTokenCount":145,"promptTokensDetails":[{"modality":"TEXT","tokenCount":75}]}}]}
|
||||
2
memory-tests/memory.simple-prompt.responses
Normal file
2
memory-tests/memory.simple-prompt.responses
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
|
||||
{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"The capital of France is Paris. It has been the capital since the 10th century and is known for iconic landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral. Paris is also the most populous city in France, with a metropolitan area population of over 12 million people."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":7,"candidatesTokenCount":55,"totalTokenCount":62,"promptTokensDetails":[{"modality":"TEXT","tokenCount":7}]}}]}
|
||||
12
memory-tests/tsconfig.json
Normal file
12
memory-tests/tsconfig.json
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"extends": "../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"noEmit": true,
|
||||
"allowJs": true
|
||||
},
|
||||
"include": ["**/*.ts"],
|
||||
"references": [
|
||||
{ "path": "../packages/core" },
|
||||
{ "path": "../packages/test-utils" }
|
||||
]
|
||||
}
|
||||
28
memory-tests/vitest.config.ts
Normal file
28
memory-tests/vitest.config.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
// Per-test timeout: 10 minutes — memory profiling is slow.
const MEMORY_TEST_TIMEOUT_MS = 600000;

/**
 * Vitest configuration for the memory regression tests.
 *
 * Tests run serially inside a single forked process started with
 * `--expose-gc`, and are never retried.
 */
export default defineConfig({
  test: {
    // Discovery, setup and reporting.
    include: ['**/*.test.ts'],
    globalSetup: './globalSetup.ts',
    reporters: ['default'],
    env: {
      GEMINI_TEST_TYPE: 'memory',
    },

    // Timing and retries.
    testTimeout: MEMORY_TEST_TIMEOUT_MS,
    retry: 0, // No retries for memory tests — noise is handled by tolerance

    // Process model.
    fileParallelism: false, // Must run serially to avoid memory interference
    pool: 'forks', // Use forks pool for --expose-gc support
    poolOptions: {
      forks: {
        singleFork: true, // Single process for accurate per-test memory readings
        execArgv: ['--expose-gc'], // Enable global.gc() for forced GC
      },
    },
  },
});
|
||||
41
package-lock.json
generated
41
package-lock.json
generated
|
|
@ -446,7 +446,8 @@
|
|||
"version": "2.11.0",
|
||||
"resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz",
|
||||
"integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==",
|
||||
"license": "(Apache-2.0 AND BSD-3-Clause)"
|
||||
"license": "(Apache-2.0 AND BSD-3-Clause)",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@bundled-es-modules/cookie": {
|
||||
"version": "2.0.1",
|
||||
|
|
@ -1449,6 +1450,7 @@
|
|||
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz",
|
||||
"integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@grpc/proto-loader": "^0.7.13",
|
||||
"@js-sdsl/ordered-map": "^4.4.2"
|
||||
|
|
@ -2155,6 +2157,7 @@
|
|||
"integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@octokit/auth-token": "^6.0.0",
|
||||
"@octokit/graphql": "^9.0.2",
|
||||
|
|
@ -2335,6 +2338,7 @@
|
|||
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
|
||||
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
|
|
@ -2384,6 +2388,7 @@
|
|||
"resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.5.0.tgz",
|
||||
"integrity": "sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@opentelemetry/semantic-conventions": "^1.29.0"
|
||||
},
|
||||
|
|
@ -2758,6 +2763,7 @@
|
|||
"resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.5.0.tgz",
|
||||
"integrity": "sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@opentelemetry/core": "2.5.0",
|
||||
"@opentelemetry/semantic-conventions": "^1.29.0"
|
||||
|
|
@ -2791,6 +2797,7 @@
|
|||
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-2.5.0.tgz",
|
||||
"integrity": "sha512-BeJLtU+f5Gf905cJX9vXFQorAr6TAfK3SPvTFqP+scfIpDQEJfRaGJWta7sJgP+m4dNtBf9y3yvBKVAZZtJQVA==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@opentelemetry/core": "2.5.0",
|
||||
"@opentelemetry/resources": "2.5.0"
|
||||
|
|
@ -2845,6 +2852,7 @@
|
|||
"resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.5.0.tgz",
|
||||
"integrity": "sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@opentelemetry/core": "2.5.0",
|
||||
"@opentelemetry/resources": "2.5.0",
|
||||
|
|
@ -4081,6 +4089,7 @@
|
|||
"integrity": "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"csstype": "^3.0.2"
|
||||
}
|
||||
|
|
@ -4355,6 +4364,7 @@
|
|||
"integrity": "sha512-6sMvZePQrnZH2/cJkwRpkT7DxoAWh+g6+GFRK6bV3YQo7ogi3SX5rgF6099r5Q53Ma5qeT7LGmOmuIutF4t3lA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@typescript-eslint/scope-manager": "8.35.0",
|
||||
"@typescript-eslint/types": "8.35.0",
|
||||
|
|
@ -5228,6 +5238,7 @@
|
|||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
|
||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
},
|
||||
|
|
@ -5569,6 +5580,12 @@
|
|||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/asciichart": {
|
||||
"version": "1.5.25",
|
||||
"resolved": "https://registry.npmjs.org/asciichart/-/asciichart-1.5.25.tgz",
|
||||
"integrity": "sha512-PNxzXIPPOtWq8T7bgzBtk9cI2lgS4SJZthUHEiQ1aoIc3lNzGfUvIvo9LiAnq26TACo9t1/4qP6KTGAUbzX9Xg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/assertion-error": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz",
|
||||
|
|
@ -7362,7 +7379,8 @@
|
|||
"version": "0.0.1581282",
|
||||
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz",
|
||||
"integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/dezalgo": {
|
||||
"version": "1.0.4",
|
||||
|
|
@ -7946,6 +7964,7 @@
|
|||
"integrity": "sha512-GsGizj2Y1rCWDu6XoEekL3RLilp0voSePurjZIkxL3wlm5o5EC9VpgaP7lrCvjnkuLvzFBQWB3vWB3K5KQTveQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@eslint-community/eslint-utils": "^4.2.0",
|
||||
"@eslint-community/regexpp": "^4.12.1",
|
||||
|
|
@ -8463,6 +8482,7 @@
|
|||
"resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
|
||||
"integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"accepts": "^2.0.0",
|
||||
"body-parser": "^2.2.1",
|
||||
|
|
@ -9775,6 +9795,7 @@
|
|||
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.7.tgz",
|
||||
"integrity": "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=16.9.0"
|
||||
}
|
||||
|
|
@ -10053,6 +10074,7 @@
|
|||
"resolved": "https://registry.npmjs.org/@jrichman/ink/-/ink-6.6.7.tgz",
|
||||
"integrity": "sha512-bDzQLpLzK/dn9Ur/Ku88ZZR9totVcMGrGYAgPHidsAAbe9NKztU1fggj/iu0wRp5g1kBeALb3cfagFGdDxAU1w==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"ansi-escapes": "^7.0.0",
|
||||
"ansi-styles": "^6.2.3",
|
||||
|
|
@ -13826,6 +13848,7 @@
|
|||
"resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz",
|
||||
"integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
|
|
@ -13836,6 +13859,7 @@
|
|||
"integrity": "sha512-ePrwPfxAnB+7hgnEr8vpKxL9cmnp7F322t8oqcPshbIQQhDKgFDW4tjhF2wjVbdXF9O/nyuy3sQWd9JGpiLPvA==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"shell-quote": "^1.6.1",
|
||||
"ws": "^7"
|
||||
|
|
@ -15985,6 +16009,7 @@
|
|||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
|
|
@ -16207,7 +16232,8 @@
|
|||
"version": "2.8.1",
|
||||
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
|
||||
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
||||
"license": "0BSD"
|
||||
"license": "0BSD",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/tsx": {
|
||||
"version": "4.20.3",
|
||||
|
|
@ -16215,6 +16241,7 @@
|
|||
"integrity": "sha512-qjbnuR9Tr+FJOMBqJCW5ehvIo/buZq7vH7qD7JziU98h6l3qGy0a/yPFjwO+y0/T7GFpNgNAvEcPPVfyT8rrPQ==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "~0.25.0",
|
||||
"get-tsconfig": "^4.7.5"
|
||||
|
|
@ -16380,6 +16407,7 @@
|
|||
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
|
||||
"devOptional": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
|
|
@ -16602,6 +16630,7 @@
|
|||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz",
|
||||
"integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
"fdir": "^6.5.0",
|
||||
|
|
@ -16715,6 +16744,7 @@
|
|||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
|
|
@ -16727,6 +16757,7 @@
|
|||
"resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
|
||||
"integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@types/chai": "^5.2.2",
|
||||
"@vitest/expect": "3.2.4",
|
||||
|
|
@ -17374,6 +17405,7 @@
|
|||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
||||
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
|
|
@ -17817,6 +17849,7 @@
|
|||
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.14.3.tgz",
|
||||
"integrity": "sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==",
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@grpc/proto-loader": "^0.8.0",
|
||||
"@js-sdsl/ordered-map": "^4.4.2"
|
||||
|
|
@ -17920,6 +17953,7 @@
|
|||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
|
|
@ -17979,6 +18013,7 @@
|
|||
"dependencies": {
|
||||
"@google/gemini-cli-core": "file:../core",
|
||||
"@lydell/node-pty": "1.1.0",
|
||||
"asciichart": "^1.5.25",
|
||||
"strip-ansi": "^7.1.2",
|
||||
"vitest": "^3.2.4"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@
|
|||
"test:integration:all": "npm run test:integration:sandbox:none && npm run test:integration:sandbox:docker && npm run test:integration:sandbox:podman",
|
||||
"test:integration:flaky": "cross-env RUN_FLAKY_INTEGRATION=1 npm run test:integration:sandbox:none",
|
||||
"test:integration:sandbox:none": "cross-env GEMINI_SANDBOX=false vitest run --root ./integration-tests",
|
||||
"test:memory": "vitest run --root ./memory-tests",
|
||||
"test:memory:update-baselines": "cross-env UPDATE_MEMORY_BASELINES=true vitest run --root ./memory-tests",
|
||||
"test:integration:sandbox:docker": "cross-env GEMINI_SANDBOX=docker npm run build:sandbox && cross-env GEMINI_SANDBOX=docker vitest run --root ./integration-tests",
|
||||
"test:integration:sandbox:podman": "cross-env GEMINI_SANDBOX=podman vitest run --root ./integration-tests",
|
||||
"lint": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" eslint . --cache --max-warnings 0",
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
"dependencies": {
|
||||
"@google/gemini-cli-core": "file:../core",
|
||||
"@lydell/node-pty": "1.1.0",
|
||||
"asciichart": "^1.5.25",
|
||||
"strip-ansi": "^7.1.2",
|
||||
"vitest": "^3.2.4"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
export * from './file-system-test-helpers.js';
|
||||
export * from './fixtures/agents.js';
|
||||
export * from './memory-baselines.js';
|
||||
export * from './memory-test-harness.js';
|
||||
export * from './mock-utils.js';
|
||||
export * from './test-mcp-server.js';
|
||||
export * from './test-rig.js';
|
||||
|
|
|
|||
76
packages/test-utils/src/memory-baselines.ts
Normal file
76
packages/test-utils/src/memory-baselines.ts
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { readFileSync, writeFileSync, existsSync } from 'node:fs';
|
||||
|
||||
/**
 * Stored memory figures for one memory-test scenario, keyed by scenario
 * name inside the baselines file.
 */
export interface MemoryBaseline {
  /** Recorded heap-used value, in bytes. */
  heapUsedBytes: number;
  /** Recorded heap-total value, in bytes. */
  heapTotalBytes: number;
  /** Recorded RSS value, in bytes. */
  rssBytes: number;
  /** When the entry was recorded (`updateBaseline` writes an ISO-8601 string). */
  timestamp: string;
}
|
||||
|
||||
/**
 * Shape of the whole baselines JSON document.
 */
export interface MemoryBaselineFile {
  /** Version number of the file format (`loadBaselines` uses 1 for a new file). */
  version: number;
  /** ISO-8601 time of the last save; `saveBaselines` rewrites it on every write. */
  updatedAt: string;
  /** One baseline entry per scenario, keyed by scenario name. */
  scenarios: Record<string, MemoryBaseline>;
}
|
||||
|
||||
/**
 * Load baselines from a JSON file.
 *
 * Returns a fresh, empty baseline file (version 1, no scenarios) when `path`
 * does not exist yet, so a first run can proceed without a baselines file.
 *
 * @param path - Location of the baselines JSON file.
 * @returns The parsed baselines document.
 * @throws SyntaxError if the file exists but does not contain valid JSON.
 * @throws Error if the JSON is valid but has no `scenarios` object. Failing
 *   here avoids a later, harder-to-diagnose TypeError when a scenario is
 *   looked up in a malformed document.
 */
export function loadBaselines(path: string): MemoryBaselineFile {
  if (!existsSync(path)) {
    return {
      version: 1,
      updatedAt: new Date().toISOString(),
      scenarios: {},
    };
  }

  const content = readFileSync(path, 'utf-8');

  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch (error) {
    // Re-throw with the file name so a corrupted baselines file is easy to find.
    const reason = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(
      `Failed to parse memory baselines file "${path}": ${reason}`,
    );
  }

  // Only the `scenarios` map is validated: it is the one field whose absence
  // breaks lookups. Other fields are passed through untouched.
  const scenarios =
    typeof parsed === 'object' && parsed !== null
      ? (parsed as { scenarios?: unknown }).scenarios
      : undefined;
  if (
    typeof scenarios !== 'object' ||
    scenarios === null ||
    Array.isArray(scenarios)
  ) {
    throw new Error(
      `Invalid memory baselines file "${path}": expected a JSON object with a "scenarios" map.`,
    );
  }

  return parsed as MemoryBaselineFile;
}
|
||||
|
||||
/**
 * Write the baselines document to `path` as 2-space-indented JSON followed
 * by a trailing newline.
 *
 * Note: the `updatedAt` field of the object passed in is overwritten with the
 * current time before it is serialised, so the caller's object is modified.
 *
 * @param path - Destination file.
 * @param baselines - Document to persist.
 */
export function saveBaselines(
  path: string,
  baselines: MemoryBaselineFile,
): void {
  const savedAt = new Date().toISOString();
  baselines.updatedAt = savedAt;

  const serialized = `${JSON.stringify(baselines, null, 2)}\n`;
  writeFileSync(path, serialized);
}
|
||||
|
||||
/**
 * Record new measurements for one scenario in the baselines file.
 *
 * Reads the current file through `loadBaselines`, replaces (or adds) the
 * entry for `scenarioName` with the three measured values plus a fresh
 * timestamp, and writes the result back through `saveBaselines`.
 *
 * @param path - Location of the baselines JSON file.
 * @param scenarioName - Key of the scenario to create or overwrite.
 * @param measured - Newly measured values, in bytes.
 */
export function updateBaseline(
  path: string,
  scenarioName: string,
  measured: {
    heapUsedBytes: number;
    heapTotalBytes: number;
    rssBytes: number;
  },
): void {
  const file = loadBaselines(path);

  // Copy only the known fields so extra properties on `measured` are not persisted.
  const { heapUsedBytes, heapTotalBytes, rssBytes } = measured;
  const entry: MemoryBaseline = {
    heapUsedBytes,
    heapTotalBytes,
    rssBytes,
    timestamp: new Date().toISOString(),
  };

  file.scenarios[scenarioName] = entry;
  saveBaselines(path, file);
}
|
||||
483
packages/test-utils/src/memory-test-harness.ts
Normal file
483
packages/test-utils/src/memory-test-harness.ts
Normal file
|
|
@ -0,0 +1,483 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import v8 from 'node:v8';
|
||||
import { setTimeout as sleep } from 'node:timers/promises';
|
||||
import { loadBaselines, updateBaseline } from './memory-baselines.js';
|
||||
import type { MemoryBaseline, MemoryBaselineFile } from './memory-baselines.js';
|
||||
|
||||
/** Options this module hands to asciichart's `plot` function. */
interface PlotConfig {
  /** Chart height setting. */
  height?: number;
  /** Converts a numeric value into the text shown for it. */
  format?: (value: number) => string;
}
|
||||
|
||||
/** Call signature of asciichart's `plot`: a numeric series in, rendered chart text out. */
type PlotFn = (
  series: number[],
  config?: PlotConfig,
) => string;
|
||||
|
||||
/**
 * A single memory snapshot at a point in time.
 *
 * The numeric fields mirror what `takeSnapshot` reads from
 * `process.memoryUsage()` and `v8.getHeapStatistics()`.
 */
export interface MemorySnapshot {
  /** `Date.now()` value at the moment the snapshot object was built. */
  timestamp: number;
  /** Caller-supplied name for this measurement point. */
  label: string;
  /** `process.memoryUsage().heapUsed`. */
  heapUsed: number;
  /** `process.memoryUsage().heapTotal`. */
  heapTotal: number;
  /** `process.memoryUsage().rss`. */
  rss: number;
  /** `process.memoryUsage().external`. */
  external: number;
  /** `process.memoryUsage().arrayBuffers`. */
  arrayBuffers: number;
  /** `v8.getHeapStatistics().heap_size_limit`. */
  heapSizeLimit: number;
  /**
   * Per-heap-space statistics as returned by `v8.getHeapSpaceStatistics()`.
   * Typed from that function rather than `any[]` so that accesses such as
   * `space_name` / `space_used_size` are checked by the compiler.
   */
  heapSpaces: ReturnType<typeof v8.getHeapSpaceStatistics>;
}
|
||||
|
||||
/**
 * Outcome of one `runScenario` call.
 */
export interface MemoryTestResult {
  /** Name the scenario was run under; also the key used to look up its baseline. */
  scenarioName: string;
  /** Every snapshot recorded for the run: "before", any intermediate ones, then "after". */
  snapshots: MemorySnapshot[];
  /** Largest `heapUsed` across `snapshots`. */
  peakHeapUsed: number;
  /** Largest `rss` across `snapshots`. */
  peakRss: number;
  /** `heapUsed` of the "after" snapshot. */
  finalHeapUsed: number;
  /** `rss` of the "after" snapshot. */
  finalRss: number;
  /** Stored baseline for this scenario, or `undefined` when none exists. */
  baseline: MemoryBaseline | undefined;
  /** Whether `deltaPercent` stayed within the tolerance; `true` when there is no baseline. */
  withinTolerance: boolean;
  /** Percentage difference of `finalHeapUsed` from the baseline heap usage; 0 without a baseline. */
  deltaPercent: number;
}
|
||||
|
||||
/**
 * Constructor settings for `MemoryTestHarness`. Only `baselinesPath` is
 * required; every other field falls back to the default noted on it.
 */
export interface MemoryTestHarnessOptions {
  /** Where the baselines JSON file lives. */
  baselinesPath: string;
  /** Allowed growth over the baseline, as a percentage (0-100). Defaults to 10. */
  defaultTolerancePercent?: number;
  /** How many GC cycles are forced before each snapshot. Defaults to 3. */
  gcCycles?: number;
  /** Wait between consecutive GC cycles, in milliseconds. Defaults to 100. */
  gcDelayMs?: number;
  /** How many samples feed the median calculation. Defaults to 3. */
  sampleCount?: number;
  /** Wait between consecutive samples, in milliseconds. Defaults to 50. */
  samplePauseMs?: number;
}
|
||||
|
||||
/**
 * MemoryTestHarness provides infrastructure for running memory usage tests.
 *
 * It handles:
 * - Forcing V8 garbage collection before every measurement to reduce noise
 *   (requires the process to expose `gc`, e.g. via `--expose-gc`)
 * - Sampling `process.memoryUsage()` and V8 heap statistics
 * - Comparing against baselines with configurable tolerance
 * - Generating ASCII chart reports of memory trends
 */
export class MemoryTestHarness {
  private baselines: MemoryBaselineFile;
  private readonly baselinesPath: string;
  private readonly defaultTolerancePercent: number;
  private readonly gcCycles: number;
  private readonly gcDelayMs: number;
  private readonly sampleCount: number;
  private readonly samplePauseMs: number;
  private allResults: MemoryTestResult[] = [];

  /**
   * Stores the options (applying defaults) and loads the baselines file
   * from `options.baselinesPath`.
   */
  constructor(options: MemoryTestHarnessOptions) {
    this.baselinesPath = options.baselinesPath;
    this.defaultTolerancePercent = options.defaultTolerancePercent ?? 10;
    this.gcCycles = options.gcCycles ?? 3;
    this.gcDelayMs = options.gcDelayMs ?? 100;
    this.sampleCount = options.sampleCount ?? 3;
    this.samplePauseMs = options.samplePauseMs ?? 50;
    this.baselines = loadBaselines(this.baselinesPath);
  }

  /**
   * Force garbage collection, then capture current memory figures.
   *
   * @param label - Name stored on the returned snapshot.
   * @returns Values from `process.memoryUsage()`, the V8 heap size limit and
   *   the per-space V8 heap statistics.
   * @throws Error if `globalThis.gc` is not available (see `forceGC`).
   */
  async takeSnapshot(label: string = 'snapshot'): Promise<MemorySnapshot> {
    await this.forceGC();

    const memUsage = process.memoryUsage();
    const heapStats = v8.getHeapStatistics();

    return {
      timestamp: Date.now(),
      label,
      heapUsed: memUsage.heapUsed,
      heapTotal: memUsage.heapTotal,
      rss: memUsage.rss,
      external: memUsage.external,
      arrayBuffers: memUsage.arrayBuffers,
      heapSizeLimit: heapStats.heap_size_limit,
      heapSpaces: v8.getHeapSpaceStatistics(),
    };
  }

  /**
   * Take several snapshots and return the one with the median `heapUsed`,
   * relabelled with `label` and a fresh timestamp.
   *
   * @param label - Label for the returned snapshot; individual samples are
   *   labelled `${label}_sample_${i}`.
   * @param count - Number of samples; defaults to the configured
   *   `sampleCount`. Values below 1 (or non-finite values) are treated as 1
   *   so that a real measurement is always returned.
   */
  async takeMedianSnapshot(
    label: string = 'median',
    count?: number,
  ): Promise<MemorySnapshot> {
    const requested = count ?? this.sampleCount;
    // Always collect at least one sample; otherwise there is no median and
    // the returned object would be missing every metric.
    const numSamples =
      Number.isFinite(requested) && requested >= 1 ? Math.ceil(requested) : 1;

    const samples: MemorySnapshot[] = [];
    for (let i = 0; i < numSamples; i++) {
      samples.push(await this.takeSnapshot(`${label}_sample_${i}`));
      if (i < numSamples - 1) {
        await sleep(this.samplePauseMs);
      }
    }

    // Sort by heapUsed and take the middle element (upper middle for even counts).
    samples.sort((a, b) => a.heapUsed - b.heapUsed);
    const median = samples[Math.floor(samples.length / 2)];
    if (!median) {
      throw new Error('No memory samples were collected');
    }

    return {
      ...median,
      label,
      timestamp: Date.now(),
    };
  }

  /**
   * Run a memory test scenario.
   *
   * Takes before/after median snapshots around the scenario function,
   * collects intermediate snapshots if the scenario records them, and
   * compares the final heap usage against the stored baseline. The result is
   * also remembered for `generateReport`.
   *
   * @param name - Scenario name (must match baseline key)
   * @param fn - Async function that executes the scenario. Receives a
   *   `recordSnapshot` callback for recording intermediate snapshots.
   * @param tolerancePercent - Override default tolerance for this scenario
   * @returns The collected snapshots, peaks, and baseline comparison. When no
   *   baseline exists, `deltaPercent` is 0 and `withinTolerance` is true.
   */
  async runScenario(
    name: string,
    fn: (
      recordSnapshot: (label: string) => Promise<MemorySnapshot>,
    ) => Promise<void>,
    tolerancePercent?: number,
  ): Promise<MemoryTestResult> {
    const tolerance = tolerancePercent ?? this.defaultTolerancePercent;
    const snapshots: MemorySnapshot[] = [];

    // Callback handed to the scenario for intermediate measurements.
    const recordSnapshot = async (label: string): Promise<MemorySnapshot> => {
      const snap = await this.takeMedianSnapshot(label);
      snapshots.push(snap);
      return snap;
    };

    const beforeSnap = await this.takeMedianSnapshot('before');
    snapshots.push(beforeSnap);

    await fn(recordSnapshot);

    const afterSnap = await this.takeMedianSnapshot('after');
    snapshots.push(afterSnap);

    const peakHeapUsed = Math.max(...snapshots.map((s) => s.heapUsed));
    const peakRss = Math.max(...snapshots.map((s) => s.rss));

    const baseline = this.baselines.scenarios[name];

    let deltaPercent = 0;
    let withinTolerance = true;
    if (baseline) {
      deltaPercent = MemoryTestHarness.computeDeltaPercent(
        afterSnap.heapUsed,
        baseline.heapUsedBytes,
      );
      withinTolerance = deltaPercent <= tolerance;
    }

    const result: MemoryTestResult = {
      scenarioName: name,
      snapshots,
      peakHeapUsed,
      peakRss,
      finalHeapUsed: afterSnap.heapUsed,
      finalRss: afterSnap.rss,
      baseline,
      withinTolerance,
      deltaPercent,
    };

    this.allResults.push(result);
    return result;
  }

  /**
   * Assert that a scenario result is within the baseline tolerance.
   *
   * Logs a warning and returns when the result has no baseline.
   *
   * @param result - Result produced by `runScenario`.
   * @param tolerancePercent - Allowed growth in percent; defaults to the
   *   harness-wide tolerance (not the one passed to `runScenario`).
   * @throws Error describing the regression when the final heap usage
   *   exceeds the baseline by more than the tolerance.
   */
  assertWithinBaseline(
    result: MemoryTestResult,
    tolerancePercent?: number,
  ): void {
    const tolerance = tolerancePercent ?? this.defaultTolerancePercent;

    if (!result.baseline) {
      console.warn(
        `⚠ No baseline found for "${result.scenarioName}". ` +
          `Run with UPDATE_MEMORY_BASELINES=true to create one. ` +
          `Measured: ${formatMB(result.finalHeapUsed)} heap used.`,
      );
      return; // Don't fail if no baseline exists yet
    }

    const deltaPercent = MemoryTestHarness.computeDeltaPercent(
      result.finalHeapUsed,
      result.baseline.heapUsedBytes,
    );

    if (deltaPercent > tolerance) {
      throw new Error(
        `Memory regression detected for "${result.scenarioName}"!\n` +
          ` Measured: ${formatMB(result.finalHeapUsed)} heap used\n` +
          ` Baseline: ${formatMB(result.baseline.heapUsedBytes)} heap used\n` +
          ` Delta: ${deltaPercent.toFixed(1)}% (tolerance: ${tolerance}%)\n` +
          ` Peak heap: ${formatMB(result.peakHeapUsed)}\n` +
          ` Peak RSS: ${formatMB(result.peakRss)}`,
      );
    }
  }

  /**
   * Update the baseline for a scenario with the current measured values,
   * then reload the in-memory baselines from disk.
   */
  updateScenarioBaseline(result: MemoryTestResult): void {
    updateBaseline(this.baselinesPath, result.scenarioName, {
      heapUsedBytes: result.finalHeapUsed,
      heapTotalBytes:
        result.snapshots[result.snapshots.length - 1]?.heapTotal ?? 0,
      rssBytes: result.finalRss,
    });
    // Reload baselines after update
    this.baselines = loadBaselines(this.baselinesPath);
  }

  /**
   * Analyze the last three snapshots to detect sustained growth.
   *
   * A heap space is flagged when its used size grew by more than
   * `thresholdBytes` both from the first to the second and from the second
   * to the third of those snapshots. A space missing from a snapshot counts
   * as 0 bytes.
   *
   * @param snapshots - Snapshots in chronological order.
   * @param thresholdBytes - Minimum growth per step to count (default 1 MB).
   * @returns `leaked` plus a human-readable message; with fewer than three
   *   snapshots `leaked` is false.
   */
  analyzeSnapshots(
    snapshots: MemorySnapshot[],
    thresholdBytes: number = 1024 * 1024, // 1 MB
  ): { leaked: boolean; message: string } {
    if (snapshots.length < 3) {
      return { leaked: false, message: 'Not enough snapshots to analyze' };
    }

    const snap1 = snapshots[snapshots.length - 3];
    const snap2 = snapshots[snapshots.length - 2];
    const snap3 = snapshots[snapshots.length - 1];

    if (!snap1 || !snap2 || !snap3) {
      return { leaked: false, message: 'Missing snapshots' };
    }

    // Index each snapshot once instead of searching its space list per name.
    const used1 = MemoryTestHarness.usedBytesBySpace(snap1);
    const used2 = MemoryTestHarness.usedBytesBySpace(snap2);
    const used3 = MemoryTestHarness.usedBytesBySpace(snap3);

    const spaceNames = new Set<string>([
      ...used1.keys(),
      ...used2.keys(),
      ...used3.keys(),
    ]);

    const growthDetails: string[] = [];
    for (const name of spaceNames) {
      const size1 = used1.get(name) ?? 0;
      const size2 = used2.get(name) ?? 0;
      const size3 = used3.get(name) ?? 0;

      const growth1 = size2 - size1;
      const growth2 = size3 - size2;

      if (growth1 > thresholdBytes && growth2 > thresholdBytes) {
        growthDetails.push(
          `${name}: sustained growth (${formatMB(growth1)} -> ${formatMB(growth2)})`,
        );
      }
    }

    const hasSustainedGrowth = growthDetails.length > 0;
    const message = hasSustainedGrowth
      ? `Memory bloat detected in heap spaces:\n ` + growthDetails.join('\n ')
      : `No sustained growth detected in any heap space above threshold.`;

    return { leaked: hasSustainedGrowth, message };
  }

  /**
   * Assert that memory returns to a baseline level after a peak.
   * Useful for verifying that large tool outputs are not retained.
   *
   * The first snapshot is treated as the baseline and the last as the final
   * state; the peak is only used in the error message.
   *
   * @param snapshots - At least three snapshots in chronological order.
   * @param tolerancePercent - Allowed growth of the final `heapUsed` over the
   *   first snapshot's `heapUsed`, in percent (default 10).
   * @throws Error if fewer than three snapshots are given or the final heap
   *   usage exceeds the first by more than the tolerance.
   */
  assertMemoryReturnsToBaseline(
    snapshots: MemorySnapshot[],
    tolerancePercent: number = 10,
  ): void {
    if (snapshots.length < 3) {
      throw new Error('Need at least 3 snapshots to check return to baseline');
    }

    const baseline = snapshots[0]; // Assume first is baseline
    const final = snapshots[snapshots.length - 1];

    if (!baseline || !final) {
      throw new Error('Missing snapshots for return to baseline check');
    }

    let peak = baseline;
    for (const snap of snapshots) {
      if (snap.heapUsed > peak.heapUsed) {
        peak = snap;
      }
    }

    const tolerance = baseline.heapUsed * (tolerancePercent / 100);
    const delta = final.heapUsed - baseline.heapUsed;

    if (delta > tolerance) {
      throw new Error(
        `Memory did not return to baseline!\n` +
          ` Baseline: ${formatMB(baseline.heapUsed)}\n` +
          ` Peak: ${formatMB(peak.heapUsed)}\n` +
          ` Final: ${formatMB(final.heapUsed)}\n` +
          ` Delta: ${formatMB(delta)} (tolerance: ${formatMB(tolerance)})`,
      );
    }
  }

  /**
   * Generate a report with a per-scenario summary and, for scenarios with
   * more than two snapshots, an ASCII chart of heap usage.
   *
   * Uses the `asciichart` library when it can be imported. The report is
   * printed with `console.log` and also returned.
   *
   * @param results - Results to include; defaults to every result recorded
   *   by `runScenario` on this harness.
   */
  async generateReport(results?: MemoryTestResult[]): Promise<string> {
    const resultsToReport = results ?? this.allResults;
    const lines: string[] = [];

    lines.push('');
    lines.push('═══════════════════════════════════════════════════');
    lines.push(' MEMORY USAGE TEST REPORT');
    lines.push('═══════════════════════════════════════════════════');
    lines.push('');

    for (const result of resultsToReport) {
      const measured = formatMB(result.finalHeapUsed);
      const baseline = result.baseline
        ? formatMB(result.baseline.heapUsedBytes)
        : 'N/A';
      const delta = result.baseline
        ? `${result.deltaPercent >= 0 ? '+' : ''}${result.deltaPercent.toFixed(1)}%`
        : 'N/A';
      const status = !result.baseline
        ? 'NEW'
        : result.withinTolerance
          ? '✅'
          : '❌';

      lines.push(
        `${result.scenarioName}: ${measured} (Baseline: ${baseline}, Delta: ${delta}) ${status}`,
      );
    }
    lines.push('');

    // Generate ASCII chart for each scenario with multiple snapshots.
    // Best-effort: any failure while importing or plotting falls through to
    // the catch block below and the report is still produced.
    try {
      // @ts-expect-error - asciichart may not have types
      const asciichart = (await import('asciichart')) as {
        default?: { plot?: PlotFn };
        plot?: PlotFn;
      };
      // The plot function may sit on the default export or on the module itself.
      const plot: PlotFn | undefined =
        asciichart.default?.plot ?? asciichart.plot;

      for (const result of resultsToReport) {
        if (result.snapshots.length > 2) {
          lines.push(`📈 Memory trend: ${result.scenarioName}`);
          lines.push('─'.repeat(60));

          const heapDataMB = result.snapshots.map(
            (s) => s.heapUsed / (1024 * 1024),
          );

          if (plot) {
            const chart = plot(heapDataMB, {
              height: 10,
              format: (x: number) => `${x.toFixed(1)} MB`.padStart(10),
            });
            lines.push(chart);
          }

          // Label the x-axis with snapshot labels
          const labels = result.snapshots.map((s) => s.label);
          lines.push(' ' + labels.join(' → '));
          lines.push('');
        }
      }
    } catch {
      lines.push(
        '(asciichart not available — install with: npm install --save-dev asciichart)',
      );
      lines.push('');
    }

    lines.push('═══════════════════════════════════════════════════');
    lines.push('');

    const report = lines.join('\n');
    console.log(report);
    return report;
  }

  /**
   * Force V8 garbage collection.
   *
   * Calls `globalThis.gc()` `gcCycles` times, sleeping `gcDelayMs` between
   * cycles; the pauses are intended to let weak references and
   * FinalizationRegistry callbacks run.
   *
   * @throws Error if `globalThis.gc` is not a function.
   */
  private async forceGC(): Promise<void> {
    if (typeof globalThis.gc !== 'function') {
      throw new Error(
        'global.gc() not available. Run with --expose-gc for accurate measurements.',
      );
    }

    for (let i = 0; i < this.gcCycles; i++) {
      globalThis.gc();
      if (i < this.gcCycles - 1) {
        await sleep(this.gcDelayMs);
      }
    }
  }

  /**
   * Percentage by which `measuredBytes` differs from `baselineBytes`.
   *
   * Shared by `runScenario` and `assertWithinBaseline` so both agree. A
   * 0-byte baseline compared with a 0-byte measurement yields 0 rather than
   * NaN (0 / 0), which would otherwise be reported as out of tolerance.
   */
  private static computeDeltaPercent(
    measuredBytes: number,
    baselineBytes: number,
  ): number {
    if (baselineBytes === 0 && measuredBytes === 0) {
      return 0;
    }
    return ((measuredBytes - baselineBytes) / baselineBytes) * 100;
  }

  /**
   * Map each heap space name in a snapshot to its used size. If a name
   * appears more than once, the first occurrence wins.
   */
  private static usedBytesBySpace(
    snapshot: MemorySnapshot,
  ): Map<string, number> {
    const bySpace = new Map<string, number>();
    for (const space of snapshot.heapSpaces) {
      if (!bySpace.has(space.space_name)) {
        bySpace.set(space.space_name, space.space_used_size);
      }
    }
    return bySpace;
  }
}
|
||||
|
||||
/**
 * Render a byte count as megabytes (1 MB = 1024 * 1024 bytes), rounded to
 * one decimal place and suffixed with " MB".
 */
function formatMB(bytes: number): string {
  const bytesPerMB = 1024 * 1024;
  const megabytes = bytes / bytesPerMB;
  return megabytes.toFixed(1) + ' MB';
}
|
||||
Loading…
Reference in a new issue