feat(core): wire up the new ContextManager and AgentChatHistory

This commit is contained in:
Your Name 2026-04-14 01:47:38 +00:00
parent 8573650253
commit 6d7aa497aa
25 changed files with 467 additions and 310 deletions

View file

@ -1729,6 +1729,12 @@ their corresponding top-level category object in your `settings.json` file.
- **Default:** `false`
- **Requires restart:** Yes
- **`experimental.stressTestProfile`** (boolean):
- **Description:** Significantly lowers token limits to force early garbage
collection and distillation for testing purposes.
- **Default:** `false`
- **Requires restart:** Yes
- **`experimental.autoMemory`** (boolean):
- **Description:** Automatically extract reusable skills from past sessions in
the background. Review results with `/memory inbox`.

View file

@ -21,8 +21,6 @@ import {
type MCPServerConfig,
type GeminiCLIExtension,
Storage,
generalistProfile,
type ContextManagementConfig,
} from '@google/gemini-cli-core';
import { loadCliConfig, parseArguments, type CliArgs } from './config.js';
import {
@ -2210,51 +2208,6 @@ describe('loadCliConfig context management', () => {
},
});
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getContextManagementConfig()).toStrictEqual(
generalistProfile,
);
expect(config.isContextManagementEnabled()).toBe(true);
});
it('should be true when contextManagement is set to true in settings', async () => {
process.argv = ['node', 'script.js'];
const argv = await parseArguments(createTestMergedSettings());
const contextManagementConfig: Partial<ContextManagementConfig> = {
historyWindow: {
maxTokens: 100_000,
retainedTokens: 50_000,
},
messageLimits: {
normalMaxTokens: 1000,
retainedMaxTokens: 10_000,
normalizationHeadRatio: 0.25,
},
tools: {
distillation: {
maxOutputTokens: 10_000,
summarizationThresholdTokens: 15_000,
},
outputMasking: {
protectionThresholdTokens: 30_000,
minPrunableThresholdTokens: 10_000,
protectLatestTurn: false,
},
},
};
const settings = createTestMergedSettings({
experimental: {
contextManagement: true,
},
// The type of numbers is being inferred strangely, and so we have to cast
// to `any` here.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
contextManagement: contextManagementConfig as any,
});
const config = await loadCliConfig(settings, 'test-session', argv);
expect(config.getContextManagementConfig()).toStrictEqual({
enabled: true,
...contextManagementConfig,
});
expect(config.isContextManagementEnabled()).toBe(true);
});
});

View file

@ -46,7 +46,6 @@ import {
type HookEventName,
type OutputFormat,
detectIdeFromEnv,
generalistProfile,
} from '@google/gemini-cli-core';
import {
type Settings,
@ -884,14 +883,19 @@ export async function loadCliConfig(
}
}
const useGeneralistProfile =
settings.experimental?.generalistProfile ?? false;
const useContextManagement =
settings.experimental?.contextManagement ?? false;
// TODO(joshualitt): Clean this up alongside removal of the legacy config.
let profileSelector: string | undefined = undefined;
if (settings.experimental?.stressTestProfile) {
profileSelector = 'stressTestProfile';
} else if (
settings.experimental?.generalistProfile ||
settings.experimental?.contextManagement
) {
profileSelector = 'generalistProfile';
}
const contextManagement = {
...(useGeneralistProfile ? generalistProfile : {}),
...(useContextManagement ? settings?.contextManagement : {}),
enabled: useContextManagement || useGeneralistProfile,
enabled: !!profileSelector,
};
return new Config({
@ -915,6 +919,7 @@ export async function loadCliConfig(
worktreeSettings,
coreTools: settings.tools?.core || undefined,
experimentalContextManagementConfig: profileSelector,
allowedTools: allowedTools.length > 0 ? allowedTools : undefined,
policyEngineConfig,
policyUpdateConfirmationRequest,

View file

@ -2213,6 +2213,17 @@ const SETTINGS_SCHEMA = {
'Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.',
showInDialog: true,
},
stressTestProfile: {
type: 'boolean',
label:
'Use the stress test profile to aggressively trigger context management.',
category: 'Experimental',
requiresRestart: true,
default: false,
description:
'Significantly lowers token limits to force early garbage collection and distillation for testing purposes.',
showInDialog: false,
},
autoMemory: {
type: 'boolean',
label: 'Auto Memory',

View file

@ -6,19 +6,17 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { loadContextManagementConfig } from './configLoader.js';
import { defaultContextProfile } from './profiles.js';
import { generalistProfile } from './profiles.js';
import { ContextProcessorRegistry } from './registry.js';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import * as os from 'node:os';
import type { Config } from '../../config/config.js';
import type { JSONSchemaType } from 'ajv';
describe('SidecarLoader (Real FS)', () => {
let tmpDir: string;
let registry: ContextProcessorRegistry;
let sidecarPath: string;
let mockConfig: Config;
beforeEach(async () => {
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gemini-sidecar-test-'));
@ -32,10 +30,6 @@ describe('SidecarLoader (Real FS)', () => {
required: ['maxTokens'],
} as unknown as JSONSchemaType<{ maxTokens: number }>,
});
mockConfig = {
getExperimentalContextManagementConfig: () => sidecarPath,
} as unknown as Config;
});
afterEach(async () => {
@ -43,14 +37,14 @@ describe('SidecarLoader (Real FS)', () => {
});
it('returns default profile if file does not exist', async () => {
const result = await loadContextManagementConfig(mockConfig, registry);
expect(result).toBe(defaultContextProfile);
const result = await loadContextManagementConfig(sidecarPath, registry);
expect(result).toBe(generalistProfile);
});
it('returns default profile if file exists but is 0 bytes', async () => {
await fs.writeFile(sidecarPath, '');
const result = await loadContextManagementConfig(mockConfig, registry);
expect(result).toBe(defaultContextProfile);
const result = await loadContextManagementConfig(sidecarPath, registry);
expect(result).toBe(generalistProfile);
});
it('returns parsed config if file is valid', async () => {
@ -64,7 +58,7 @@ describe('SidecarLoader (Real FS)', () => {
},
};
await fs.writeFile(sidecarPath, JSON.stringify(validConfig));
const result = await loadContextManagementConfig(mockConfig, registry);
const result = await loadContextManagementConfig(sidecarPath, registry);
expect(result.config.budget?.maxTokens).toBe(2000);
expect(result.config.processorOptions?.['myTruncation']).toBeDefined();
});
@ -81,14 +75,14 @@ describe('SidecarLoader (Real FS)', () => {
};
await fs.writeFile(sidecarPath, JSON.stringify(invalidConfig));
await expect(
loadContextManagementConfig(mockConfig, registry),
loadContextManagementConfig(sidecarPath, registry),
).rejects.toThrow('Validation error');
});
it('throws validation error if file is empty whitespace', async () => {
await fs.writeFile(sidecarPath, ' \n ');
await expect(
loadContextManagementConfig(mockConfig, registry),
loadContextManagementConfig(sidecarPath, registry),
).rejects.toThrow('Unexpected end of JSON input');
});
});

View file

@ -4,11 +4,14 @@
* SPDX-License-Identifier: Apache-2.0
*/
import type { Config } from '../../config/config.js';
import * as fsSync from 'node:fs';
import * as fs from 'node:fs/promises';
import type { ContextManagementConfig } from './types.js';
import { defaultContextProfile, type ContextProfile } from './profiles.js';
import {
generalistProfile,
stressTestProfile,
type ContextProfile,
} from './profiles.js';
import { SchemaValidator } from '../../utils/schemaValidator.js';
import { getContextManagementConfigSchema } from './schema.js';
import type { ContextProcessorRegistry } from './registry.js';
@ -54,9 +57,9 @@ async function loadConfigFromFile(
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const validConfig = parsed as ContextManagementConfig;
return {
...defaultContextProfile,
...generalistProfile,
config: {
...defaultContextProfile.config,
...generalistProfile.config,
...(validConfig.budget ? { budget: validConfig.budget } : {}),
...(validConfig.processorOptions
? { processorOptions: validConfig.processorOptions }
@ -70,21 +73,27 @@ async function loadConfigFromFile(
* If a config file is present but invalid, this will THROW to prevent silent misconfiguration.
*/
export async function loadContextManagementConfig(
config: Config,
sidecarPath: string | undefined,
registry: ContextProcessorRegistry,
): Promise<ContextProfile> {
const sidecarPath = config.getExperimentalContextManagementConfig();
if (sidecarPath === 'stressTestProfile') {
return stressTestProfile;
}
if (sidecarPath === 'generalistProfile') {
return generalistProfile;
}
if (sidecarPath && fsSync.existsSync(sidecarPath)) {
const size = fsSync.statSync(sidecarPath).size;
// If the file exists but is completely empty (0 bytes), it's safe to fallback.
if (size === 0) {
return defaultContextProfile;
return generalistProfile;
}
// If the file has content, enforce strict validation and throw on failure.
return loadConfigFromFile(sidecarPath, registry);
}
return defaultContextProfile;
return generalistProfile;
}

View file

@ -62,7 +62,7 @@ export interface ContextProfile {
* The standard default context management profile.
* Optimized for safety, precision, and reliable summarization.
*/
export const defaultContextProfile: ContextProfile = {
export const generalistProfile: ContextProfile = {
config: {
budget: {
retainedTokens: 65000,
@ -143,3 +143,41 @@ export const defaultContextProfile: ContextProfile = {
},
],
};
/**
 * A highly aggressive profile designed exclusively for testing Context Management.
 * Lowers token limits dramatically to force garbage collection and distillation loops
 * within a few conversational turns.
 *
 * Selected via the `experimental.stressTestProfile` setting; never intended
 * for production sessions.
 */
export const stressTestProfile: ContextProfile = {
  config: {
    budget: {
      // Tiny budgets (vs. 65k retained tokens in the generalist profile) so
      // budget-crossing triggers fire within a handful of turns.
      retainedTokens: 4000,
      maxTokens: 10000,
    },
    processorOptions: {
      ToolMasking: {
        type: 'ToolMaskingProcessor',
        options: {
          // Mask tool output strings almost immediately.
          stringLengthThresholdTokens: 500,
        },
      },
      NodeTruncation: {
        type: 'NodeTruncationProcessor',
        options: {
          // Truncate any single node above ~1k tokens.
          maxTokensPerNode: 1000,
        },
      },
      NodeDistillation: {
        type: 'NodeDistillationProcessor',
        options: {
          // Distill nodes at a fraction of the normal threshold.
          nodeThresholdTokens: 1500,
        },
      },
    },
  },
  // Re-use the generalist pipeline architecture exactly, but the `config` above
  // will be passed into `resolveProcessorOptions` to aggressively override the thresholds.
  buildPipelines: generalistProfile.buildPipelines,
  buildAsyncPipelines: generalistProfile.buildAsyncPipelines,
};

View file

@ -47,9 +47,8 @@ export class ContextManager {
this.historyObserver.start();
this.eventBus.onPristineHistoryUpdated((event) => {
const existingIds = new Set(this.buffer.nodes.map((n) => n.id));
const newIds = new Set(event.nodes.map((n) => n.id));
const addedNodes = event.nodes.filter((n) => !existingIds.has(n.id));
const addedNodes = event.nodes.filter((n) => event.newNodes.has(n.id));
// Prune any pristine nodes that were dropped from the upstream history
this.buffer = this.buffer.prunePristineNodes(newIds);
@ -60,6 +59,13 @@ export class ContextManager {
this.evaluateTriggers(event.newNodes);
});
this.eventBus.onProcessorResult((event) => {
this.buffer = this.buffer.applyProcessorResult(
event.processorId,
event.targets,
event.returnedNodes,
);
});
}
/**

View file

@ -7,6 +7,12 @@
import { EventEmitter } from 'node:events';
import type { ConcreteNode } from './graph/types.js';
/**
 * Emitted when a context processor finishes, so listeners (e.g. the
 * ContextManager) can fold the result back into their working buffer.
 */
export interface ProcessorResultEvent {
  /** Identifier of the processor that produced this result. */
  processorId: string;
  /** The nodes the processor was asked to operate on. */
  targets: readonly ConcreteNode[];
  /** The nodes the processor returned in place of the targets (e.g. a summary node). */
  returnedNodes: readonly ConcreteNode[];
}
export interface PristineHistoryUpdatedEvent {
nodes: readonly ConcreteNode[];
newNodes: Set<string>;
@ -49,4 +55,12 @@ export class ContextEventBus extends EventEmitter {
onConsolidationNeeded(listener: (event: ContextConsolidationEvent) => void) {
this.on('BUDGET_RETAINED_CROSSED', listener);
}
/** Publishes a finished processor's output to all PROCESSOR_RESULT listeners. */
emitProcessorResult(event: ProcessorResultEvent) {
this.emit('PROCESSOR_RESULT', event);
}
/** Subscribes to processor results; the listener is invoked on every emitProcessorResult call. */
onProcessorResult(listener: (event: ProcessorResultEvent) => void) {
this.on('PROCESSOR_RESULT', listener);
}
}

View file

@ -3,9 +3,10 @@
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { ConcreteNode } from './types.js';
import { ContextGraphBuilder } from './toGraph.js';
import type { Content } from '@google/genai';
import type { Episode, ConcreteNode } from './types.js';
import { toGraph } from './toGraph.js';
import type { HistoryEvent } from '../../core/agentChatHistory.js';
import { fromGraph } from './fromGraph.js';
import type { ContextTokenCalculator } from '../utils/contextTokenCalculator.js';
import type { NodeBehaviorRegistry } from './behaviorRegistry.js';
@ -15,11 +16,30 @@ export class ContextGraphMapper {
constructor(private readonly registry: NodeBehaviorRegistry) {}
toGraph(
history: readonly Content[],
private builder?: ContextGraphBuilder;
applyEvent(
event: HistoryEvent,
tokenCalculator: ContextTokenCalculator,
): Episode[] {
return toGraph(history, tokenCalculator, this.nodeIdentityMap);
): ConcreteNode[] {
if (!this.builder) {
this.builder = new ContextGraphBuilder(
tokenCalculator,
this.nodeIdentityMap,
);
}
if (event.type === 'CLEAR') {
this.builder.clear();
return [];
}
if (event.type === 'SYNC_FULL') {
this.builder.clear();
}
this.builder.processHistory(event.payload);
return this.builder.getNodes();
}
fromGraph(nodes: readonly ConcreteNode[]): Content[] {

View file

@ -6,6 +6,7 @@
import type { Content, Part } from '@google/genai';
import type {
ConcreteNode,
Episode,
SemanticPart,
ToolExecution,
@ -38,61 +39,87 @@ function isCompleteEpisode(ep: Partial<Episode>): ep is Episode {
);
}
export function toGraph(
history: readonly Content[],
tokenCalculator: ContextTokenCalculator,
nodeIdentityMap: WeakMap<object, string>,
): Episode[] {
const episodes: Episode[] = [];
let currentEpisode: Partial<Episode> | null = null;
const pendingCallParts: Map<string, Part> = new Map();
export class ContextGraphBuilder {
private episodes: Episode[] = [];
private currentEpisode: Partial<Episode> | null = null;
private pendingCallParts: Map<string, Part> = new Map();
const finalizeEpisode = () => {
if (currentEpisode && isCompleteEpisode(currentEpisode)) {
episodes.push(currentEpisode);
}
currentEpisode = null;
};
constructor(
private readonly tokenCalculator: ContextTokenCalculator,
private readonly nodeIdentityMap: WeakMap<object, string>,
) {}
for (const msg of history) {
if (!msg.parts) continue;
clear() {
this.episodes = [];
this.currentEpisode = null;
this.pendingCallParts.clear();
}
if (msg.role === 'user') {
const hasToolResponses = msg.parts.some((p) => !!p.functionResponse);
const hasUserParts = msg.parts.some(
(p) => !!p.text || !!p.inlineData || !!p.fileData,
);
processHistory(history: readonly Content[]) {
const finalizeEpisode = () => {
if (this.currentEpisode && isCompleteEpisode(this.currentEpisode)) {
this.episodes.push(this.currentEpisode);
}
this.currentEpisode = null;
};
if (hasToolResponses) {
currentEpisode = parseToolResponses(
for (const msg of history) {
if (!msg.parts) continue;
if (msg.role === 'user') {
const hasToolResponses = msg.parts.some((p) => !!p.functionResponse);
const hasUserParts = msg.parts.some(
(p) => !!p.text || !!p.inlineData || !!p.fileData,
);
if (hasToolResponses) {
this.currentEpisode = parseToolResponses(
msg,
this.currentEpisode,
this.pendingCallParts,
this.tokenCalculator,
this.nodeIdentityMap,
);
}
if (hasUserParts) {
finalizeEpisode();
this.currentEpisode = parseUserParts(msg, this.nodeIdentityMap);
}
} else if (msg.role === 'model') {
this.currentEpisode = parseModelParts(
msg,
currentEpisode,
pendingCallParts,
tokenCalculator,
nodeIdentityMap,
this.currentEpisode,
this.pendingCallParts,
this.nodeIdentityMap,
);
}
if (hasUserParts) {
finalizeEpisode();
currentEpisode = parseUserParts(msg, nodeIdentityMap);
}
} else if (msg.role === 'model') {
currentEpisode = parseModelParts(
msg,
currentEpisode,
pendingCallParts,
nodeIdentityMap,
);
}
}
if (currentEpisode) {
finalizeYield(currentEpisode);
finalizeEpisode();
}
getNodes(): ConcreteNode[] {
const copy = [...this.episodes];
if (this.currentEpisode) {
const activeEp = {
...this.currentEpisode,
concreteNodes: [...(this.currentEpisode.concreteNodes || [])],
};
finalizeYield(activeEp);
if (isCompleteEpisode(activeEp)) {
copy.push(activeEp);
}
}
return episodes;
const nodes: ConcreteNode[] = [];
for (const ep of copy) {
if (ep.concreteNodes) {
for (const child of ep.concreteNodes) {
nodes.push(child);
}
}
}
return nodes;
}
}
function parseToolResponses(

View file

@ -39,24 +39,15 @@ export class HistoryObserver {
}
this.unsubscribeHistory = this.chatHistory.subscribe(
(_event: HistoryEvent) => {
// Rebuild the pristine Context Graph from the full source history on every change.
// Wait, toGraph still returns an Episode[].
// We actually need to map the Episode[] to a flat ConcreteNode[] here to form the 'nodes'.
const pristineEpisodes = this.graphMapper.toGraph(
this.chatHistory.get(),
this.tokenCalculator,
);
(event: HistoryEvent) => {
let nodes: ConcreteNode[] = [];
const nodes: ConcreteNode[] = [];
for (const ep of pristineEpisodes) {
if (ep.concreteNodes) {
for (const child of ep.concreteNodes) {
nodes.push(child);
}
}
if (event.type === 'CLEAR') {
this.seenNodeIds.clear();
}
nodes = this.graphMapper.applyEvent(event, this.tokenCalculator);
const newNodes = new Set<string>();
for (const node of nodes) {
if (!this.seenNodeIds.has(node.id)) {
@ -67,7 +58,7 @@ export class HistoryObserver {
this.tracer.logEvent(
'HistoryObserver',
'Rebuilt pristine graph from chat history update',
`Rebuilt pristine graph from ${event.type} event`,
{ nodesSize: nodes.length, newNodesCount: newNodes.size },
);

View file

@ -0,0 +1,117 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import type { Config } from '../config/config.js';
import type { GeminiChat } from '../core/geminiChat.js';
import { ContextProcessorRegistry } from './config/registry.js';
import { loadContextManagementConfig } from './config/configLoader.js';
import { ContextTracer } from './tracer.js';
import { ContextEventBus } from './eventBus.js';
import { ContextEnvironmentImpl } from './pipeline/environmentImpl.js';
import { PipelineOrchestrator } from './pipeline/orchestrator.js';
import { ContextManager } from './contextManager.js';
import { debugLogger } from '../utils/debugLogger.js';
import { NodeTruncationProcessorOptionsSchema } from './processors/nodeTruncationProcessor.js';
import { ToolMaskingProcessorOptionsSchema } from './processors/toolMaskingProcessor.js';
import { HistoryTruncationProcessorOptionsSchema } from './processors/historyTruncationProcessor.js';
import { BlobDegradationProcessorOptionsSchema } from './processors/blobDegradationProcessor.js';
import { NodeDistillationProcessorOptionsSchema } from './processors/nodeDistillationProcessor.js';
import { StateSnapshotProcessorOptionsSchema } from './processors/stateSnapshotProcessor.js';
import { StateSnapshotAsyncProcessorOptionsSchema } from './processors/stateSnapshotAsyncProcessor.js';
import { RollingSummaryProcessorOptionsSchema } from './processors/rollingSummaryProcessor.js';
/**
 * Builds the experimental ContextManager for a chat session.
 *
 * Returns `undefined` when context management is disabled in the config;
 * otherwise wires together the processor registry, the loaded profile, the
 * tracer, the event bus, the environment, and the pipeline orchestrator.
 *
 * @param config The session-wide configuration object.
 * @param chat The chat whose `agentHistory` the manager will observe.
 * @param lastPromptId Used as the tracer session id and passed to the environment.
 */
export async function initializeContextManager(
  config: Config,
  chat: GeminiChat,
  lastPromptId: string,
): Promise<ContextManager | undefined> {
  const enabled = config.getContextManagementConfig().enabled;
  debugLogger.log(
    `[initializer] called with enabled=${enabled}, GEMINI_CONTEXT_TRACE_DIR=${process.env['GEMINI_CONTEXT_TRACE_DIR']}`,
  );
  if (!enabled) {
    return undefined;
  }

  // Register every built-in processor option schema so that sidecar configs
  // referencing these processor ids can be validated.
  const registry = new ContextProcessorRegistry();
  const builtInProcessors = [
    { id: 'NodeTruncationProcessor', schema: NodeTruncationProcessorOptionsSchema },
    { id: 'ToolMaskingProcessor', schema: ToolMaskingProcessorOptionsSchema },
    { id: 'HistoryTruncationProcessor', schema: HistoryTruncationProcessorOptionsSchema },
    { id: 'BlobDegradationProcessor', schema: BlobDegradationProcessorOptionsSchema },
    { id: 'NodeDistillationProcessor', schema: NodeDistillationProcessorOptionsSchema },
    { id: 'StateSnapshotProcessor', schema: StateSnapshotProcessorOptionsSchema },
    { id: 'StateSnapshotAsyncProcessor', schema: StateSnapshotAsyncProcessorOptionsSchema },
    { id: 'RollingSummaryProcessor', schema: RollingSummaryProcessorOptionsSchema },
  ];
  for (const registration of builtInProcessors) {
    registry.registerProcessor(registration);
  }

  // Resolve the active profile (named profile or sidecar file on disk).
  const profile = await loadContextManagementConfig(
    config.getExperimentalContextManagementConfig(),
    registry,
  );

  const { storage } = config;
  const logDir = storage.getProjectTempLogsDir();
  const projectTempDir = storage.getProjectTempDir();

  // Tracing is opt-in via the GEMINI_CONTEXT_TRACE_DIR environment variable.
  const tracer = new ContextTracer({
    enabled: !!process.env['GEMINI_CONTEXT_TRACE_DIR'],
    targetDir: projectTempDir,
    sessionId: lastPromptId,
  });
  const eventBus = new ContextEventBus();
  const env = new ContextEnvironmentImpl(
    config.getBaseLlmClient(),
    config.getSessionId(),
    lastPromptId,
    logDir,
    projectTempDir,
    tracer,
    4,
    eventBus,
  );
  const orchestrator = new PipelineOrchestrator(
    profile.buildPipelines(env),
    profile.buildAsyncPipelines(env),
    env,
    eventBus,
    tracer,
  );
  return new ContextManager(
    profile,
    env,
    tracer,
    orchestrator,
    chat.agentHistory,
  );
}

View file

@ -95,7 +95,7 @@ describe('ContextWorkingBufferImpl', () => {
buffer = buffer.applyProcessorResult('Summarizer', [p1, p2], [summaryNode]);
// p1 and p2 are removed, p3 remains, s1 is added
expect(buffer.nodes.map((n) => n.id)).toEqual(['p3', 's1']);
expect(buffer.nodes.map((n) => n.id)).toEqual(['s1', 'p3']);
// Provenance lookup: The summary node should resolve to both p1 and p2!
const roots = buffer.getPristineNodes('s1');

View file

@ -107,13 +107,19 @@ export class ContextWorkingBufferImpl implements ContextWorkingBuffer {
// Calculate new node array
const removedSet = new Set(removedIds);
const retainedNodes = this.nodes.filter((n) => !removedSet.has(n.id));
const newGraph = [...retainedNodes];
// We append the output nodes in the same general position if possible,
// but in a complex graph we just ensure they exist. V2 graph uses timestamps for order.
// For simplicity, we just push added nodes to the end of the retained array
newGraph.push(...addedNodes);
const newGraph = this.nodes.filter((n) => !removedSet.has(n.id));
const insertionIndex = this.nodes.findIndex((n) => removedSet.has(n.id));
// IMPORTANT: We do NOT use structuredClone here.
// The ContextTokenCalculator relies on a WeakMap tied to exact object references
// for O(1) performance. Deep cloning would cause catastrophic cache misses.
// The pipeline enforces immutability, making reference passing safe.
if (insertionIndex !== -1) {
newGraph.splice(insertionIndex, 0, ...addedNodes);
} else {
newGraph.push(...addedNodes);
}
// Calculate new provenance map
const newProvenanceMap = new Map(this.provenanceMap);

View file

@ -204,6 +204,11 @@ export class PipelineOrchestrator {
allowedTargets,
returnedNodes,
);
this.eventBus.emitProcessorResult({
processorId: processor.id,
targets: allowedTargets,
returnedNodes,
});
} catch (error) {
debugLogger.error(
`Pipeline ${pipeline.name} failed async at ${processor.id}:`,

File diff suppressed because one or more lines are too long

View file

@ -22,6 +22,7 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
let tmpDir: string;
beforeEach(async () => {
vi.stubEnv('GEMINI_CONTEXT_TRACE_DIR', '');
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gemini-tracer-test-'));
vi.useFakeTimers();
@ -29,6 +30,7 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
});
afterEach(async () => {
vi.unstubAllEnvs();
vi.useRealTimers();
await fs.rm(tmpDir, { recursive: true, force: true });
});
@ -45,7 +47,9 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
// Verify Initialization
const traceLogPath = path.join(
tmpDir,
'.gemini/context_trace/test-session/trace.log',
'context_trace',
'test-session',
'trace.log',
);
const initTraceLog = readFileSync(traceLogPath, 'utf-8');
expect(initTraceLog).toContain('[SYSTEM] Context Tracer Initialized');
@ -65,7 +69,10 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
const expectedAssetPath = path.join(
tmpDir,
'.gemini/context_trace/test-session/assets/1767268800020-mock-uuid-1-largeKey.json',
'context_trace',
'test-session',
'assets',
'1767268800020-mock-uuid-1-largeKey.json',
);
expect(existsSync(expectedAssetPath)).toBe(true);

View file

@ -25,12 +25,9 @@ export class ContextTracer {
constructor(options: ContextTracerOptions) {
this.enabled = options.enabled ?? false;
this.traceDir = path.join(
options.targetDir,
'.gemini',
'context_trace',
options.sessionId,
);
this.traceDir =
process.env['GEMINI_CONTEXT_TRACE_DIR'] ||
path.join(options.targetDir, 'context_trace', options.sessionId);
this.assetsDir = path.join(this.traceDir, 'assets');
if (this.enabled) {

View file

@ -5,7 +5,7 @@
*/
import type { Part } from '@google/genai';
import { estimateTokenCountSync as baseEstimate } from '../../utils/tokenCalculation.js';
import { estimateTokenCountSync } from '../../utils/tokenCalculation.js';
import type { ConcreteNode } from '../graph/types.js';
import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js';
@ -84,24 +84,27 @@ export class ContextTokenCalculator {
}
return tokens;
}
/**
* Slower, precise estimation for a Gemini Content/Part graph.
* Deeply inspects the nested structure and uses the base tokenization math.
*/
estimateTokensForParts(parts: Part[], depth: number = 0): number {
let totalTokens = 0;
private readonly partTokenCache = new WeakMap<object, number>();
estimateTokensForParts(parts: Part[]): number {
let total = 0;
for (const part of parts) {
if (typeof part.text === 'string') {
totalTokens += Math.ceil(part.text.length / this.charsPerToken);
} else if (part.inlineData !== undefined || part.fileData !== undefined) {
totalTokens += 258;
if (part !== null && typeof part === 'object') {
let cost = this.partTokenCache.get(part);
if (cost === undefined) {
cost = estimateTokenCountSync([part], 0, this.charsPerToken);
this.partTokenCache.set(part, cost);
}
total += cost;
} else {
totalTokens += Math.ceil(
JSON.stringify(part).length / this.charsPerToken,
);
total += estimateTokenCountSync([part], 0, this.charsPerToken);
}
}
// Also include structural overhead
return totalTokens + baseEstimate(parts, depth);
return total;
}
}

View file

@ -45,6 +45,7 @@ import type { ContentGenerator } from './contentGenerator.js';
import { LoopDetectionService } from '../services/loopDetectionService.js';
import { ChatCompressionService } from '../context/chatCompressionService.js';
import { AgentHistoryProvider } from '../context/agentHistoryProvider.js';
import type { ContextManager } from '../context/contextManager.js';
import { ideContextStore } from '../ide/ideContext.js';
import {
logContentRetryFailure,
@ -74,6 +75,7 @@ import {
import { getDisplayString, resolveModel } from '../config/models.js';
import { partToString } from '../utils/partUtils.js';
import { coreEvents, CoreEvent } from '../utils/events.js';
import { initializeContextManager } from '../context/initializer.js';
const MAX_TURNS = 100;
@ -97,6 +99,7 @@ export class GeminiClient {
private readonly compressionService: ChatCompressionService;
private readonly agentHistoryProvider: AgentHistoryProvider;
private readonly toolOutputMaskingService: ToolOutputMaskingService;
private contextManager?: ContextManager;
private lastPromptId: string;
private currentSequenceModel: string | null = null;
private lastSentIdeContext: IdeContext | undefined;
@ -393,6 +396,11 @@ export class GeminiClient {
},
);
await chat.initialize(resumedSessionData, 'main');
this.contextManager = await initializeContextManager(
this.config,
chat,
this.lastPromptId,
);
return chat;
} catch (error) {
await reportError(
@ -618,10 +626,12 @@ export class GeminiClient {
const modelForLimitCheck = this._getActiveModelForCurrentTurn();
if (this.config.getContextManagementConfig().enabled) {
const newHistory = await this.agentHistoryProvider.manageHistory(
this.getHistory(),
signal,
);
const newHistory = this.contextManager
? await this.contextManager.renderHistory()
: await this.agentHistoryProvider.manageHistory(
this.getHistory(),
signal,
);
if (newHistory.length !== this.getHistory().length) {
this.getChat().setHistory(newHistory);
}

View file

@ -18,6 +18,7 @@ import {
type GenerateContentConfig,
type GenerateContentParameters,
} from '@google/genai';
import { AgentChatHistory } from './agentChatHistory.js';
import { toParts } from '../code_assist/converter.js';
import {
retryWithBackoff,
@ -248,19 +249,21 @@ export class GeminiChat {
private sendPromise: Promise<void> = Promise.resolve();
private readonly chatRecordingService: ChatRecordingService;
private lastPromptTokenCount: number;
agentHistory: AgentChatHistory;
constructor(
private readonly context: AgentLoopContext,
private systemInstruction: string = '',
private tools: Tool[] = [],
private history: Content[] = [],
history: Content[] = [],
resumedSessionData?: ResumedSessionData,
private readonly onModelChanged?: (modelId: string) => Promise<Tool[]>,
) {
validateHistory(history);
this.agentHistory = new AgentChatHistory(history);
this.chatRecordingService = new ChatRecordingService(context);
this.lastPromptTokenCount = estimateTokenCountSync(
this.history.flatMap((c) => c.parts || []),
this.agentHistory.flatMap((c) => c.parts || []),
);
}
@ -347,7 +350,7 @@ export class GeminiChat {
}
// Add user content to history ONCE before any attempts.
this.history.push(userContent);
this.agentHistory.push(userContent);
const requestContents = this.getHistory(true);
const streamWithRetries = async function* (
@ -747,8 +750,8 @@ export class GeminiChat {
*/
getHistory(curated: boolean = false): readonly Content[] {
const history = curated
? extractCuratedHistory(this.history)
: this.history;
? extractCuratedHistory([...this.agentHistory.get()])
: this.agentHistory.get();
return [...history];
}
@ -756,26 +759,26 @@ export class GeminiChat {
* Clears the chat history.
*/
clearHistory(): void {
this.history = [];
this.agentHistory.clear();
}
/**
* Adds a new entry to the chat history.
*/
addHistory(content: Content): void {
this.history.push(content);
this.agentHistory.push(content);
}
setHistory(history: readonly Content[]): void {
this.history = [...history];
this.agentHistory.set(history);
this.lastPromptTokenCount = estimateTokenCountSync(
this.history.flatMap((c) => c.parts || []),
this.agentHistory.flatMap((c) => c.parts || []),
);
this.chatRecordingService.updateMessagesFromHistory(history);
}
stripThoughtsFromHistory(): void {
this.history = this.history.map((content) => {
this.agentHistory.map((content) => {
const newContent = { ...content };
if (newContent.parts) {
newContent.parts = newContent.parts.map((part) => {
@ -1013,7 +1016,7 @@ export class GeminiChat {
}
}
this.history.push({ role: 'model', parts: consolidatedParts });
this.agentHistory.push({ role: 'model', parts: consolidatedParts });
}
getLastPromptTokenCount(): number {

View file

@ -293,4 +293,9 @@ export type { Content, Part, FunctionCall } from '@google/genai';
// Export context types and profiles
export * from './context/types.js';
export * from './context/profiles.js';
export { generalistProfile as legacyGeneralistProfile } from './context/profiles.js';
export {
generalistProfile,
stressTestProfile,
} from './context/config/profiles.js';

View file

@ -29,12 +29,14 @@ const MAX_CHARS_FOR_FULL_HEURISTIC = 100_000;
// standard multimodal responses are typically depth 1.
const MAX_RECURSION_DEPTH = 3;
const DEFAULT_CHARS_PER_TOKEN = 4;
/**
* Heuristic estimation of tokens for a text string.
*/
function estimateTextTokens(text: string): number {
function estimateTextTokens(text: string, charsPerToken: number): number {
if (text.length > MAX_CHARS_FOR_FULL_HEURISTIC) {
return text.length / 4;
return text.length / charsPerToken;
}
let tokens = 0;
@ -73,25 +75,33 @@ function estimateMediaTokens(part: Part): number | undefined {
* Heuristic estimation for tool responses, avoiding massive string copies
* and accounting for nested Gemini 3 multimodal parts.
*/
function estimateFunctionResponseTokens(part: Part, depth: number): number {
function estimateFunctionResponseTokens(
part: Part,
depth: number,
charsPerToken: number,
): number {
const fr = part.functionResponse;
if (!fr) return 0;
let totalTokens = (fr.name?.length ?? 0) / 4;
let totalTokens = (fr.name?.length ?? 0) / charsPerToken;
const response = fr.response as unknown;
if (typeof response === 'string') {
totalTokens += response.length / 4;
totalTokens += response.length / charsPerToken;
} else if (response !== undefined && response !== null) {
// For objects, stringify only the payload, not the whole Part object.
totalTokens += JSON.stringify(response).length / 4;
totalTokens += JSON.stringify(response).length / charsPerToken;
}
// Gemini 3: Handle nested multimodal parts recursively.
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const nestedParts = (fr as unknown as { parts?: Part[] }).parts;
if (nestedParts && nestedParts.length > 0) {
totalTokens += estimateTokenCountSync(nestedParts, depth + 1);
totalTokens += estimateTokenCountSync(
nestedParts,
depth + 1,
charsPerToken,
);
}
return totalTokens;
@ -100,11 +110,12 @@ function estimateFunctionResponseTokens(part: Part, depth: number): number {
/**
* Estimates token count for parts synchronously using a heuristic.
* - Text: character-based heuristic (ASCII vs CJK) for small strings, length/4 for massive ones.
* - Non-text (Tools, etc): JSON string length / 4.
* - Non-text (Tools, etc): JSON string length / charsPerToken.
*/
export function estimateTokenCountSync(
parts: Part[],
depth: number = 0,
charsPerToken: number = DEFAULT_CHARS_PER_TOKEN,
): number {
if (depth > MAX_RECURSION_DEPTH) {
return 0;
@ -113,9 +124,9 @@ export function estimateTokenCountSync(
let totalTokens = 0;
for (const part of parts) {
if (typeof part.text === 'string') {
totalTokens += estimateTextTokens(part.text);
totalTokens += estimateTextTokens(part.text, charsPerToken);
} else if (part.functionResponse) {
totalTokens += estimateFunctionResponseTokens(part, depth);
totalTokens += estimateFunctionResponseTokens(part, depth, charsPerToken);
} else {
const mediaEstimate = estimateMediaTokens(part);
if (mediaEstimate !== undefined) {
@ -123,7 +134,7 @@ export function estimateTokenCountSync(
} else {
// Fallback for other non-text parts (e.g., functionCall).
// Note: JSON.stringify(part) here is safe as these parts are typically small.
totalTokens += JSON.stringify(part).length / 4;
totalTokens += JSON.stringify(part).length / charsPerToken;
}
}
}
@ -162,9 +173,9 @@ export async function calculateRequestTokenCount(
} catch (error) {
// Fallback to local estimation if the API call fails
debugLogger.debug('countTokens API failed:', error);
return estimateTokenCountSync(parts);
return estimateTokenCountSync(parts, 0, DEFAULT_CHARS_PER_TOKEN);
}
}
return estimateTokenCountSync(parts);
return estimateTokenCountSync(parts, 0, DEFAULT_CHARS_PER_TOKEN);
}

View file

@ -2954,6 +2954,13 @@
"default": false,
"type": "boolean"
},
"stressTestProfile": {
"title": "Use the stress test profile to aggressively trigger context management.",
"description": "Significantly lowers token limits to force early garbage collection and distillation for testing purposes.",
"markdownDescription": "Significantly lowers token limits to force early garbage collection and distillation for testing purposes.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
"default": false,
"type": "boolean"
},
"autoMemory": {
"title": "Auto Memory",
"description": "Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox.",