mirror of
https://github.com/google-gemini/gemini-cli
synced 2026-04-21 13:37:17 +00:00
feat(core): wire up the new ContextManager and AgentChatHistory
This commit is contained in:
parent
8573650253
commit
6d7aa497aa
25 changed files with 467 additions and 310 deletions
|
|
@ -1729,6 +1729,12 @@ their corresponding top-level category object in your `settings.json` file.
|
|||
- **Default:** `false`
|
||||
- **Requires restart:** Yes
|
||||
|
||||
- **`experimental.stressTestProfile`** (boolean):
|
||||
- **Description:** Significantly lowers token limits to force early garbage
|
||||
collection and distillation for testing purposes.
|
||||
- **Default:** `false`
|
||||
- **Requires restart:** Yes
|
||||
|
||||
- **`experimental.autoMemory`** (boolean):
|
||||
- **Description:** Automatically extract reusable skills from past sessions in
|
||||
the background. Review results with /memory inbox.
|
||||
|
|
|
|||
|
|
@ -21,8 +21,6 @@ import {
|
|||
type MCPServerConfig,
|
||||
type GeminiCLIExtension,
|
||||
Storage,
|
||||
generalistProfile,
|
||||
type ContextManagementConfig,
|
||||
} from '@google/gemini-cli-core';
|
||||
import { loadCliConfig, parseArguments, type CliArgs } from './config.js';
|
||||
import {
|
||||
|
|
@ -2210,51 +2208,6 @@ describe('loadCliConfig context management', () => {
|
|||
},
|
||||
});
|
||||
const config = await loadCliConfig(settings, 'test-session', argv);
|
||||
expect(config.getContextManagementConfig()).toStrictEqual(
|
||||
generalistProfile,
|
||||
);
|
||||
expect(config.isContextManagementEnabled()).toBe(true);
|
||||
});
|
||||
|
||||
it('should be true when contextManagement is set to true in settings', async () => {
|
||||
process.argv = ['node', 'script.js'];
|
||||
const argv = await parseArguments(createTestMergedSettings());
|
||||
const contextManagementConfig: Partial<ContextManagementConfig> = {
|
||||
historyWindow: {
|
||||
maxTokens: 100_000,
|
||||
retainedTokens: 50_000,
|
||||
},
|
||||
messageLimits: {
|
||||
normalMaxTokens: 1000,
|
||||
retainedMaxTokens: 10_000,
|
||||
normalizationHeadRatio: 0.25,
|
||||
},
|
||||
tools: {
|
||||
distillation: {
|
||||
maxOutputTokens: 10_000,
|
||||
summarizationThresholdTokens: 15_000,
|
||||
},
|
||||
outputMasking: {
|
||||
protectionThresholdTokens: 30_000,
|
||||
minPrunableThresholdTokens: 10_000,
|
||||
protectLatestTurn: false,
|
||||
},
|
||||
},
|
||||
};
|
||||
const settings = createTestMergedSettings({
|
||||
experimental: {
|
||||
contextManagement: true,
|
||||
},
|
||||
// The type of numbers is being inferred strangely, and so we have to cast
|
||||
// to `any` here.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
contextManagement: contextManagementConfig as any,
|
||||
});
|
||||
const config = await loadCliConfig(settings, 'test-session', argv);
|
||||
expect(config.getContextManagementConfig()).toStrictEqual({
|
||||
enabled: true,
|
||||
...contextManagementConfig,
|
||||
});
|
||||
expect(config.isContextManagementEnabled()).toBe(true);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ import {
|
|||
type HookEventName,
|
||||
type OutputFormat,
|
||||
detectIdeFromEnv,
|
||||
generalistProfile,
|
||||
} from '@google/gemini-cli-core';
|
||||
import {
|
||||
type Settings,
|
||||
|
|
@ -884,14 +883,19 @@ export async function loadCliConfig(
|
|||
}
|
||||
}
|
||||
|
||||
const useGeneralistProfile =
|
||||
settings.experimental?.generalistProfile ?? false;
|
||||
const useContextManagement =
|
||||
settings.experimental?.contextManagement ?? false;
|
||||
// TODO(joshualitt): Clean this up alongside removal of the legacy config.
|
||||
let profileSelector: string | undefined = undefined;
|
||||
if (settings.experimental?.stressTestProfile) {
|
||||
profileSelector = 'stressTestProfile';
|
||||
} else if (
|
||||
settings.experimental?.generalistProfile ||
|
||||
settings.experimental?.contextManagement
|
||||
) {
|
||||
profileSelector = 'generalistProfile';
|
||||
}
|
||||
|
||||
const contextManagement = {
|
||||
...(useGeneralistProfile ? generalistProfile : {}),
|
||||
...(useContextManagement ? settings?.contextManagement : {}),
|
||||
enabled: useContextManagement || useGeneralistProfile,
|
||||
enabled: !!profileSelector,
|
||||
};
|
||||
|
||||
return new Config({
|
||||
|
|
@ -915,6 +919,7 @@ export async function loadCliConfig(
|
|||
worktreeSettings,
|
||||
|
||||
coreTools: settings.tools?.core || undefined,
|
||||
experimentalContextManagementConfig: profileSelector,
|
||||
allowedTools: allowedTools.length > 0 ? allowedTools : undefined,
|
||||
policyEngineConfig,
|
||||
policyUpdateConfirmationRequest,
|
||||
|
|
|
|||
|
|
@ -2213,6 +2213,17 @@ const SETTINGS_SCHEMA = {
|
|||
'Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.',
|
||||
showInDialog: true,
|
||||
},
|
||||
stressTestProfile: {
|
||||
type: 'boolean',
|
||||
label:
|
||||
'Use the stress test profile to aggressively trigger context management.',
|
||||
category: 'Experimental',
|
||||
requiresRestart: true,
|
||||
default: false,
|
||||
description:
|
||||
'Significantly lowers token limits to force early garbage collection and distillation for testing purposes.',
|
||||
showInDialog: false,
|
||||
},
|
||||
autoMemory: {
|
||||
type: 'boolean',
|
||||
label: 'Auto Memory',
|
||||
|
|
|
|||
|
|
@ -6,19 +6,17 @@
|
|||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { loadContextManagementConfig } from './configLoader.js';
|
||||
import { defaultContextProfile } from './profiles.js';
|
||||
import { generalistProfile } from './profiles.js';
|
||||
import { ContextProcessorRegistry } from './registry.js';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import * as os from 'node:os';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import type { JSONSchemaType } from 'ajv';
|
||||
|
||||
describe('SidecarLoader (Real FS)', () => {
|
||||
let tmpDir: string;
|
||||
let registry: ContextProcessorRegistry;
|
||||
let sidecarPath: string;
|
||||
let mockConfig: Config;
|
||||
|
||||
beforeEach(async () => {
|
||||
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gemini-sidecar-test-'));
|
||||
|
|
@ -32,10 +30,6 @@ describe('SidecarLoader (Real FS)', () => {
|
|||
required: ['maxTokens'],
|
||||
} as unknown as JSONSchemaType<{ maxTokens: number }>,
|
||||
});
|
||||
|
||||
mockConfig = {
|
||||
getExperimentalContextManagementConfig: () => sidecarPath,
|
||||
} as unknown as Config;
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
|
|
@ -43,14 +37,14 @@ describe('SidecarLoader (Real FS)', () => {
|
|||
});
|
||||
|
||||
it('returns default profile if file does not exist', async () => {
|
||||
const result = await loadContextManagementConfig(mockConfig, registry);
|
||||
expect(result).toBe(defaultContextProfile);
|
||||
const result = await loadContextManagementConfig(sidecarPath, registry);
|
||||
expect(result).toBe(generalistProfile);
|
||||
});
|
||||
|
||||
it('returns default profile if file exists but is 0 bytes', async () => {
|
||||
await fs.writeFile(sidecarPath, '');
|
||||
const result = await loadContextManagementConfig(mockConfig, registry);
|
||||
expect(result).toBe(defaultContextProfile);
|
||||
const result = await loadContextManagementConfig(sidecarPath, registry);
|
||||
expect(result).toBe(generalistProfile);
|
||||
});
|
||||
|
||||
it('returns parsed config if file is valid', async () => {
|
||||
|
|
@ -64,7 +58,7 @@ describe('SidecarLoader (Real FS)', () => {
|
|||
},
|
||||
};
|
||||
await fs.writeFile(sidecarPath, JSON.stringify(validConfig));
|
||||
const result = await loadContextManagementConfig(mockConfig, registry);
|
||||
const result = await loadContextManagementConfig(sidecarPath, registry);
|
||||
expect(result.config.budget?.maxTokens).toBe(2000);
|
||||
expect(result.config.processorOptions?.['myTruncation']).toBeDefined();
|
||||
});
|
||||
|
|
@ -81,14 +75,14 @@ describe('SidecarLoader (Real FS)', () => {
|
|||
};
|
||||
await fs.writeFile(sidecarPath, JSON.stringify(invalidConfig));
|
||||
await expect(
|
||||
loadContextManagementConfig(mockConfig, registry),
|
||||
loadContextManagementConfig(sidecarPath, registry),
|
||||
).rejects.toThrow('Validation error');
|
||||
});
|
||||
|
||||
it('throws validation error if file is empty whitespace', async () => {
|
||||
await fs.writeFile(sidecarPath, ' \n ');
|
||||
await expect(
|
||||
loadContextManagementConfig(mockConfig, registry),
|
||||
loadContextManagementConfig(sidecarPath, registry),
|
||||
).rejects.toThrow('Unexpected end of JSON input');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,11 +4,14 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type { Config } from '../../config/config.js';
|
||||
import * as fsSync from 'node:fs';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import type { ContextManagementConfig } from './types.js';
|
||||
import { defaultContextProfile, type ContextProfile } from './profiles.js';
|
||||
import {
|
||||
generalistProfile,
|
||||
stressTestProfile,
|
||||
type ContextProfile,
|
||||
} from './profiles.js';
|
||||
import { SchemaValidator } from '../../utils/schemaValidator.js';
|
||||
import { getContextManagementConfigSchema } from './schema.js';
|
||||
import type { ContextProcessorRegistry } from './registry.js';
|
||||
|
|
@ -54,9 +57,9 @@ async function loadConfigFromFile(
|
|||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
const validConfig = parsed as ContextManagementConfig;
|
||||
return {
|
||||
...defaultContextProfile,
|
||||
...generalistProfile,
|
||||
config: {
|
||||
...defaultContextProfile.config,
|
||||
...generalistProfile.config,
|
||||
...(validConfig.budget ? { budget: validConfig.budget } : {}),
|
||||
...(validConfig.processorOptions
|
||||
? { processorOptions: validConfig.processorOptions }
|
||||
|
|
@ -70,21 +73,27 @@ async function loadConfigFromFile(
|
|||
* If a config file is present but invalid, this will THROW to prevent silent misconfiguration.
|
||||
*/
|
||||
export async function loadContextManagementConfig(
|
||||
config: Config,
|
||||
sidecarPath: string | undefined,
|
||||
registry: ContextProcessorRegistry,
|
||||
): Promise<ContextProfile> {
|
||||
const sidecarPath = config.getExperimentalContextManagementConfig();
|
||||
if (sidecarPath === 'stressTestProfile') {
|
||||
return stressTestProfile;
|
||||
}
|
||||
|
||||
if (sidecarPath === 'generalistProfile') {
|
||||
return generalistProfile;
|
||||
}
|
||||
|
||||
if (sidecarPath && fsSync.existsSync(sidecarPath)) {
|
||||
const size = fsSync.statSync(sidecarPath).size;
|
||||
// If the file exists but is completely empty (0 bytes), it's safe to fallback.
|
||||
if (size === 0) {
|
||||
return defaultContextProfile;
|
||||
return generalistProfile;
|
||||
}
|
||||
|
||||
// If the file has content, enforce strict validation and throw on failure.
|
||||
return loadConfigFromFile(sidecarPath, registry);
|
||||
}
|
||||
|
||||
return defaultContextProfile;
|
||||
return generalistProfile;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ export interface ContextProfile {
|
|||
* The standard default context management profile.
|
||||
* Optimized for safety, precision, and reliable summarization.
|
||||
*/
|
||||
export const defaultContextProfile: ContextProfile = {
|
||||
export const generalistProfile: ContextProfile = {
|
||||
config: {
|
||||
budget: {
|
||||
retainedTokens: 65000,
|
||||
|
|
@ -143,3 +143,41 @@ export const defaultContextProfile: ContextProfile = {
|
|||
},
|
||||
],
|
||||
};
|
||||
|
||||
/**
|
||||
* A highly aggressive profile designed exclusively for testing Context Management.
|
||||
* Lowers token limits dramatically to force garbage collection and distillation loops
|
||||
* within a few conversational turns.
|
||||
*/
|
||||
export const stressTestProfile: ContextProfile = {
|
||||
config: {
|
||||
budget: {
|
||||
retainedTokens: 4000,
|
||||
maxTokens: 10000,
|
||||
},
|
||||
processorOptions: {
|
||||
ToolMasking: {
|
||||
type: 'ToolMaskingProcessor',
|
||||
options: {
|
||||
stringLengthThresholdTokens: 500,
|
||||
},
|
||||
},
|
||||
NodeTruncation: {
|
||||
type: 'NodeTruncationProcessor',
|
||||
options: {
|
||||
maxTokensPerNode: 1000,
|
||||
},
|
||||
},
|
||||
NodeDistillation: {
|
||||
type: 'NodeDistillationProcessor',
|
||||
options: {
|
||||
nodeThresholdTokens: 1500,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
// Re-use the generalist pipeline architecture exactly, but the `config` above
|
||||
// will be passed into `resolveProcessorOptions` to aggressively override the thresholds.
|
||||
buildPipelines: generalistProfile.buildPipelines,
|
||||
buildAsyncPipelines: generalistProfile.buildAsyncPipelines,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -47,9 +47,8 @@ export class ContextManager {
|
|||
this.historyObserver.start();
|
||||
|
||||
this.eventBus.onPristineHistoryUpdated((event) => {
|
||||
const existingIds = new Set(this.buffer.nodes.map((n) => n.id));
|
||||
const newIds = new Set(event.nodes.map((n) => n.id));
|
||||
const addedNodes = event.nodes.filter((n) => !existingIds.has(n.id));
|
||||
const addedNodes = event.nodes.filter((n) => event.newNodes.has(n.id));
|
||||
|
||||
// Prune any pristine nodes that were dropped from the upstream history
|
||||
this.buffer = this.buffer.prunePristineNodes(newIds);
|
||||
|
|
@ -60,6 +59,13 @@ export class ContextManager {
|
|||
|
||||
this.evaluateTriggers(event.newNodes);
|
||||
});
|
||||
this.eventBus.onProcessorResult((event) => {
|
||||
this.buffer = this.buffer.applyProcessorResult(
|
||||
event.processorId,
|
||||
event.targets,
|
||||
event.returnedNodes,
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -7,6 +7,12 @@
|
|||
import { EventEmitter } from 'node:events';
|
||||
import type { ConcreteNode } from './graph/types.js';
|
||||
|
||||
export interface ProcessorResultEvent {
|
||||
processorId: string;
|
||||
targets: readonly ConcreteNode[];
|
||||
returnedNodes: readonly ConcreteNode[];
|
||||
}
|
||||
|
||||
export interface PristineHistoryUpdatedEvent {
|
||||
nodes: readonly ConcreteNode[];
|
||||
newNodes: Set<string>;
|
||||
|
|
@ -49,4 +55,12 @@ export class ContextEventBus extends EventEmitter {
|
|||
onConsolidationNeeded(listener: (event: ContextConsolidationEvent) => void) {
|
||||
this.on('BUDGET_RETAINED_CROSSED', listener);
|
||||
}
|
||||
|
||||
emitProcessorResult(event: ProcessorResultEvent) {
|
||||
this.emit('PROCESSOR_RESULT', event);
|
||||
}
|
||||
|
||||
onProcessorResult(listener: (event: ProcessorResultEvent) => void) {
|
||||
this.on('PROCESSOR_RESULT', listener);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,9 +3,10 @@
|
|||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
import type { ConcreteNode } from './types.js';
|
||||
import { ContextGraphBuilder } from './toGraph.js';
|
||||
import type { Content } from '@google/genai';
|
||||
import type { Episode, ConcreteNode } from './types.js';
|
||||
import { toGraph } from './toGraph.js';
|
||||
import type { HistoryEvent } from '../../core/agentChatHistory.js';
|
||||
import { fromGraph } from './fromGraph.js';
|
||||
import type { ContextTokenCalculator } from '../utils/contextTokenCalculator.js';
|
||||
import type { NodeBehaviorRegistry } from './behaviorRegistry.js';
|
||||
|
|
@ -15,11 +16,30 @@ export class ContextGraphMapper {
|
|||
|
||||
constructor(private readonly registry: NodeBehaviorRegistry) {}
|
||||
|
||||
toGraph(
|
||||
history: readonly Content[],
|
||||
private builder?: ContextGraphBuilder;
|
||||
|
||||
applyEvent(
|
||||
event: HistoryEvent,
|
||||
tokenCalculator: ContextTokenCalculator,
|
||||
): Episode[] {
|
||||
return toGraph(history, tokenCalculator, this.nodeIdentityMap);
|
||||
): ConcreteNode[] {
|
||||
if (!this.builder) {
|
||||
this.builder = new ContextGraphBuilder(
|
||||
tokenCalculator,
|
||||
this.nodeIdentityMap,
|
||||
);
|
||||
}
|
||||
|
||||
if (event.type === 'CLEAR') {
|
||||
this.builder.clear();
|
||||
return [];
|
||||
}
|
||||
|
||||
if (event.type === 'SYNC_FULL') {
|
||||
this.builder.clear();
|
||||
}
|
||||
|
||||
this.builder.processHistory(event.payload);
|
||||
return this.builder.getNodes();
|
||||
}
|
||||
|
||||
fromGraph(nodes: readonly ConcreteNode[]): Content[] {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
import type { Content, Part } from '@google/genai';
|
||||
import type {
|
||||
ConcreteNode,
|
||||
Episode,
|
||||
SemanticPart,
|
||||
ToolExecution,
|
||||
|
|
@ -38,61 +39,87 @@ function isCompleteEpisode(ep: Partial<Episode>): ep is Episode {
|
|||
);
|
||||
}
|
||||
|
||||
export function toGraph(
|
||||
history: readonly Content[],
|
||||
tokenCalculator: ContextTokenCalculator,
|
||||
nodeIdentityMap: WeakMap<object, string>,
|
||||
): Episode[] {
|
||||
const episodes: Episode[] = [];
|
||||
let currentEpisode: Partial<Episode> | null = null;
|
||||
const pendingCallParts: Map<string, Part> = new Map();
|
||||
export class ContextGraphBuilder {
|
||||
private episodes: Episode[] = [];
|
||||
private currentEpisode: Partial<Episode> | null = null;
|
||||
private pendingCallParts: Map<string, Part> = new Map();
|
||||
|
||||
const finalizeEpisode = () => {
|
||||
if (currentEpisode && isCompleteEpisode(currentEpisode)) {
|
||||
episodes.push(currentEpisode);
|
||||
}
|
||||
currentEpisode = null;
|
||||
};
|
||||
constructor(
|
||||
private readonly tokenCalculator: ContextTokenCalculator,
|
||||
private readonly nodeIdentityMap: WeakMap<object, string>,
|
||||
) {}
|
||||
|
||||
for (const msg of history) {
|
||||
if (!msg.parts) continue;
|
||||
clear() {
|
||||
this.episodes = [];
|
||||
this.currentEpisode = null;
|
||||
this.pendingCallParts.clear();
|
||||
}
|
||||
|
||||
if (msg.role === 'user') {
|
||||
const hasToolResponses = msg.parts.some((p) => !!p.functionResponse);
|
||||
const hasUserParts = msg.parts.some(
|
||||
(p) => !!p.text || !!p.inlineData || !!p.fileData,
|
||||
);
|
||||
processHistory(history: readonly Content[]) {
|
||||
const finalizeEpisode = () => {
|
||||
if (this.currentEpisode && isCompleteEpisode(this.currentEpisode)) {
|
||||
this.episodes.push(this.currentEpisode);
|
||||
}
|
||||
this.currentEpisode = null;
|
||||
};
|
||||
|
||||
if (hasToolResponses) {
|
||||
currentEpisode = parseToolResponses(
|
||||
for (const msg of history) {
|
||||
if (!msg.parts) continue;
|
||||
|
||||
if (msg.role === 'user') {
|
||||
const hasToolResponses = msg.parts.some((p) => !!p.functionResponse);
|
||||
const hasUserParts = msg.parts.some(
|
||||
(p) => !!p.text || !!p.inlineData || !!p.fileData,
|
||||
);
|
||||
|
||||
if (hasToolResponses) {
|
||||
this.currentEpisode = parseToolResponses(
|
||||
msg,
|
||||
this.currentEpisode,
|
||||
this.pendingCallParts,
|
||||
this.tokenCalculator,
|
||||
this.nodeIdentityMap,
|
||||
);
|
||||
}
|
||||
|
||||
if (hasUserParts) {
|
||||
finalizeEpisode();
|
||||
this.currentEpisode = parseUserParts(msg, this.nodeIdentityMap);
|
||||
}
|
||||
} else if (msg.role === 'model') {
|
||||
this.currentEpisode = parseModelParts(
|
||||
msg,
|
||||
currentEpisode,
|
||||
pendingCallParts,
|
||||
tokenCalculator,
|
||||
nodeIdentityMap,
|
||||
this.currentEpisode,
|
||||
this.pendingCallParts,
|
||||
this.nodeIdentityMap,
|
||||
);
|
||||
}
|
||||
|
||||
if (hasUserParts) {
|
||||
finalizeEpisode();
|
||||
currentEpisode = parseUserParts(msg, nodeIdentityMap);
|
||||
}
|
||||
} else if (msg.role === 'model') {
|
||||
currentEpisode = parseModelParts(
|
||||
msg,
|
||||
currentEpisode,
|
||||
pendingCallParts,
|
||||
nodeIdentityMap,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (currentEpisode) {
|
||||
finalizeYield(currentEpisode);
|
||||
finalizeEpisode();
|
||||
}
|
||||
getNodes(): ConcreteNode[] {
|
||||
const copy = [...this.episodes];
|
||||
if (this.currentEpisode) {
|
||||
const activeEp = {
|
||||
...this.currentEpisode,
|
||||
concreteNodes: [...(this.currentEpisode.concreteNodes || [])],
|
||||
};
|
||||
finalizeYield(activeEp);
|
||||
if (isCompleteEpisode(activeEp)) {
|
||||
copy.push(activeEp);
|
||||
}
|
||||
}
|
||||
|
||||
return episodes;
|
||||
const nodes: ConcreteNode[] = [];
|
||||
for (const ep of copy) {
|
||||
if (ep.concreteNodes) {
|
||||
for (const child of ep.concreteNodes) {
|
||||
nodes.push(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
}
|
||||
|
||||
function parseToolResponses(
|
||||
|
|
|
|||
|
|
@ -39,24 +39,15 @@ export class HistoryObserver {
|
|||
}
|
||||
|
||||
this.unsubscribeHistory = this.chatHistory.subscribe(
|
||||
(_event: HistoryEvent) => {
|
||||
// Rebuild the pristine Context Graph graph from the full source history on every change.
|
||||
// Wait, toGraph still returns an Episode[].
|
||||
// We actually need to map the Episode[] to a flat ConcreteNode[] here to form the 'nodes'.
|
||||
const pristineEpisodes = this.graphMapper.toGraph(
|
||||
this.chatHistory.get(),
|
||||
this.tokenCalculator,
|
||||
);
|
||||
(event: HistoryEvent) => {
|
||||
let nodes: ConcreteNode[] = [];
|
||||
|
||||
const nodes: ConcreteNode[] = [];
|
||||
for (const ep of pristineEpisodes) {
|
||||
if (ep.concreteNodes) {
|
||||
for (const child of ep.concreteNodes) {
|
||||
nodes.push(child);
|
||||
}
|
||||
}
|
||||
if (event.type === 'CLEAR') {
|
||||
this.seenNodeIds.clear();
|
||||
}
|
||||
|
||||
nodes = this.graphMapper.applyEvent(event, this.tokenCalculator);
|
||||
|
||||
const newNodes = new Set<string>();
|
||||
for (const node of nodes) {
|
||||
if (!this.seenNodeIds.has(node.id)) {
|
||||
|
|
@ -67,7 +58,7 @@ export class HistoryObserver {
|
|||
|
||||
this.tracer.logEvent(
|
||||
'HistoryObserver',
|
||||
'Rebuilt pristine graph from chat history update',
|
||||
`Rebuilt pristine graph from ${event.type} event`,
|
||||
{ nodesSize: nodes.length, newNodesCount: newNodes.size },
|
||||
);
|
||||
|
||||
|
|
|
|||
117
packages/core/src/context/initializer.ts
Normal file
117
packages/core/src/context/initializer.ts
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2026 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type { Config } from '../config/config.js';
|
||||
import type { GeminiChat } from '../core/geminiChat.js';
|
||||
import { ContextProcessorRegistry } from './config/registry.js';
|
||||
import { loadContextManagementConfig } from './config/configLoader.js';
|
||||
import { ContextTracer } from './tracer.js';
|
||||
import { ContextEventBus } from './eventBus.js';
|
||||
import { ContextEnvironmentImpl } from './pipeline/environmentImpl.js';
|
||||
import { PipelineOrchestrator } from './pipeline/orchestrator.js';
|
||||
import { ContextManager } from './contextManager.js';
|
||||
import { debugLogger } from '../utils/debugLogger.js';
|
||||
import { NodeTruncationProcessorOptionsSchema } from './processors/nodeTruncationProcessor.js';
|
||||
import { ToolMaskingProcessorOptionsSchema } from './processors/toolMaskingProcessor.js';
|
||||
import { HistoryTruncationProcessorOptionsSchema } from './processors/historyTruncationProcessor.js';
|
||||
import { BlobDegradationProcessorOptionsSchema } from './processors/blobDegradationProcessor.js';
|
||||
import { NodeDistillationProcessorOptionsSchema } from './processors/nodeDistillationProcessor.js';
|
||||
import { StateSnapshotProcessorOptionsSchema } from './processors/stateSnapshotProcessor.js';
|
||||
import { StateSnapshotAsyncProcessorOptionsSchema } from './processors/stateSnapshotAsyncProcessor.js';
|
||||
import { RollingSummaryProcessorOptionsSchema } from './processors/rollingSummaryProcessor.js';
|
||||
|
||||
export async function initializeContextManager(
|
||||
config: Config,
|
||||
chat: GeminiChat,
|
||||
lastPromptId: string,
|
||||
): Promise<ContextManager | undefined> {
|
||||
const isV1Enabled = config.getContextManagementConfig().enabled;
|
||||
debugLogger.log(
|
||||
`[initializer] called with enabled=${isV1Enabled}, GEMINI_CONTEXT_TRACE_DIR=${process.env['GEMINI_CONTEXT_TRACE_DIR']}`,
|
||||
);
|
||||
|
||||
if (!isV1Enabled) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const registry = new ContextProcessorRegistry();
|
||||
registry.registerProcessor({
|
||||
id: 'NodeTruncationProcessor',
|
||||
schema: NodeTruncationProcessorOptionsSchema,
|
||||
});
|
||||
registry.registerProcessor({
|
||||
id: 'ToolMaskingProcessor',
|
||||
schema: ToolMaskingProcessorOptionsSchema,
|
||||
});
|
||||
registry.registerProcessor({
|
||||
id: 'HistoryTruncationProcessor',
|
||||
schema: HistoryTruncationProcessorOptionsSchema,
|
||||
});
|
||||
registry.registerProcessor({
|
||||
id: 'BlobDegradationProcessor',
|
||||
schema: BlobDegradationProcessorOptionsSchema,
|
||||
});
|
||||
registry.registerProcessor({
|
||||
id: 'NodeDistillationProcessor',
|
||||
schema: NodeDistillationProcessorOptionsSchema,
|
||||
});
|
||||
registry.registerProcessor({
|
||||
id: 'StateSnapshotProcessor',
|
||||
schema: StateSnapshotProcessorOptionsSchema,
|
||||
});
|
||||
registry.registerProcessor({
|
||||
id: 'StateSnapshotAsyncProcessor',
|
||||
schema: StateSnapshotAsyncProcessorOptionsSchema,
|
||||
});
|
||||
registry.registerProcessor({
|
||||
id: 'RollingSummaryProcessor',
|
||||
schema: RollingSummaryProcessorOptionsSchema,
|
||||
});
|
||||
|
||||
const sidecarProfile = await loadContextManagementConfig(
|
||||
config.getExperimentalContextManagementConfig(),
|
||||
registry,
|
||||
);
|
||||
|
||||
const storage = config.storage;
|
||||
const logDir = storage.getProjectTempLogsDir();
|
||||
const projectTempDir = storage.getProjectTempDir();
|
||||
|
||||
const tracer = new ContextTracer({
|
||||
enabled: !!process.env['GEMINI_CONTEXT_TRACE_DIR'],
|
||||
targetDir: projectTempDir,
|
||||
sessionId: lastPromptId,
|
||||
});
|
||||
|
||||
const eventBus = new ContextEventBus();
|
||||
|
||||
const env = new ContextEnvironmentImpl(
|
||||
config.getBaseLlmClient(),
|
||||
config.getSessionId(),
|
||||
lastPromptId,
|
||||
logDir,
|
||||
projectTempDir,
|
||||
tracer,
|
||||
4,
|
||||
eventBus,
|
||||
);
|
||||
|
||||
const orchestrator = new PipelineOrchestrator(
|
||||
sidecarProfile.buildPipelines(env),
|
||||
sidecarProfile.buildAsyncPipelines(env),
|
||||
env,
|
||||
eventBus,
|
||||
tracer,
|
||||
);
|
||||
|
||||
return new ContextManager(
|
||||
sidecarProfile,
|
||||
env,
|
||||
tracer,
|
||||
orchestrator,
|
||||
chat.agentHistory,
|
||||
);
|
||||
}
|
||||
|
|
@ -95,7 +95,7 @@ describe('ContextWorkingBufferImpl', () => {
|
|||
buffer = buffer.applyProcessorResult('Summarizer', [p1, p2], [summaryNode]);
|
||||
|
||||
// p1 and p2 are removed, p3 remains, s1 is added
|
||||
expect(buffer.nodes.map((n) => n.id)).toEqual(['p3', 's1']);
|
||||
expect(buffer.nodes.map((n) => n.id)).toEqual(['s1', 'p3']);
|
||||
|
||||
// Provenance lookup: The summary node should resolve to both p1 and p2!
|
||||
const roots = buffer.getPristineNodes('s1');
|
||||
|
|
|
|||
|
|
@ -107,13 +107,19 @@ export class ContextWorkingBufferImpl implements ContextWorkingBuffer {
|
|||
|
||||
// Calculate new node array
|
||||
const removedSet = new Set(removedIds);
|
||||
const retainedNodes = this.nodes.filter((n) => !removedSet.has(n.id));
|
||||
const newGraph = [...retainedNodes];
|
||||
|
||||
// We append the output nodes in the same general position if possible,
|
||||
// but in a complex graph we just ensure they exist. V2 graph uses timestamps for order.
|
||||
// For simplicity, we just push added nodes to the end of the retained array
|
||||
newGraph.push(...addedNodes);
|
||||
const newGraph = this.nodes.filter((n) => !removedSet.has(n.id));
|
||||
const insertionIndex = this.nodes.findIndex((n) => removedSet.has(n.id));
|
||||
|
||||
// IMPORTANT: We do NOT use structuredClone here.
|
||||
// The ContextTokenCalculator relies on a WeakMap tied to exact object references
|
||||
// for O(1) performance. Deep cloning would cause catastrophic cache misses.
|
||||
// The pipeline enforces immutability, making reference passing safe.
|
||||
if (insertionIndex !== -1) {
|
||||
newGraph.splice(insertionIndex, 0, ...addedNodes);
|
||||
} else {
|
||||
newGraph.push(...addedNodes);
|
||||
}
|
||||
|
||||
// Calculate new provenance map
|
||||
const newProvenanceMap = new Map(this.provenanceMap);
|
||||
|
|
|
|||
|
|
@ -204,6 +204,11 @@ export class PipelineOrchestrator {
|
|||
allowedTargets,
|
||||
returnedNodes,
|
||||
);
|
||||
this.eventBus.emitProcessorResult({
|
||||
processorId: processor.id,
|
||||
targets: allowedTargets,
|
||||
returnedNodes,
|
||||
});
|
||||
} catch (error) {
|
||||
debugLogger.error(
|
||||
`Pipeline ${pipeline.name} failed async at ${processor.id}:`,
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -22,6 +22,7 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
|
|||
let tmpDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
vi.stubEnv('GEMINI_CONTEXT_TRACE_DIR', '');
|
||||
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gemini-tracer-test-'));
|
||||
|
||||
vi.useFakeTimers();
|
||||
|
|
@ -29,6 +30,7 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
|
|||
});
|
||||
|
||||
afterEach(async () => {
|
||||
vi.unstubAllEnvs();
|
||||
vi.useRealTimers();
|
||||
await fs.rm(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
|
@ -45,7 +47,9 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
|
|||
// Verify Initialization
|
||||
const traceLogPath = path.join(
|
||||
tmpDir,
|
||||
'.gemini/context_trace/test-session/trace.log',
|
||||
'context_trace',
|
||||
'test-session',
|
||||
'trace.log',
|
||||
);
|
||||
const initTraceLog = readFileSync(traceLogPath, 'utf-8');
|
||||
expect(initTraceLog).toContain('[SYSTEM] Context Tracer Initialized');
|
||||
|
|
@ -65,7 +69,10 @@ describe('ContextTracer (Real FS & Mock ID Gen)', () => {
|
|||
|
||||
const expectedAssetPath = path.join(
|
||||
tmpDir,
|
||||
'.gemini/context_trace/test-session/assets/1767268800020-mock-uuid-1-largeKey.json',
|
||||
'context_trace',
|
||||
'test-session',
|
||||
'assets',
|
||||
'1767268800020-mock-uuid-1-largeKey.json',
|
||||
);
|
||||
expect(existsSync(expectedAssetPath)).toBe(true);
|
||||
|
||||
|
|
|
|||
|
|
@ -25,12 +25,9 @@ export class ContextTracer {
|
|||
constructor(options: ContextTracerOptions) {
|
||||
this.enabled = options.enabled ?? false;
|
||||
|
||||
this.traceDir = path.join(
|
||||
options.targetDir,
|
||||
'.gemini',
|
||||
'context_trace',
|
||||
options.sessionId,
|
||||
);
|
||||
this.traceDir =
|
||||
process.env['GEMINI_CONTEXT_TRACE_DIR'] ||
|
||||
path.join(options.targetDir, 'context_trace', options.sessionId);
|
||||
this.assetsDir = path.join(this.traceDir, 'assets');
|
||||
|
||||
if (this.enabled) {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
import type { Part } from '@google/genai';
|
||||
import { estimateTokenCountSync as baseEstimate } from '../../utils/tokenCalculation.js';
|
||||
import { estimateTokenCountSync } from '../../utils/tokenCalculation.js';
|
||||
import type { ConcreteNode } from '../graph/types.js';
|
||||
import type { NodeBehaviorRegistry } from '../graph/behaviorRegistry.js';
|
||||
|
||||
|
|
@ -84,24 +84,27 @@ export class ContextTokenCalculator {
|
|||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Slower, precise estimation for a Gemini Content/Part graph.
|
||||
* Deeply inspects the nested structure and uses the base tokenization math.
|
||||
*/
|
||||
estimateTokensForParts(parts: Part[], depth: number = 0): number {
|
||||
let totalTokens = 0;
|
||||
private readonly partTokenCache = new WeakMap<object, number>();
|
||||
|
||||
estimateTokensForParts(parts: Part[]): number {
|
||||
let total = 0;
|
||||
for (const part of parts) {
|
||||
if (typeof part.text === 'string') {
|
||||
totalTokens += Math.ceil(part.text.length / this.charsPerToken);
|
||||
} else if (part.inlineData !== undefined || part.fileData !== undefined) {
|
||||
totalTokens += 258;
|
||||
if (part !== null && typeof part === 'object') {
|
||||
let cost = this.partTokenCache.get(part);
|
||||
if (cost === undefined) {
|
||||
cost = estimateTokenCountSync([part], 0, this.charsPerToken);
|
||||
this.partTokenCache.set(part, cost);
|
||||
}
|
||||
total += cost;
|
||||
} else {
|
||||
totalTokens += Math.ceil(
|
||||
JSON.stringify(part).length / this.charsPerToken,
|
||||
);
|
||||
total += estimateTokenCountSync([part], 0, this.charsPerToken);
|
||||
}
|
||||
}
|
||||
// Also include structural overhead
|
||||
return totalTokens + baseEstimate(parts, depth);
|
||||
return total;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ import type { ContentGenerator } from './contentGenerator.js';
|
|||
import { LoopDetectionService } from '../services/loopDetectionService.js';
|
||||
import { ChatCompressionService } from '../context/chatCompressionService.js';
|
||||
import { AgentHistoryProvider } from '../context/agentHistoryProvider.js';
|
||||
import type { ContextManager } from '../context/contextManager.js';
|
||||
import { ideContextStore } from '../ide/ideContext.js';
|
||||
import {
|
||||
logContentRetryFailure,
|
||||
|
|
@ -74,6 +75,7 @@ import {
|
|||
import { getDisplayString, resolveModel } from '../config/models.js';
|
||||
import { partToString } from '../utils/partUtils.js';
|
||||
import { coreEvents, CoreEvent } from '../utils/events.js';
|
||||
import { initializeContextManager } from '../context/initializer.js';
|
||||
|
||||
const MAX_TURNS = 100;
|
||||
|
||||
|
|
@ -97,6 +99,7 @@ export class GeminiClient {
|
|||
private readonly compressionService: ChatCompressionService;
|
||||
private readonly agentHistoryProvider: AgentHistoryProvider;
|
||||
private readonly toolOutputMaskingService: ToolOutputMaskingService;
|
||||
private contextManager?: ContextManager;
|
||||
private lastPromptId: string;
|
||||
private currentSequenceModel: string | null = null;
|
||||
private lastSentIdeContext: IdeContext | undefined;
|
||||
|
|
@ -393,6 +396,11 @@ export class GeminiClient {
|
|||
},
|
||||
);
|
||||
await chat.initialize(resumedSessionData, 'main');
|
||||
this.contextManager = await initializeContextManager(
|
||||
this.config,
|
||||
chat,
|
||||
this.lastPromptId,
|
||||
);
|
||||
return chat;
|
||||
} catch (error) {
|
||||
await reportError(
|
||||
|
|
@ -618,10 +626,12 @@ export class GeminiClient {
|
|||
const modelForLimitCheck = this._getActiveModelForCurrentTurn();
|
||||
|
||||
if (this.config.getContextManagementConfig().enabled) {
|
||||
const newHistory = await this.agentHistoryProvider.manageHistory(
|
||||
this.getHistory(),
|
||||
signal,
|
||||
);
|
||||
const newHistory = this.contextManager
|
||||
? await this.contextManager.renderHistory()
|
||||
: await this.agentHistoryProvider.manageHistory(
|
||||
this.getHistory(),
|
||||
signal,
|
||||
);
|
||||
if (newHistory.length !== this.getHistory().length) {
|
||||
this.getChat().setHistory(newHistory);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import {
|
|||
type GenerateContentConfig,
|
||||
type GenerateContentParameters,
|
||||
} from '@google/genai';
|
||||
import { AgentChatHistory } from './agentChatHistory.js';
|
||||
import { toParts } from '../code_assist/converter.js';
|
||||
import {
|
||||
retryWithBackoff,
|
||||
|
|
@ -248,19 +249,21 @@ export class GeminiChat {
|
|||
private sendPromise: Promise<void> = Promise.resolve();
|
||||
private readonly chatRecordingService: ChatRecordingService;
|
||||
private lastPromptTokenCount: number;
|
||||
agentHistory: AgentChatHistory;
|
||||
|
||||
constructor(
|
||||
private readonly context: AgentLoopContext,
|
||||
private systemInstruction: string = '',
|
||||
private tools: Tool[] = [],
|
||||
private history: Content[] = [],
|
||||
history: Content[] = [],
|
||||
resumedSessionData?: ResumedSessionData,
|
||||
private readonly onModelChanged?: (modelId: string) => Promise<Tool[]>,
|
||||
) {
|
||||
validateHistory(history);
|
||||
this.agentHistory = new AgentChatHistory(history);
|
||||
this.chatRecordingService = new ChatRecordingService(context);
|
||||
this.lastPromptTokenCount = estimateTokenCountSync(
|
||||
this.history.flatMap((c) => c.parts || []),
|
||||
this.agentHistory.flatMap((c) => c.parts || []),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -347,7 +350,7 @@ export class GeminiChat {
|
|||
}
|
||||
|
||||
// Add user content to history ONCE before any attempts.
|
||||
this.history.push(userContent);
|
||||
this.agentHistory.push(userContent);
|
||||
const requestContents = this.getHistory(true);
|
||||
|
||||
const streamWithRetries = async function* (
|
||||
|
|
@ -747,8 +750,8 @@ export class GeminiChat {
|
|||
*/
|
||||
getHistory(curated: boolean = false): readonly Content[] {
|
||||
const history = curated
|
||||
? extractCuratedHistory(this.history)
|
||||
: this.history;
|
||||
? extractCuratedHistory([...this.agentHistory.get()])
|
||||
: this.agentHistory.get();
|
||||
return [...history];
|
||||
}
|
||||
|
||||
|
|
@ -756,26 +759,26 @@ export class GeminiChat {
|
|||
* Clears the chat history.
|
||||
*/
|
||||
clearHistory(): void {
|
||||
this.history = [];
|
||||
this.agentHistory.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new entry to the chat history.
|
||||
*/
|
||||
addHistory(content: Content): void {
|
||||
this.history.push(content);
|
||||
this.agentHistory.push(content);
|
||||
}
|
||||
|
||||
setHistory(history: readonly Content[]): void {
|
||||
this.history = [...history];
|
||||
this.agentHistory.set(history);
|
||||
this.lastPromptTokenCount = estimateTokenCountSync(
|
||||
this.history.flatMap((c) => c.parts || []),
|
||||
this.agentHistory.flatMap((c) => c.parts || []),
|
||||
);
|
||||
this.chatRecordingService.updateMessagesFromHistory(history);
|
||||
}
|
||||
|
||||
stripThoughtsFromHistory(): void {
|
||||
this.history = this.history.map((content) => {
|
||||
this.agentHistory.map((content) => {
|
||||
const newContent = { ...content };
|
||||
if (newContent.parts) {
|
||||
newContent.parts = newContent.parts.map((part) => {
|
||||
|
|
@ -1013,7 +1016,7 @@ export class GeminiChat {
|
|||
}
|
||||
}
|
||||
|
||||
this.history.push({ role: 'model', parts: consolidatedParts });
|
||||
this.agentHistory.push({ role: 'model', parts: consolidatedParts });
|
||||
}
|
||||
|
||||
getLastPromptTokenCount(): number {
|
||||
|
|
|
|||
|
|
@ -293,4 +293,9 @@ export type { Content, Part, FunctionCall } from '@google/genai';
|
|||
|
||||
// Export context types and profiles
|
||||
export * from './context/types.js';
|
||||
export * from './context/profiles.js';
|
||||
|
||||
export { generalistProfile as legacyGeneralistProfile } from './context/profiles.js';
|
||||
export {
|
||||
generalistProfile,
|
||||
stressTestProfile,
|
||||
} from './context/config/profiles.js';
|
||||
|
|
|
|||
|
|
@ -29,12 +29,14 @@ const MAX_CHARS_FOR_FULL_HEURISTIC = 100_000;
|
|||
// standard multimodal responses are typically depth 1.
|
||||
const MAX_RECURSION_DEPTH = 3;
|
||||
|
||||
const DEFAULT_CHARS_PER_TOKEN = 4;
|
||||
|
||||
/**
|
||||
* Heuristic estimation of tokens for a text string.
|
||||
*/
|
||||
function estimateTextTokens(text: string): number {
|
||||
function estimateTextTokens(text: string, charsPerToken: number): number {
|
||||
if (text.length > MAX_CHARS_FOR_FULL_HEURISTIC) {
|
||||
return text.length / 4;
|
||||
return text.length / charsPerToken;
|
||||
}
|
||||
|
||||
let tokens = 0;
|
||||
|
|
@ -73,25 +75,33 @@ function estimateMediaTokens(part: Part): number | undefined {
|
|||
* Heuristic estimation for tool responses, avoiding massive string copies
|
||||
* and accounting for nested Gemini 3 multimodal parts.
|
||||
*/
|
||||
function estimateFunctionResponseTokens(part: Part, depth: number): number {
|
||||
function estimateFunctionResponseTokens(
|
||||
part: Part,
|
||||
depth: number,
|
||||
charsPerToken: number,
|
||||
): number {
|
||||
const fr = part.functionResponse;
|
||||
if (!fr) return 0;
|
||||
|
||||
let totalTokens = (fr.name?.length ?? 0) / 4;
|
||||
let totalTokens = (fr.name?.length ?? 0) / charsPerToken;
|
||||
const response = fr.response as unknown;
|
||||
|
||||
if (typeof response === 'string') {
|
||||
totalTokens += response.length / 4;
|
||||
totalTokens += response.length / charsPerToken;
|
||||
} else if (response !== undefined && response !== null) {
|
||||
// For objects, stringify only the payload, not the whole Part object.
|
||||
totalTokens += JSON.stringify(response).length / 4;
|
||||
totalTokens += JSON.stringify(response).length / charsPerToken;
|
||||
}
|
||||
|
||||
// Gemini 3: Handle nested multimodal parts recursively.
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
const nestedParts = (fr as unknown as { parts?: Part[] }).parts;
|
||||
if (nestedParts && nestedParts.length > 0) {
|
||||
totalTokens += estimateTokenCountSync(nestedParts, depth + 1);
|
||||
totalTokens += estimateTokenCountSync(
|
||||
nestedParts,
|
||||
depth + 1,
|
||||
charsPerToken,
|
||||
);
|
||||
}
|
||||
|
||||
return totalTokens;
|
||||
|
|
@ -100,11 +110,12 @@ function estimateFunctionResponseTokens(part: Part, depth: number): number {
|
|||
/**
|
||||
* Estimates token count for parts synchronously using a heuristic.
|
||||
* - Text: character-based heuristic (ASCII vs CJK) for small strings, length / charsPerToken for massive ones.
|
||||
* - Non-text (Tools, etc): JSON string length / 4.
|
||||
* - Non-text (Tools, etc): JSON string length / charsPerToken.
|
||||
*/
|
||||
export function estimateTokenCountSync(
|
||||
parts: Part[],
|
||||
depth: number = 0,
|
||||
charsPerToken: number = DEFAULT_CHARS_PER_TOKEN,
|
||||
): number {
|
||||
if (depth > MAX_RECURSION_DEPTH) {
|
||||
return 0;
|
||||
|
|
@ -113,9 +124,9 @@ export function estimateTokenCountSync(
|
|||
let totalTokens = 0;
|
||||
for (const part of parts) {
|
||||
if (typeof part.text === 'string') {
|
||||
totalTokens += estimateTextTokens(part.text);
|
||||
totalTokens += estimateTextTokens(part.text, charsPerToken);
|
||||
} else if (part.functionResponse) {
|
||||
totalTokens += estimateFunctionResponseTokens(part, depth);
|
||||
totalTokens += estimateFunctionResponseTokens(part, depth, charsPerToken);
|
||||
} else {
|
||||
const mediaEstimate = estimateMediaTokens(part);
|
||||
if (mediaEstimate !== undefined) {
|
||||
|
|
@ -123,7 +134,7 @@ export function estimateTokenCountSync(
|
|||
} else {
|
||||
// Fallback for other non-text parts (e.g., functionCall).
|
||||
// Note: JSON.stringify(part) here is safe as these parts are typically small.
|
||||
totalTokens += JSON.stringify(part).length / 4;
|
||||
totalTokens += JSON.stringify(part).length / charsPerToken;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -162,9 +173,9 @@ export async function calculateRequestTokenCount(
|
|||
} catch (error) {
|
||||
// Fallback to local estimation if the API call fails
|
||||
debugLogger.debug('countTokens API failed:', error);
|
||||
return estimateTokenCountSync(parts);
|
||||
return estimateTokenCountSync(parts, 0, DEFAULT_CHARS_PER_TOKEN);
|
||||
}
|
||||
}
|
||||
|
||||
return estimateTokenCountSync(parts);
|
||||
return estimateTokenCountSync(parts, 0, DEFAULT_CHARS_PER_TOKEN);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2954,6 +2954,13 @@
|
|||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"stressTestProfile": {
|
||||
"title": "Use the stress test profile to aggressively trigger context management.",
|
||||
"description": "Significantly lowers token limits to force early garbage collection and distillation for testing purposes.",
|
||||
"markdownDescription": "Significantly lowers token limits to force early garbage collection and distillation for testing purposes.\n\n- Category: `Experimental`\n- Requires restart: `yes`\n- Default: `false`",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"autoMemory": {
|
||||
"title": "Auto Memory",
|
||||
"description": "Automatically extract reusable skills from past sessions in the background. Review results with /memory inbox.",
|
||||
|
|
|
|||
Loading…
Reference in a new issue