diff --git a/docs/get-started/configuration-v1.md b/docs/get-started/configuration-v1.md index 4c00b00f4d..866028a975 100644 --- a/docs/get-started/configuration-v1.md +++ b/docs/get-started/configuration-v1.md @@ -473,21 +473,6 @@ a few things you can try in order of recommendation: "loadMemoryFromIncludeDirectories": true ``` -- **`chatCompression`** (object): - - **Description:** Controls the settings for chat history compression, both - automatic and when manually invoked through the /compress command. - - **Properties:** - - **`contextPercentageThreshold`** (number): A value between 0 and 1 that - specifies the token threshold for compression as a percentage of the - model's total token limit. For example, a value of `0.6` will trigger - compression when the chat history exceeds 60% of the token limit. - - **Example:** - ```json - "chatCompression": { - "contextPercentageThreshold": 0.6 - } - ``` - - **`showLineNumbers`** (boolean): - **Description:** Controls whether line numbers are displayed in code blocks in the CLI output. diff --git a/docs/get-started/configuration.md b/docs/get-started/configuration.md index 45353b9034..632f28000c 100644 --- a/docs/get-started/configuration.md +++ b/docs/get-started/configuration.md @@ -244,13 +244,13 @@ their corresponding top-level category object in your `settings.json` file. example `{"run_shell_command": {"tokenBudget": 2000}}` - **Default:** `undefined` -- **`model.chatCompression.contextPercentageThreshold`** (number): +- **`model.compressionThreshold`** (number): - **Description:** Sets the threshold for chat history compression as a - percentage of the model's total token limit. This is a value between 0 and 1 + fraction of the model's total token limit. This is a value between 0 and 1 that applies to both automatic compression and the manual `/compress` command. For example, a value of `0.6` will trigger compression when the chat history exceeds 60% of the token limit. 
- - **Default:** `0.7` + - **Default:** `0.2` - **`model.skipNextSpeakerCheck`** (boolean): - **Description:** Skip the next speaker check. diff --git a/packages/cli/src/config/config.test.ts b/packages/cli/src/config/config.test.ts index 5490afb678..be0986ea9d 100644 --- a/packages/cli/src/config/config.test.ts +++ b/packages/cli/src/config/config.test.ts @@ -1572,9 +1572,13 @@ describe('loadCliConfig with includeDirectories', () => { }); }); +<<<<<<< HEAD describe('loadCliConfig chatCompression', () => { const originalArgv = process.argv; +======= +describe('loadCliConfig compressionThreshold', () => { +>>>>>>> 3332703f (Make compression threshold editable in the UI. (#12317)) beforeEach(() => { vi.resetAllMocks(); vi.mocked(os.homedir).mockReturnValue('/mock/home/user'); @@ -1587,28 +1591,36 @@ describe('loadCliConfig chatCompression', () => { vi.restoreAllMocks(); }); - it('should pass chatCompression settings to the core config', async () => { + it('should pass settings to the core config', async () => { process.argv = ['node', 'script.js']; const argv = await parseArguments({} as Settings); const settings: Settings = { model: { - chatCompression: { - contextPercentageThreshold: 0.5, - }, + compressionThreshold: 0.5, }, }; +<<<<<<< HEAD const config = await loadCliConfig(settings, [], 'test-session', argv); expect(config.getChatCompression()).toEqual({ contextPercentageThreshold: 0.5, }); +======= + const config = await loadCliConfig(settings, 'test-session', argv); + expect(config.getCompressionThreshold()).toBe(0.5); +>>>>>>> 3332703f (Make compression threshold editable in the UI. 
(#12317)) }); - it('should have undefined chatCompression if not in settings', async () => { + it('should have undefined compressionThreshold if not in settings', async () => { process.argv = ['node', 'script.js']; const argv = await parseArguments({} as Settings); const settings: Settings = {}; +<<<<<<< HEAD const config = await loadCliConfig(settings, [], 'test-session', argv); expect(config.getChatCompression()).toBeUndefined(); +======= + const config = await loadCliConfig(settings, 'test-session', argv); + expect(config.getCompressionThreshold()).toBeUndefined(); +>>>>>>> 3332703f (Make compression threshold editable in the UI. (#12317)) }); }); diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 1bb1a3b70d..318f674170 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -610,7 +610,7 @@ export async function loadCliConfig( noBrowser: !!process.env['NO_BROWSER'], summarizeToolOutput: settings.model?.summarizeToolOutput, ideMode, - chatCompression: settings.model?.chatCompression, + compressionThreshold: settings.model?.compressionThreshold, folderTrust, interactive, trustedFolder, diff --git a/packages/cli/src/config/settings.test.ts b/packages/cli/src/config/settings.test.ts index 672f42bd5b..12687922e9 100644 --- a/packages/cli/src/config/settings.test.ts +++ b/packages/cli/src/config/settings.test.ts @@ -1051,15 +1051,15 @@ describe('Settings Loading and Merging', () => { }); }); - it('should merge chatCompression settings, with workspace taking precedence', () => { + it('should merge compressionThreshold settings, with workspace taking precedence', () => { (mockFsExistsSync as Mock).mockReturnValue(true); const userSettingsContent = { general: {}, - model: { chatCompression: { contextPercentageThreshold: 0.5 } }, + model: { compressionThreshold: 0.5 }, }; const workspaceSettingsContent = { general: {}, - model: { chatCompression: { contextPercentageThreshold: 0.8 } }, + model: { 
compressionThreshold: 0.8 }, }; (fs.readFileSync as Mock).mockImplementation( @@ -1074,15 +1074,11 @@ describe('Settings Loading and Merging', () => { const settings = loadSettings(MOCK_WORKSPACE_DIR); - expect(settings.user.settings.model?.chatCompression).toEqual({ - contextPercentageThreshold: 0.5, - }); - expect(settings.workspace.settings.model?.chatCompression).toEqual({ - contextPercentageThreshold: 0.8, - }); - expect(settings.merged.model?.chatCompression).toEqual({ - contextPercentageThreshold: 0.8, - }); + expect(settings.user.settings.model?.compressionThreshold).toEqual(0.5); + expect(settings.workspace.settings.model?.compressionThreshold).toEqual( + 0.8, + ); + expect(settings.merged.model?.compressionThreshold).toEqual(0.8); }); it('should merge output format settings, with workspace taking precedence', () => { @@ -1109,13 +1105,13 @@ describe('Settings Loading and Merging', () => { expect(settings.merged.output?.format).toBe('json'); }); - it('should handle chatCompression when only in user settings', () => { + it('should handle compressionThreshold when only in user settings', () => { (mockFsExistsSync as Mock).mockImplementation( (p: fs.PathLike) => p === USER_SETTINGS_PATH, ); const userSettingsContent = { general: {}, - model: { chatCompression: { contextPercentageThreshold: 0.5 } }, + model: { compressionThreshold: 0.5 }, }; (fs.readFileSync as Mock).mockImplementation( (p: fs.PathOrFileDescriptor) => { @@ -1126,9 +1122,7 @@ describe('Settings Loading and Merging', () => { ); const settings = loadSettings(MOCK_WORKSPACE_DIR); - expect(settings.merged.model?.chatCompression).toEqual({ - contextPercentageThreshold: 0.5, - }); + expect(settings.merged.model?.compressionThreshold).toEqual(0.5); }); it('should have model as undefined if not in any settings file', () => { @@ -1138,39 +1132,15 @@ describe('Settings Loading and Merging', () => { expect(settings.merged.model).toBeUndefined(); }); - it('should ignore chatCompression if 
contextPercentageThreshold is invalid', () => { - const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); - (mockFsExistsSync as Mock).mockImplementation( - (p: fs.PathLike) => p === USER_SETTINGS_PATH, - ); - const userSettingsContent = { - general: {}, - model: { chatCompression: { contextPercentageThreshold: 1.5 } }, - }; - (fs.readFileSync as Mock).mockImplementation( - (p: fs.PathOrFileDescriptor) => { - if (p === USER_SETTINGS_PATH) - return JSON.stringify(userSettingsContent); - return '{}'; - }, - ); - - const settings = loadSettings(MOCK_WORKSPACE_DIR); - expect(settings.merged.model?.chatCompression).toEqual({ - contextPercentageThreshold: 1.5, - }); - warnSpy.mockRestore(); - }); - - it('should deep merge chatCompression settings', () => { + it('should use user compressionThreshold if workspace does not define it', () => { (mockFsExistsSync as Mock).mockReturnValue(true); const userSettingsContent = { general: {}, - model: { chatCompression: { contextPercentageThreshold: 0.5 } }, + model: { compressionThreshold: 0.5 }, }; const workspaceSettingsContent = { general: {}, - model: { chatCompression: {} }, + model: {}, }; (fs.readFileSync as Mock).mockImplementation( @@ -1185,9 +1155,7 @@ describe('Settings Loading and Merging', () => { const settings = loadSettings(MOCK_WORKSPACE_DIR); - expect(settings.merged.model?.chatCompression).toEqual({ - contextPercentageThreshold: 0.5, - }); + expect(settings.merged.model?.compressionThreshold).toEqual(0.5); }); it('should merge includeDirectories from all scopes', () => { @@ -1972,9 +1940,6 @@ describe('Settings Loading and Merging', () => { }, model: { name: 'gemini-pro', - chatCompression: { - contextPercentageThreshold: 0.5, - }, }, mcpServers: { 'server-1': { @@ -1993,9 +1958,6 @@ describe('Settings Loading and Merging', () => { myTheme: {}, }, model: 'gemini-pro', - chatCompression: { - contextPercentageThreshold: 0.5, - }, mcpServers: { 'server-1': { command: 'node server.js', @@ -2035,9 
+1997,6 @@ describe('Settings Loading and Merging', () => { }, model: { name: 'gemini-pro', - chatCompression: { - contextPercentageThreshold: 0.8, - }, }, context: { fileName: 'CONTEXT.md', @@ -2077,9 +2036,6 @@ describe('Settings Loading and Merging', () => { theme: 'dark', usageStatisticsEnabled: false, model: 'gemini-pro', - chatCompression: { - contextPercentageThreshold: 0.8, - }, contextFileName: 'CONTEXT.md', includeDirectories: ['/src'], sandbox: true, diff --git a/packages/cli/src/config/settings.ts b/packages/cli/src/config/settings.ts index 0835cdd178..51c67b8b76 100644 --- a/packages/cli/src/config/settings.ts +++ b/packages/cli/src/config/settings.ts @@ -64,7 +64,7 @@ const MIGRATION_MAP: Record = { autoAccept: 'tools.autoAccept', autoConfigureMaxOldSpaceSize: 'advanced.autoConfigureMemory', bugCommand: 'advanced.bugCommand', - chatCompression: 'model.chatCompression', + chatCompression: 'model.compressionThreshold', checkpointing: 'general.checkpointing', coreTools: 'tools.core', contextFileName: 'context.fileName', diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index c01e691f44..7cdc36f860 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -14,7 +14,6 @@ import type { BugCommandSettings, TelemetrySettings, AuthType, - ChatCompressionSettings, } from '@google/gemini-cli-core'; import { DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, @@ -578,14 +577,15 @@ const SETTINGS_SCHEMA = { description: 'Settings for summarizing tool output.', showInDialog: false, }, - chatCompression: { - type: 'object', - label: 'Chat Compression', + compressionThreshold: { + type: 'number', + label: 'Compression Threshold', category: 'Model', requiresRestart: false, - default: undefined as ChatCompressionSettings | undefined, - description: 'Chat compression settings.', - showInDialog: false, + default: 0.2 as number, + description: + 'The fraction of context usage at which to 
trigger context compression (e.g. 0.2, 0.3).', + showInDialog: true, }, skipNextSpeakerCheck: { type: 'boolean', diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 8e267965f2..4d677b2aa5 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -96,10 +96,6 @@ export interface BugCommandSettings { urlTemplate: string; } -export interface ChatCompressionSettings { - contextPercentageThreshold?: number; -} - export interface SummarizeToolOutputSettings { tokenBudget?: number; } @@ -261,7 +257,7 @@ export interface ConfigParameters { folderTrust?: boolean; ideMode?: boolean; loadMemoryFromIncludeDirectories?: boolean; - chatCompression?: ChatCompressionSettings; + compressionThreshold?: number; interactive?: boolean; trustedFolder?: boolean; useRipgrep?: boolean; @@ -354,7 +350,7 @@ export class Config { | undefined; private readonly experimentalZedIntegration: boolean = false; private readonly loadMemoryFromIncludeDirectories: boolean = false; - private readonly chatCompression: ChatCompressionSettings | undefined; + private readonly compressionThreshold: number | undefined; private readonly interactive: boolean; private readonly ptyInfo: string; private readonly trustedFolder: boolean | undefined; @@ -453,7 +449,7 @@ export class Config { this.ideMode = params.ideMode ?? false; this.loadMemoryFromIncludeDirectories = params.loadMemoryFromIncludeDirectories ?? false; - this.chatCompression = params.chatCompression; + this.compressionThreshold = params.compressionThreshold; this.interactive = params.interactive ?? false; this.ptyInfo = params.ptyInfo ?? 
'child_process'; this.trustedFolder = params.trustedFolder; @@ -977,8 +973,8 @@ export class Config { this.fileSystemService = fileSystemService; } - getChatCompression(): ChatCompressionSettings | undefined { - return this.chatCompression; + getCompressionThreshold(): number | undefined { + return this.compressionThreshold; } isInteractiveShellEnabled(): boolean { diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts new file mode 100644 index 0000000000..a0766f9ffc --- /dev/null +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -0,0 +1,294 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { + ChatCompressionService, + findCompressSplitPoint, +} from './chatCompressionService.js'; +import type { Content, GenerateContentResponse } from '@google/genai'; +import { CompressionStatus } from '../core/turn.js'; +import { tokenLimit } from '../core/tokenLimits.js'; +import type { GeminiChat } from '../core/geminiChat.js'; +import type { Config } from '../config/config.js'; +import { getInitialChatHistory } from '../utils/environmentContext.js'; +import type { ContentGenerator } from '../core/contentGenerator.js'; + +vi.mock('../core/tokenLimits.js'); +vi.mock('../telemetry/loggers.js'); +vi.mock('../utils/environmentContext.js'); + +describe('findCompressSplitPoint', () => { + it('should throw an error for non-positive numbers', () => { + expect(() => findCompressSplitPoint([], 0)).toThrow( + 'Fraction must be between 0 and 1', + ); + }); + + it('should throw an error for a fraction greater than or equal to 1', () => { + expect(() => findCompressSplitPoint([], 1)).toThrow( + 'Fraction must be between 0 and 1', + ); + }); + + it('should handle an empty history', () => { + expect(findCompressSplitPoint([], 0.5)).toBe(0); + }); + + it('should handle a 
fraction in the middle', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'This is the first message.' }] }, // JSON length: 66 (19%) + { role: 'model', parts: [{ text: 'This is the second message.' }] }, // JSON length: 68 (40%) + { role: 'user', parts: [{ text: 'This is the third message.' }] }, // JSON length: 66 (60%) + { role: 'model', parts: [{ text: 'This is the fourth message.' }] }, // JSON length: 68 (80%) + { role: 'user', parts: [{ text: 'This is the fifth message.' }] }, // JSON length: 65 (100%) + ]; + expect(findCompressSplitPoint(history, 0.5)).toBe(4); + }); + + it('should handle a fraction of last index', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'This is the first message.' }] }, // JSON length: 66 (19%) + { role: 'model', parts: [{ text: 'This is the second message.' }] }, // JSON length: 68 (40%) + { role: 'user', parts: [{ text: 'This is the third message.' }] }, // JSON length: 66 (60%) + { role: 'model', parts: [{ text: 'This is the fourth message.' }] }, // JSON length: 68 (80%) + { role: 'user', parts: [{ text: 'This is the fifth message.' }] }, // JSON length: 65 (100%) + ]; + expect(findCompressSplitPoint(history, 0.9)).toBe(4); + }); + + it('should handle a fraction of after last index', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'This is the first message.' }] }, // JSON length: 66 (24%) + { role: 'model', parts: [{ text: 'This is the second message.' }] }, // JSON length: 68 (50%) + { role: 'user', parts: [{ text: 'This is the third message.' }] }, // JSON length: 66 (74%) + { role: 'model', parts: [{ text: 'This is the fourth message.' }] }, // JSON length: 68 (100%) + ]; + expect(findCompressSplitPoint(history, 0.8)).toBe(4); + }); + + it('should return earlier splitpoint if no valid ones are after threshold', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'This is the first message.' 
}] }, + { role: 'model', parts: [{ text: 'This is the second message.' }] }, + { role: 'user', parts: [{ text: 'This is the third message.' }] }, + { role: 'model', parts: [{ functionCall: { name: 'foo', args: {} } }] }, + ]; + // Can't return 4 because the previous item has a function call. + expect(findCompressSplitPoint(history, 0.99)).toBe(2); + }); + + it('should handle a history with only one item', () => { + const historyWithEmptyParts: Content[] = [ + { role: 'user', parts: [{ text: 'Message 1' }] }, + ]; + expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(0); + }); + + it('should handle history with weird parts', () => { + const historyWithEmptyParts: Content[] = [ + { role: 'user', parts: [{ text: 'Message 1' }] }, + { + role: 'model', + parts: [{ fileData: { fileUri: 'derp', mimeType: 'text/plain' } }], + }, + { role: 'user', parts: [{ text: 'Message 2' }] }, + ]; + expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(2); + }); +}); + +describe('ChatCompressionService', () => { + let service: ChatCompressionService; + let mockChat: GeminiChat; + let mockConfig: Config; + const mockModel = 'gemini-pro'; + const mockPromptId = 'test-prompt-id'; + + beforeEach(() => { + service = new ChatCompressionService(); + mockChat = { + getHistory: vi.fn(), + getLastPromptTokenCount: vi.fn().mockReturnValue(500), + } as unknown as GeminiChat; + mockConfig = { + getCompressionThreshold: vi.fn(), + getContentGenerator: vi.fn(), + } as unknown as Config; + + vi.mocked(tokenLimit).mockReturnValue(1000); + vi.mocked(getInitialChatHistory).mockImplementation( + async (_config, extraHistory) => extraHistory || [], + ); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('should return NOOP if history is empty', async () => { + vi.mocked(mockChat.getHistory).mockReturnValue([]); + const result = await service.compress( + mockChat, + mockPromptId, + false, + mockModel, + mockConfig, + false, + ); + 
expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(result.newHistory).toBeNull(); + }); + + it('should return NOOP if previously failed and not forced', async () => { + vi.mocked(mockChat.getHistory).mockReturnValue([ + { role: 'user', parts: [{ text: 'hi' }] }, + ]); + const result = await service.compress( + mockChat, + mockPromptId, + false, + mockModel, + mockConfig, + true, + ); + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(result.newHistory).toBeNull(); + }); + + it('should return NOOP if under token threshold and not forced', async () => { + vi.mocked(mockChat.getHistory).mockReturnValue([ + { role: 'user', parts: [{ text: 'hi' }] }, + ]); + vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(600); + vi.mocked(tokenLimit).mockReturnValue(1000); + // NOTE: getCompressionThreshold() is an unconfigured mock here, so the 0.2 default applies (0.2 * 1000 = 200) and 600 is above it; the NOOP below comes from the one-message history having nothing to compress. TODO: use a token count below 200 to exercise the threshold branch. + + const result = await service.compress( + mockChat, + mockPromptId, + false, + mockModel, + mockConfig, + false, + ); + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(result.newHistory).toBeNull(); + }); + + it('should compress if over token threshold', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'msg4' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(800); + vi.mocked(tokenLimit).mockReturnValue(1000); + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Summary' }], + }, + }, + ], + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + false, + mockModel, 
+ mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); + expect(result.newHistory).not.toBeNull(); + expect(result.newHistory![0].parts![0].text).toBe('Summary'); + expect(mockGenerateContent).toHaveBeenCalled(); + }); + + it('should force compress even if under threshold', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'msg4' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(100); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: 'Summary' }], + }, + }, + ], + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + true, // forced + mockModel, + mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); + expect(result.newHistory).not.toBeNull(); + }); + + it('should return FAILED if new token count is inflated', async () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'msg2' }] }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(mockChat.getLastPromptTokenCount).mockReturnValue(10); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const longSummary = 'a'.repeat(1000); // Long summary to inflate token count + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [{ text: longSummary }], + }, + }, + ], + } as unknown as GenerateContentResponse); + 
vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + true, + mockModel, + mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe( + CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT, + ); + expect(result.newHistory).toBeNull(); + }); +}); diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts new file mode 100644 index 0000000000..573b2ae458 --- /dev/null +++ b/packages/core/src/services/chatCompressionService.ts @@ -0,0 +1,215 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Content } from '@google/genai'; +import type { Config } from '../config/config.js'; +import type { GeminiChat } from '../core/geminiChat.js'; +import { type ChatCompressionInfo, CompressionStatus } from '../core/turn.js'; +import { tokenLimit } from '../core/tokenLimits.js'; +import { getCompressionPrompt } from '../core/prompts.js'; +import { getResponseText } from '../utils/partUtils.js'; +import { logChatCompression } from '../telemetry/loggers.js'; +import { makeChatCompressionEvent } from '../telemetry/types.js'; +import { getInitialChatHistory } from '../utils/environmentContext.js'; + +/** + * Default threshold for compression token count as a fraction of the model's + * token limit. If the chat history exceeds this threshold, it will be compressed. + */ +export const DEFAULT_COMPRESSION_TOKEN_THRESHOLD = 0.2; + +/** + * The fraction of the latest chat history to keep. A value of 0.3 + * means that only the last 30% of the chat history will be kept after compression. + */ +export const COMPRESSION_PRESERVE_THRESHOLD = 0.3; + +/** + * Returns the index of the oldest item to keep when compressing. May return + * contents.length which indicates that everything should be compressed. 
+ * + * Exported for testing purposes. + */ +export function findCompressSplitPoint( + contents: Content[], + fraction: number, +): number { + if (fraction <= 0 || fraction >= 1) { + throw new Error('Fraction must be between 0 and 1'); + } + + const charCounts = contents.map((content) => JSON.stringify(content).length); + const totalCharCount = charCounts.reduce((a, b) => a + b, 0); + const targetCharCount = totalCharCount * fraction; + + let lastSplitPoint = 0; // 0 is always valid (compress nothing) + let cumulativeCharCount = 0; + for (let i = 0; i < contents.length; i++) { + const content = contents[i]; + if ( + content.role === 'user' && + !content.parts?.some((part) => !!part.functionResponse) + ) { + if (cumulativeCharCount >= targetCharCount) { + return i; + } + lastSplitPoint = i; + } + cumulativeCharCount += charCounts[i]; + } + + // We found no split points after targetCharCount. + // Check if it's safe to compress everything. + const lastContent = contents[contents.length - 1]; + if ( + lastContent?.role === 'model' && + !lastContent?.parts?.some((part) => part.functionCall) + ) { + return contents.length; + } + + // Can't compress everything so just compress at last splitpoint. + return lastSplitPoint; +} + +export class ChatCompressionService { + async compress( + chat: GeminiChat, + promptId: string, + force: boolean, + model: string, + config: Config, + hasFailedCompressionAttempt: boolean, + ): Promise<{ newHistory: Content[] | null; info: ChatCompressionInfo }> { + const curatedHistory = chat.getHistory(true); + + // Regardless of `force`, don't do anything if the history is empty. + if ( + curatedHistory.length === 0 || + (hasFailedCompressionAttempt && !force) + ) { + return { + newHistory: null, + info: { + originalTokenCount: 0, + newTokenCount: 0, + compressionStatus: CompressionStatus.NOOP, + }, + }; + } + + const originalTokenCount = chat.getLastPromptTokenCount(); + + // Don't compress if not forced and we are under the limit. 
+ if (!force) { + const threshold = + config.getCompressionThreshold() ?? DEFAULT_COMPRESSION_TOKEN_THRESHOLD; + if (originalTokenCount < threshold * tokenLimit(model)) { + return { + newHistory: null, + info: { + originalTokenCount, + newTokenCount: originalTokenCount, + compressionStatus: CompressionStatus.NOOP, + }, + }; + } + } + + const splitPoint = findCompressSplitPoint( + curatedHistory, + 1 - COMPRESSION_PRESERVE_THRESHOLD, + ); + + const historyToCompress = curatedHistory.slice(0, splitPoint); + const historyToKeep = curatedHistory.slice(splitPoint); + + if (historyToCompress.length === 0) { + return { + newHistory: null, + info: { + originalTokenCount, + newTokenCount: originalTokenCount, + compressionStatus: CompressionStatus.NOOP, + }, + }; + } + + const summaryResponse = await config.getContentGenerator().generateContent( + { + model, + contents: [ + ...historyToCompress, + { + role: 'user', + parts: [ + { + text: 'First, reason in your scratchpad. Then, generate the .', + }, + ], + }, + ], + config: { + systemInstruction: { text: getCompressionPrompt() }, + }, + }, + promptId, + ); + const summary = getResponseText(summaryResponse) ?? ''; + + const extraHistory: Content[] = [ + { + role: 'user', + parts: [{ text: summary }], + }, + { + role: 'model', + parts: [{ text: 'Got it. Thanks for the additional context!' }], + }, + ...historyToKeep, + ]; + + // Use a shared utility to construct the initial history for an accurate token count. 
+ const fullNewHistory = await getInitialChatHistory(config, extraHistory); + + // Estimate token count 1 token ≈ 4 characters + const newTokenCount = Math.floor( + fullNewHistory.reduce( + (total, content) => total + JSON.stringify(content).length, + 0, + ) / 4, + ); + + logChatCompression( + config, + makeChatCompressionEvent({ + tokens_before: originalTokenCount, + tokens_after: newTokenCount, + }), + ); + + if (newTokenCount > originalTokenCount) { + return { + newHistory: null, + info: { + originalTokenCount, + newTokenCount, + compressionStatus: + CompressionStatus.COMPRESSION_FAILED_INFLATED_TOKEN_COUNT, + }, + }; + } else { + return { + newHistory: extraHistory, + info: { + originalTokenCount, + newTokenCount, + compressionStatus: CompressionStatus.COMPRESSED, + }, + }; + } + } +}