diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts index ed24a78f..69f2f776 100644 --- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts +++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts @@ -205,6 +205,7 @@ export type ThreadStreamState = { id: string; content: string; rawParams: RawToolParamsObj; + rawParamsStr?: string; mcpServerName: string | undefined; }; interrupt: Promise<() => void>; @@ -536,7 +537,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { // if running now but stream state doesn't indicate it (happens if restart Void), cancel that last tool if (lastMessage && lastMessage.role === 'tool' && lastMessage.type === 'running_now') { - this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', content: lastMessage.content, id: lastMessage.id, rawParams: lastMessage.rawParams, result: null, name: lastMessage.name, params: lastMessage.params, mcpServerName: lastMessage.mcpServerName }) + this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', content: lastMessage.content, id: lastMessage.id, rawParams: lastMessage.rawParams, rawParamsStr: lastMessage.rawParamsStr, result: null, name: lastMessage.name, params: lastMessage.params, mcpServerName: lastMessage.mcpServerName }) } } @@ -681,10 +682,10 @@ class ChatThreadService extends Disposable implements IChatThreadService { } else return - const { name, id, rawParams, mcpServerName } = lastMsg + const { name, id, rawParams, rawParamsStr, mcpServerName } = lastMsg const errorMessage = this.toolErrMsgs.rejected - this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params: params, name: name, content: errorMessage, result: null, id, rawParams, mcpServerName }) + this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params: params, name: name, content: errorMessage, result: null, id, rawParams, rawParamsStr, mcpServerName }) 
this._setStreamState(threadId, undefined) } @@ -704,9 +705,9 @@ class ChatThreadService extends Disposable implements IChatThreadService { } // add tool that's running else if (this.streamState[threadId]?.isRunning === 'tool') { - const { toolName, toolParams, id, content: content_, rawParams, mcpServerName } = this.streamState[threadId].toolInfo + const { toolName, toolParams, id, content: content_, rawParams, rawParamsStr, mcpServerName } = this.streamState[threadId].toolInfo const content = content_ || this.toolErrMsgs.interrupted - this._updateLatestTool(threadId, { role: 'tool', name: toolName, params: toolParams, id, content, rawParams, type: 'rejected', result: null, mcpServerName }) + this._updateLatestTool(threadId, { role: 'tool', name: toolName, params: toolParams, id, content, rawParams, rawParamsStr, type: 'rejected', result: null, mcpServerName }) } // reject the tool for the user if relevant else if (this.streamState[threadId]?.isRunning === 'awaiting_user') { @@ -745,8 +746,12 @@ class ChatThreadService extends Disposable implements IChatThreadService { toolName: ToolName, toolId: string, mcpServerName: string | undefined, - opts: { preapproved: true, unvalidatedToolParams: RawToolParamsObj, validatedParams: ToolCallParams } | { preapproved: false, unvalidatedToolParams: RawToolParamsObj }, + opts: { preapproved: true, unvalidatedToolParams: RawToolParamsObj, validatedParams: ToolCallParams, rawParamsStr?: string } | { preapproved: false, unvalidatedToolParams: RawToolParamsObj, rawParamsStr?: string }, ): Promise<{ awaitingUserApproval?: boolean, interrupted?: boolean }> => { + // Carry the model's original serialized arguments string (when available) into + // every tool message we persist. This lets the replay path send byte-identical + // tool_calls back to the provider, preserving the prefix cache across turns. 
+ const rawParamsStr = opts.rawParamsStr // compute these below let toolParams: ToolCallParams @@ -770,7 +775,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { } catch (error) { const errorMessage = getErrorMessage(error) - this._addMessageToThread(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName }) + this._addMessageToThread(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, rawParamsStr, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName }) return {} } // once validated, add checkpoint for edit @@ -783,7 +788,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { if (approvalType) { const autoApprove = this._settingsService.state.globalSettings.autoApprove[approvalType] // add a tool_request because we use it for UI if a tool is loading (this should be improved in the future) - this._addMessageToThread(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName }) + this._addMessageToThread(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName }) if (!autoApprove) { return { awaitingUserApproval: true } } @@ -800,7 +805,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { // 3. 
call the tool // this._setStreamState(threadId, { isRunning: 'tool' }, 'merge') - const runningTool = { role: 'tool', type: 'running_now', name: toolName, params: toolParams, content: '(value not received yet...)', result: null, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName } as const + const runningTool = { role: 'tool', type: 'running_now', name: toolName, params: toolParams, content: '(value not received yet...)', result: null, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName } as const this._updateLatestTool(threadId, runningTool) @@ -810,7 +815,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { try { // set stream state - this._setStreamState(threadId, { isRunning: 'tool', interrupt: interruptorPromise, toolInfo: { toolName, toolParams, id: toolId, content: 'interrupted...', rawParams: opts.unvalidatedToolParams, mcpServerName } }) + this._setStreamState(threadId, { isRunning: 'tool', interrupt: interruptorPromise, toolInfo: { toolName, toolParams, id: toolId, content: 'interrupted...', rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName } }) if (isBuiltInTool) { const { result, interruptTool } = await this._toolsService.callTool[toolName](toolParams as any) @@ -840,7 +845,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { if (interrupted) { return { interrupted: true } } // the tool result is added where we interrupt, not here const errorMessage = getErrorMessage(error) - this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName }) + this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName }) return {} } @@ -855,12 
+860,12 @@ class ChatThreadService extends Disposable implements IChatThreadService { } } catch (error) { const errorMessage = this.toolErrMsgs.errWhenStringifying(error) - this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName }) + this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName }) return {} } // 5. add to history and keep going - this._updateLatestTool(threadId, { role: 'tool', type: 'success', params: toolParams, result: toolResult, name: toolName, content: toolResultStr, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName }) + this._updateLatestTool(threadId, { role: 'tool', type: 'success', params: toolParams, result: toolResult, name: toolName, content: toolResultStr, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName }) return {} }; @@ -895,7 +900,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { // before enter loop, call tool if (callThisToolFirst) { - const { interrupted } = await this._runToolCall(threadId, callThisToolFirst.name, callThisToolFirst.id, callThisToolFirst.mcpServerName, { preapproved: true, unvalidatedToolParams: callThisToolFirst.rawParams, validatedParams: callThisToolFirst.params }) + const { interrupted } = await this._runToolCall(threadId, callThisToolFirst.name, callThisToolFirst.id, callThisToolFirst.mcpServerName, { preapproved: true, unvalidatedToolParams: callThisToolFirst.rawParams, rawParamsStr: callThisToolFirst.rawParamsStr, validatedParams: callThisToolFirst.params }) if (interrupted) { this._setStreamState(threadId, undefined) this._addUserCheckpoint({ threadId }) @@ -1026,7 +1031,7 @@ class ChatThreadService extends 
Disposable implements IChatThreadService { const mcpTools = this._mcpService.getMCPTools() const mcpTool = mcpTools?.find(t => t.name === toolCall.name) - const { awaitingUserApproval, interrupted } = await this._runToolCall(threadId, toolCall.name, toolCall.id, mcpTool?.mcpServerName, { preapproved: false, unvalidatedToolParams: toolCall.rawParams }) + const { awaitingUserApproval, interrupted } = await this._runToolCall(threadId, toolCall.name, toolCall.id, mcpTool?.mcpServerName, { preapproved: false, unvalidatedToolParams: toolCall.rawParams, rawParamsStr: toolCall.rawParamsStr }) if (interrupted) { this._setStreamState(threadId, undefined) return @@ -1391,7 +1396,19 @@ We only need to do it for files that were edited since `from`, ie files between const currSelns: StagingSelectionItem[] = _chatSelections ?? thread.state.stagingSelections const userMessageContent = await chat_userMessageContent(instructions, currSelns, { directoryStrService: this._directoryStringService, fileService: this._fileService }) // user message + names of files (NOT content) - const userHistoryElt: ChatMessage = { role: 'user', content: userMessageContent, displayContent: instructions, selections: currSelns, state: defaultMessageState } + + // Snapshot the volatile runtime context (date, open files, active URI, + // directory listing, terminal IDs) into this user message's stored content + // so past turns stay byte-identical across subsequent requests. The volatile + // block goes into `content` (what the LLM sees) but NOT into `displayContent` + // (what the UI renders), so the chat bubble shows only the user's words. + const { chatMode } = this._settingsService.state.globalSettings + const volatileBlock = await this._convertToLLMMessagesService.generateChatVolatileContext({ chatMode }) + const contentWithVolatile = volatileBlock + ? 
`${volatileBlock}\n\n${userMessageContent}` + : userMessageContent + + const userHistoryElt: ChatMessage = { role: 'user', content: contentWithVolatile, displayContent: instructions, selections: currSelns, state: defaultMessageState } this._addMessageToThread(threadId, userHistoryElt) this._setThreadState(threadId, { currCheckpointIdx: null }) // no longer at a checkpoint because started streaming diff --git a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts index 94545c0d..74d0c0c5 100644 --- a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts +++ b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts @@ -7,7 +7,7 @@ import { IWorkspaceContextService } from '../../../../platform/workspace/common/ import { IEditorService } from '../../../services/editor/common/editorService.js'; import { ChatMessage } from '../common/chatThreadServiceTypes.js'; import { getIsReasoningEnabledState, getReservedOutputTokenSpace, getModelCapabilities } from '../common/modelCapabilities.js'; -import { reParsedToolXMLString, chat_systemMessage } from '../common/prompt/prompts.js'; +import { reParsedToolXMLString, chat_systemMessage, chat_volatileContext } from '../common/prompt/prompts.js'; import { AnthropicLLMChatMessage, AnthropicReasoning, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, OpenAILLMChatMessage, RawToolParamsObj } from '../common/sendLLMMessageTypes.js'; import { IVoidSettingsService } from '../common/voidSettingsService.js'; import { ChatMode, FeatureName, ModelSelection, ProviderName } from '../common/voidSettingsTypes.js'; @@ -29,6 +29,10 @@ type SimpleLLMMessage = { id: string; name: ToolName; rawParams: RawToolParamsObj; + // Original serialized arguments string from the model's tool call (OpenAI-compat + // only). When present, used verbatim on replay to keep the provider's prefix cache + // matching across turns. 
Falls back to JSON.stringify(rawParams) when absent. + rawParamsStr?: string; } | { role: 'user'; content: string; @@ -84,12 +88,17 @@ const prepareMessages_openai_tools = (messages: SimpleLLMMessage[]): AnthropicOr // edit previous assistant message to have called the tool const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined if (prevMsg?.role === 'assistant') { + // Prefer the model's original serialized argument string when we have it + // (OpenAI-compatible providers expose it in the streaming delta). Sending + // byte-identical bytes back preserves the provider's prefix cache past the + // tool call. Fall back to re-serializing when the raw string is unavailable + // (e.g. conversations from before this field existed, or non-OpenAI provenance). prevMsg.tool_calls = [{ type: 'function', id: currMsg.id, function: { name: currMsg.name, - arguments: JSON.stringify(currMsg.rawParams) + arguments: currMsg.rawParamsStr ?? JSON.stringify(currMsg.rawParams) } }] } @@ -524,6 +533,12 @@ export interface IConvertToLLMMessageService { prepareLLMSimpleMessages: (opts: { simpleMessages: SimpleLLMMessage[], systemMessage: string, modelSelection: ModelSelection | null, featureName: FeatureName }) => { messages: LLMChatMessage[], separateSystemMessage: string | undefined } prepareLLMChatMessages: (opts: { chatMessages: ChatMessage[], chatMode: ChatMode, modelSelection: ModelSelection | null }) => Promise<{ messages: LLMChatMessage[], separateSystemMessage: string | undefined }> prepareFIMMessage(opts: { messages: LLMFIMMessage, }): { prefix: string, suffix: string, stopTokens: string[] } + // Called by chat creation paths to snapshot runtime grounding (date, open files, + // active URI, directory listing, terminal IDs) into a user message at storage time. 
+ // Baking volatile into the stored content (rather than prepending at send time) + // keeps prior turns byte-identical across requests so the provider's prefix cache + // stays warm turn-over-turn. + generateChatVolatileContext: (opts: { chatMode: ChatMode }) => Promise } export const IConvertToLLMMessageService = createDecorator('ConvertToLLMMessageService'); @@ -575,26 +590,30 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess } - // system message - private _generateChatMessagesSystemMessage = async (chatMode: ChatMode, specialToolFormat: 'openai-style' | 'anthropic-style' | 'gemini-style' | undefined) => { - const workspaceFolders = this.workspaceContextService.getWorkspace().folders.map(f => f.uri.fsPath) + // Computes only the stable system message; no volatile context is produced here. + // The stable system message contains only cacheable content (persona, rules, tool + // definitions). The volatile block (runtime grounding: date, open files, active + // URI, directory listing, terminal IDs) is generated separately via + // `generateChatVolatileContext` and baked into the user message at storage time + // by the chat thread creation path — that keeps historical turns byte-identical + // across requests so the provider's prefix cache stays warm.
+ private _generateChatSystemMessage = (chatMode: ChatMode, specialToolFormat: 'openai-style' | 'anthropic-style' | 'gemini-style' | undefined) => { + const includeXMLToolDefinitions = !specialToolFormat + const mcpTools = this.mcpService.getMCPTools() + return chat_systemMessage({ chatMode, mcpTools, includeXMLToolDefinitions }) + } + generateChatVolatileContext: IConvertToLLMMessageService['generateChatVolatileContext'] = async ({ chatMode }) => { + const workspaceFolders = this.workspaceContextService.getWorkspace().folders.map(f => f.uri.fsPath) const openedURIs = this.modelService.getModels().filter(m => m.isAttachedToEditor()).map(m => m.uri.fsPath) || []; const activeURI = this.editorService.activeEditor?.resource?.fsPath; - const directoryStr = await this.directoryStrService.getAllDirectoriesStr({ cutOffMessage: chatMode === 'agent' || chatMode === 'gather' ? `...Directories string cut off, use tools to read more...` : `...Directories string cut off, ask user for more if necessary...` }) - - const includeXMLToolDefinitions = !specialToolFormat - - const mcpTools = this.mcpService.getMCPTools() - const persistentTerminalIDs = this.terminalToolService.listPersistentTerminalIds() - const systemMessage = chat_systemMessage({ workspaceFolders, openedURIs, directoryStr, activeURI, persistentTerminalIDs, chatMode, mcpTools, includeXMLToolDefinitions }) - return systemMessage + return chat_volatileContext({ workspaceFolders, openedURIs, activeURI, persistentTerminalIDs, directoryStr, chatMode }) } @@ -622,6 +641,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess name: m.name, id: m.id, rawParams: m.rawParams, + rawParamsStr: m.rawParamsStr, }) } else if (m.role === 'user') { @@ -680,7 +700,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess } = getModelCapabilities(providerName, modelName, overridesOfModel) const { disableSystemMessage } = this.voidSettingsService.state.globalSettings; - const 
fullSystemMessage = await this._generateChatMessagesSystemMessage(chatMode, specialToolFormat) + const fullSystemMessage = this._generateChatSystemMessage(chatMode, specialToolFormat) const systemMessage = disableSystemMessage ? '' : fullSystemMessage; const modelSelectionOptions = this.voidSettingsService.state.optionsOfModelSelection['Chat'][modelSelection.providerName]?.[modelSelection.modelName] @@ -689,6 +709,11 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess const aiInstructions = this._getCombinedAIInstructions(); const isReasoningEnabled = getIsReasoningEnabledState('Chat', providerName, modelName, modelSelectionOptions, overridesOfModel) const reservedOutputTokenSpace = getReservedOutputTokenSpace(providerName, modelName, { isReasoningEnabled, overridesOfModel }) + // Volatile context is baked into each user message when that message is created + // (see `chatThreadService._addUserMessageAndStreamResponse`). At send time + // the stored content is passed through verbatim so each past turn is + // byte-identical to what was sent before, keeping the provider's prefix + // cache warm across turns.
const llmMessages = this._chatMessagesToSimpleMessages(chatMessages) const { messages, separateSystemMessage } = prepareMessages({ diff --git a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx index 1b711dcd..43a46ca6 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx @@ -1515,12 +1515,23 @@ const ReasoningWrapper = ({ isDoneReasoning, isStreaming, children }: { isDoneRe const isDone = isDoneReasoning || !isStreaming const isWriting = !isDone const [isOpen, setIsOpen] = useState(isWriting) + const scrollRef = useRef(null) useEffect(() => { if (!isWriting) setIsOpen(false) // if just finished reasoning, close }, [isWriting]) + // While streaming, keep the box pinned to the bottom so the user sees the + // latest thoughts without having to scroll. Once done, respect user scroll. + useEffect(() => { + if (!isWriting || !isOpen) return + const el = scrollRef.current + if (el) el.scrollTop = el.scrollHeight + }, [children, isWriting, isOpen]) return : ''} isOpen={isOpen} onClick={() => setIsOpen(v => !v)}> -
+
{children}
diff --git a/src/vs/workbench/contrib/void/browser/toolsService.ts b/src/vs/workbench/contrib/void/browser/toolsService.ts index dbd0bdd1..3187bf2c 100644 --- a/src/vs/workbench/contrib/void/browser/toolsService.ts +++ b/src/vs/workbench/contrib/void/browser/toolsService.ts @@ -38,38 +38,60 @@ const validateStr = (argName: string, value: unknown) => { } -// We are NOT checking to make sure in workspace -const validateURI = (uriStr: unknown) => { +// Detects whether a plain path string is absolute. +// - Unix absolute: starts with '/' +// - Windows absolute: drive letter followed by ':\' or ':/' (e.g. 'C:\...', 'c:/...') +// - UNC path: starts with '\\' +const isAbsolutePathString = (s: string) => { + if (s.startsWith('/')) return true + if (s.startsWith('\\\\')) return true + if (/^[a-zA-Z]:[\\/]/.test(s)) return true + return false +} + +// We are NOT checking to make sure in workspace. +// workspaceRoot is optional; when provided, bare relative paths like "src/foo.ts" or +// "./README.md" are resolved against it. Without it (or when no workspace is open), +// we fall back to URI.file which resolves relative paths against the filesystem root — +// same as the legacy behavior, but that's the pathological case we want to avoid. +// Prefer the workspace-aware `validateURI` bound inside ToolsService; this raw +// version is exported-by-module-scope only for internal re-use. +const validateURIWithRoot = (uriStr: unknown, workspaceRoot?: URI | null) => { if (uriStr === null) throw new Error(`Invalid LLM output: uri was null.`) if (typeof uriStr !== 'string') throw new Error(`Invalid LLM output format: Provided uri must be a string, but it's a(n) ${typeof uriStr}. Full value: ${JSON.stringify(uriStr)}.`) - // Check if it's already a full URI with scheme (e.g., vscode-remote://, file://, etc.) 
- // Look for :// pattern which indicates a scheme is present - // Examples of supported URIs: - // - vscode-remote://wsl+Ubuntu/home/user/file.txt (WSL) - // - vscode-remote://ssh-remote+myserver/home/user/file.txt (SSH) - // - file:///home/user/file.txt (local file with scheme) - // - /home/user/file.txt (local file path, will be converted to file://) - // - C:\Users\file.txt (Windows local path, will be converted to file://) + // Scheme-qualified URI (e.g. vscode-remote://, file://, etc.) — parse as-is. if (uriStr.includes('://')) { try { const uri = URI.parse(uriStr) return uri } catch (e) { - // If parsing fails, it's a malformed URI throw new Error(`Invalid URI format: ${uriStr}. Error: ${e}`) } - } else { - // No scheme present, treat as file path - // This handles regular file paths like /home/user/file.txt or C:\Users\file.txt - const uri = URI.file(uriStr) - return uri } + + // Absolute path — safe to pass to URI.file. + if (isAbsolutePathString(uriStr)) { + return URI.file(uriStr) + } + + // Relative path (e.g. "README.md", "src/foo.ts", "./foo", "../bar"). + // Resolve against workspace root when available. This is the critical branch: + // weak models naturally produce bare filenames, and without this resolution + // URI.file("README.md") would become file:///README.md (root of filesystem), + // forcing models to fall back to terminal commands. + if (workspaceRoot) { + return URI.joinPath(workspaceRoot, uriStr) + } + + // No workspace — legacy fallback. Will resolve from filesystem root and likely fail, + // but preserves prior behavior for the (rare) no-workspace case. 
+ return URI.file(uriStr) } -const validateOptionalURI = (uriStr: unknown) => { +const validateOptionalURIWithRoot = (uriStr: unknown, workspaceRoot?: URI | null) => { if (isFalsy(uriStr)) return null - return validateURI(uriStr) + return validateURIWithRoot(uriStr, workspaceRoot) } const validateOptionalStr = (argName: string, str: unknown) => { @@ -156,6 +178,16 @@ export class ToolsService implements IToolsService { ) { const queryBuilder = instantiationService.createInstance(QueryBuilder); + // Resolve the current workspace root lazily so that multi-root / workspace-switch + // scenarios pick up the correct folder at call time rather than at construction time. + // These shadow the module-level helpers so the 11+ call sites below stay terse. + const getWorkspaceRoot = (): URI | null => { + const folders = workspaceContextService.getWorkspace().folders + return folders.length > 0 ? folders[0].uri : null + } + const validateURI = (uriStr: unknown) => validateURIWithRoot(uriStr, getWorkspaceRoot()) + const validateOptionalURI = (uriStr: unknown) => validateOptionalURIWithRoot(uriStr, getWorkspaceRoot()) + this.validateParams = { read_file: (params: RawToolParamsObj) => { const { uri: uriStr, start_line: startLineUnknown, end_line: endLineUnknown, page_number: pageNumberUnknown } = params diff --git a/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts b/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts index 44dc307e..61b628dd 100644 --- a/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts +++ b/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts @@ -13,6 +13,10 @@ export type ToolMessage = { content: string; // give this result to LLM (string of value) id: string; rawParams: RawToolParamsObj; + // Original serialized `arguments` string from the model's tool call (when available + // from the provider stream — OpenAI-compatible only). 
Used on replay to send + // byte-identical tool_calls back, preserving the provider's prefix cache. + rawParamsStr?: string; mcpServerName: string | undefined; // the server name at the time of the call } & ( // in order of events: diff --git a/src/vs/workbench/contrib/void/common/modelCapabilities.ts b/src/vs/workbench/contrib/void/common/modelCapabilities.ts index e0d6eb9f..d93a9c06 100644 --- a/src/vs/workbench/contrib/void/common/modelCapabilities.ts +++ b/src/vs/workbench/contrib/void/common/modelCapabilities.ts @@ -227,9 +227,12 @@ type ProviderReasoningIOSettings = { // include this in payload to get reasoning input?: { includeInPayload?: (reasoningState: SendableReasoningInfo) => null | { [key: string]: any }, }; // nameOfFieldInDelta: reasoning output is in response.choices[0].delta[deltaReasoningField] + // may be a single field name or a list of candidates tried in order (first non-empty wins) — + // lets one provider entry cover gateways that standardize on different field names + // (e.g. DeepSeek uses `reasoning_content`, OpenRouter uses `reasoning`). // needsManualParse: whether we must manually parse out the tags output?: - | { nameOfFieldInDelta?: string, needsManualParse?: undefined, } + | { nameOfFieldInDelta?: string | string[], needsManualParse?: undefined, } | { nameOfFieldInDelta?: undefined, needsManualParse?: true, }; } @@ -1254,9 +1257,12 @@ const openaiCompatible: VoidStaticProviderInfo = { modelOptionsFallback: (modelName) => extensiveModelOptionsFallback(modelName), modelOptions: {}, providerReasoningIOSettings: { - // reasoning: we have no idea what endpoint they used, so we can't consistently parse out reasoning + // reasoning: we have no idea what endpoint they used, so cover the common field names. 
+ // `reasoning_content` — DeepSeek, vLLM, many self-hosted servers + // `reasoning` — OpenRouter, opencode-style gateways + // `thinking` — some Chinese gateways (Moonshot, Zhipu) input: { includeInPayload: openAICompatIncludeInPayloadReasoning }, - output: { nameOfFieldInDelta: 'reasoning_content' }, + output: { nameOfFieldInDelta: ['reasoning_content', 'reasoning', 'thinking'] }, }, } diff --git a/src/vs/workbench/contrib/void/common/prompt/prompts.ts b/src/vs/workbench/contrib/void/common/prompt/prompts.ts index fba76815..8ba1b0fd 100644 --- a/src/vs/workbench/contrib/void/common/prompt/prompts.ts +++ b/src/vs/workbench/contrib/void/common/prompt/prompts.ts @@ -425,17 +425,25 @@ const systemToolsXMLPrompt = (chatMode: ChatMode, mcpTools: InternalToolInfo[] | // ======================================================== chat (normal, gather, agent) ======================================================== -export const chat_systemMessage = ({ workspaceFolders, openedURIs, activeURI, persistentTerminalIDs, directoryStr, chatMode: mode, mcpTools, includeXMLToolDefinitions }: { workspaceFolders: string[], directoryStr: string, openedURIs: string[], activeURI: string | undefined, persistentTerminalIDs: string[], chatMode: ChatMode, mcpTools: InternalToolInfo[] | undefined, includeXMLToolDefinitions: boolean }) => { - const header = (`You are an expert coding ${mode === 'agent' ? 'agent' : 'assistant'} whose job is \ -${mode === 'agent' ? `to help the user develop, run, and make changes to their codebase.` - : mode === 'gather' ? `to search, understand, and reference files in the user's codebase.` - : mode === 'normal' ? `to assist the user with their coding tasks.` - : ''} -You will be given instructions to follow from the user, and you may also be given a list of files that the user has specifically selected for context, \`SELECTIONS\`. -Please assist the user with their query.`) - +// Shared input type between the stable system message and the volatile context. 
+// Kept together so callers can compute the workspace snapshot once and feed both. +export type ChatPromptContext = { + workspaceFolders: string[] + directoryStr: string + openedURIs: string[] + activeURI: string | undefined + persistentTerminalIDs: string[] + chatMode: ChatMode + mcpTools: InternalToolInfo[] | undefined + includeXMLToolDefinitions: boolean +} +// Returns the volatile runtime-grounding block as a standalone string. Callers +// should bake this into each user message's stored content when it is created rather +// than embed it in the system message — keeping it out of the system message lets +// the stable prefix and the full conversation history be prefix-cached across turns. +export const chat_volatileContext = ({ workspaceFolders, openedURIs, activeURI, persistentTerminalIDs, directoryStr, chatMode: mode }: Pick) => { const sysInfo = (`Here is the user's system information: - ${os} @@ -459,6 +467,29 @@ ${directoryStr} `) + // XML tag is self-describing; no narration prefix. Keep field order stable + // so that on turns where volatile fields happen to match the previous turn, + // the cache can extend further into the prefix. + return (` +Today's date is ${new Date().toDateString()}. + +${sysInfo} + +${fsInfo} +`) +} + + +export const chat_systemMessage = ({ chatMode: mode, mcpTools, includeXMLToolDefinitions }: Pick) => { + const header = (`You are an expert coding ${mode === 'agent' ? 'agent' : 'assistant'} whose job is \ +${mode === 'agent' ? `to help the user develop, run, and make changes to their codebase.` + : mode === 'gather' ? `to search, understand, and reference files in the user's codebase.` + : mode === 'normal' ? `to assist the user with their coding tasks.` + : ''} +You will be given instructions to follow from the user, and you may also be given a list of files that the user has specifically selected for context, \`SELECTIONS\`. +Please assist the user with their query.`) + + const toolDefinitions = includeXMLToolDefinitions ? 
systemToolsXMLPrompt(mode, mcpTools) : null const details: string[] = [] @@ -506,19 +537,21 @@ Here's an example of a good code block:\n${chatSuggestionDiffExample}`) details.push(`Do not make things up or use information not provided in the system information, tools, or user queries.`) details.push(`Always use MARKDOWN to format lists, bullet points, etc. Do NOT write tables.`) - details.push(`Today's date is ${new Date().toDateString()}.`) const importantDetails = (`Important notes: ${details.map((d, i) => `${i + 1}. ${d}`).join('\n\n')}`) - - // return answer + // System message contains ONLY stable content (persona, rules, tool definitions) + // so the entire system prefix is eligible for cross-turn prefix caching. Anything + // that can change between turns (active file, open tabs, today's date, directory + // listing, terminal IDs) lives in `chat_volatileContext` and is baked into each + // user message's stored content at thread-creation time by chatThreadService. + // That keeps historical turns byte-identical across subsequent requests so the + // provider's prefix cache stays warm as the conversation grows. 
const ansStrs: string[] = [] ansStrs.push(header) - ansStrs.push(sysInfo) - if (toolDefinitions) ansStrs.push(toolDefinitions) ansStrs.push(importantDetails) - ansStrs.push(fsInfo) + if (toolDefinitions) ansStrs.push(toolDefinitions) const fullSystemMsgStr = ansStrs .join('\n\n\n') diff --git a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts index 865cea85..00446785 100644 --- a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts +++ b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts @@ -84,6 +84,12 @@ export type RawToolParamsObj = { export type RawToolCallObj = { name: ToolName; rawParams: RawToolParamsObj; + // Original serialized `arguments` string as the model emitted it (OpenAI-compatible + // path only — Anthropic/Gemini deliver tool input as structured JSON with no raw + // source string). Preserved so that on replay we can send byte-identical content + // back to the provider, which keeps the prefix cache warm past the tool call. + // Absent/undefined when not available; callers should fall back to JSON.stringify(rawParams). + rawParamsStr?: string; doneParams: ToolParamName[]; id: string; isDone: boolean; diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts index 3527b76a..68762f33 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts @@ -252,7 +252,10 @@ const rawToolCallObjOfParamsStr = (name: string, toolParamsStr: string, id: stri if (typeof input !== 'object') return null const rawParams: RawToolParamsObj = input - return { id, name, rawParams, doneParams: Object.keys(rawParams), isDone: true } + // Preserve the original argument string exactly as the model emitted it. 
On replay + // we'll send this back verbatim inside `tool_calls[].function.arguments` so the + // provider sees byte-identical content and the prefix cache stays warm. + return { id, name, rawParams, rawParamsStr: toolParamsStr, doneParams: Object.keys(rawParams), isDone: true } } @@ -365,11 +368,17 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE } - // reasoning + // reasoning — nameOfFieldInDelta may be a single field or a list of candidates + // (some gateways like OpenRouter use `reasoning`, others like DeepSeek use + // `reasoning_content`). Take the first non-empty one this chunk provides. let newReasoning = '' if (nameOfReasoningFieldInDelta) { - // @ts-ignore - newReasoning = (chunk.choices[0]?.delta?.[nameOfReasoningFieldInDelta] || '') + '' + const fields = Array.isArray(nameOfReasoningFieldInDelta) ? nameOfReasoningFieldInDelta : [nameOfReasoningFieldInDelta] + for (const f of fields) { + // @ts-ignore + const val = (chunk.choices[0]?.delta?.[f] || '') + '' + if (val) { newReasoning = val; break } + } fullReasoningSoFar += newReasoning } @@ -821,9 +830,19 @@ const sendGeminiChat = async ({ // Process the stream for await (const chunk of stream) { - // message - const newText = chunk.text ?? '' - fullTextSoFar += newText + // message — split thought-tagged parts from answer parts. + // Gemini 2.5 Pro / Gemma 4 route internal reasoning through parts with + // `thought: true`; the visible answer lives in plain text parts. Using + // `chunk.text` (SDK shortcut) would concatenate both, polluting the + // chat view and the stored message history. + const parts = chunk.candidates?.[0]?.content?.parts + if (parts) { + for (const part of parts) { + if (typeof part.text !== 'string') continue // skip functionCall / inlineData / etc. 
+ if (part.thought === true) fullReasoningSoFar += part.text + else fullTextSoFar += part.text + } + } // tool call const functionCalls = chunk.functionCalls @@ -834,17 +853,21 @@ const sendGeminiChat = async ({ toolId = functionCall.id ?? '' } - // (do not handle reasoning yet) - // usage (Gemini exposes promptTokenCount / candidatesTokenCount / totalTokenCount / - // thoughtsTokenCount via usageMetadata). Only update when the chunk reports it. + // thoughtsTokenCount / cachedContentTokenCount via usageMetadata). Multiple + // chunks can carry usageMetadata during a stream, and the field set is NOT + // consistent across chunks — notably, cachedContentTokenCount often appears + // on an early chunk and is absent from the final summary. Merge per-field + // with `??` so we preserve the best value seen so far instead of flickering + // to `undefined` when Google stops reporting a field. const usageMetadata = chunk.usageMetadata if (usageMetadata) { latestUsage = { - inputTokens: usageMetadata.promptTokenCount, - outputTokens: usageMetadata.candidatesTokenCount, - totalTokens: usageMetadata.totalTokenCount, - reasoningTokens: usageMetadata.thoughtsTokenCount, + inputTokens: usageMetadata.promptTokenCount ?? latestUsage?.inputTokens, + outputTokens: usageMetadata.candidatesTokenCount ?? latestUsage?.outputTokens, + totalTokens: usageMetadata.totalTokenCount ?? latestUsage?.totalTokens, + reasoningTokens: usageMetadata.thoughtsTokenCount ?? latestUsage?.reasoningTokens, + cachedInputTokens: usageMetadata.cachedContentTokenCount ?? latestUsage?.cachedInputTokens, } }