Feature/improve chat experience (#7)

* handle relative paths when the agent asks to interact with files or directories

* handle gemini reasoning output

* move volatile system message to the last part

* handle reasoning for openai compatible opencode

* serialize tool call arguments to improve cache hit rate
This commit is contained in:
davi0015 2026-04-22 11:29:49 +08:00 committed by GitHub
parent f4e5b9e91a
commit 7603d8f9a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 238 additions and 81 deletions

View file

@ -205,6 +205,7 @@ export type ThreadStreamState = {
id: string;
content: string;
rawParams: RawToolParamsObj;
rawParamsStr?: string;
mcpServerName: string | undefined;
};
interrupt: Promise<() => void>;
@ -536,7 +537,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
// if running now but stream state doesn't indicate it (happens if restart Void), cancel that last tool
if (lastMessage && lastMessage.role === 'tool' && lastMessage.type === 'running_now') {
this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', content: lastMessage.content, id: lastMessage.id, rawParams: lastMessage.rawParams, result: null, name: lastMessage.name, params: lastMessage.params, mcpServerName: lastMessage.mcpServerName })
this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', content: lastMessage.content, id: lastMessage.id, rawParams: lastMessage.rawParams, rawParamsStr: lastMessage.rawParamsStr, result: null, name: lastMessage.name, params: lastMessage.params, mcpServerName: lastMessage.mcpServerName })
}
}
@ -681,10 +682,10 @@ class ChatThreadService extends Disposable implements IChatThreadService {
}
else return
const { name, id, rawParams, mcpServerName } = lastMsg
const { name, id, rawParams, rawParamsStr, mcpServerName } = lastMsg
const errorMessage = this.toolErrMsgs.rejected
this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params: params, name: name, content: errorMessage, result: null, id, rawParams, mcpServerName })
this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params: params, name: name, content: errorMessage, result: null, id, rawParams, rawParamsStr, mcpServerName })
this._setStreamState(threadId, undefined)
}
@ -704,9 +705,9 @@ class ChatThreadService extends Disposable implements IChatThreadService {
}
// add tool that's running
else if (this.streamState[threadId]?.isRunning === 'tool') {
const { toolName, toolParams, id, content: content_, rawParams, mcpServerName } = this.streamState[threadId].toolInfo
const { toolName, toolParams, id, content: content_, rawParams, rawParamsStr, mcpServerName } = this.streamState[threadId].toolInfo
const content = content_ || this.toolErrMsgs.interrupted
this._updateLatestTool(threadId, { role: 'tool', name: toolName, params: toolParams, id, content, rawParams, type: 'rejected', result: null, mcpServerName })
this._updateLatestTool(threadId, { role: 'tool', name: toolName, params: toolParams, id, content, rawParams, rawParamsStr, type: 'rejected', result: null, mcpServerName })
}
// reject the tool for the user if relevant
else if (this.streamState[threadId]?.isRunning === 'awaiting_user') {
@ -745,8 +746,12 @@ class ChatThreadService extends Disposable implements IChatThreadService {
toolName: ToolName,
toolId: string,
mcpServerName: string | undefined,
opts: { preapproved: true, unvalidatedToolParams: RawToolParamsObj, validatedParams: ToolCallParams<ToolName> } | { preapproved: false, unvalidatedToolParams: RawToolParamsObj },
opts: { preapproved: true, unvalidatedToolParams: RawToolParamsObj, validatedParams: ToolCallParams<ToolName>, rawParamsStr?: string } | { preapproved: false, unvalidatedToolParams: RawToolParamsObj, rawParamsStr?: string },
): Promise<{ awaitingUserApproval?: boolean, interrupted?: boolean }> => {
// Carry the model's original serialized arguments string (when available) into
// every tool message we persist. This lets the replay path send byte-identical
// tool_calls back to the provider, preserving the prefix cache across turns.
const rawParamsStr = opts.rawParamsStr
// compute these below
let toolParams: ToolCallParams<ToolName>
@ -770,7 +775,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
}
catch (error) {
const errorMessage = getErrorMessage(error)
this._addMessageToThread(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName })
this._addMessageToThread(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, rawParamsStr, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName })
return {}
}
// once validated, add checkpoint for edit
@ -783,7 +788,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
if (approvalType) {
const autoApprove = this._settingsService.state.globalSettings.autoApprove[approvalType]
// add a tool_request because we use it for UI if a tool is loading (this should be improved in the future)
this._addMessageToThread(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName })
this._addMessageToThread(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
if (!autoApprove) {
return { awaitingUserApproval: true }
}
@ -800,7 +805,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
// 3. call the tool
// this._setStreamState(threadId, { isRunning: 'tool' }, 'merge')
const runningTool = { role: 'tool', type: 'running_now', name: toolName, params: toolParams, content: '(value not received yet...)', result: null, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName } as const
const runningTool = { role: 'tool', type: 'running_now', name: toolName, params: toolParams, content: '(value not received yet...)', result: null, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName } as const
this._updateLatestTool(threadId, runningTool)
@ -810,7 +815,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
try {
// set stream state
this._setStreamState(threadId, { isRunning: 'tool', interrupt: interruptorPromise, toolInfo: { toolName, toolParams, id: toolId, content: 'interrupted...', rawParams: opts.unvalidatedToolParams, mcpServerName } })
this._setStreamState(threadId, { isRunning: 'tool', interrupt: interruptorPromise, toolInfo: { toolName, toolParams, id: toolId, content: 'interrupted...', rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName } })
if (isBuiltInTool) {
const { result, interruptTool } = await this._toolsService.callTool[toolName](toolParams as any)
@ -840,7 +845,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
if (interrupted) { return { interrupted: true } } // the tool result is added where we interrupt, not here
const errorMessage = getErrorMessage(error)
this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName })
this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
return {}
}
@ -855,12 +860,12 @@ class ChatThreadService extends Disposable implements IChatThreadService {
}
} catch (error) {
const errorMessage = this.toolErrMsgs.errWhenStringifying(error)
this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName })
this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
return {}
}
// 5. add to history and keep going
this._updateLatestTool(threadId, { role: 'tool', type: 'success', params: toolParams, result: toolResult, name: toolName, content: toolResultStr, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName })
this._updateLatestTool(threadId, { role: 'tool', type: 'success', params: toolParams, result: toolResult, name: toolName, content: toolResultStr, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
return {}
};
@ -895,7 +900,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
// before enter loop, call tool
if (callThisToolFirst) {
const { interrupted } = await this._runToolCall(threadId, callThisToolFirst.name, callThisToolFirst.id, callThisToolFirst.mcpServerName, { preapproved: true, unvalidatedToolParams: callThisToolFirst.rawParams, validatedParams: callThisToolFirst.params })
const { interrupted } = await this._runToolCall(threadId, callThisToolFirst.name, callThisToolFirst.id, callThisToolFirst.mcpServerName, { preapproved: true, unvalidatedToolParams: callThisToolFirst.rawParams, rawParamsStr: callThisToolFirst.rawParamsStr, validatedParams: callThisToolFirst.params })
if (interrupted) {
this._setStreamState(threadId, undefined)
this._addUserCheckpoint({ threadId })
@ -1026,7 +1031,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
const mcpTools = this._mcpService.getMCPTools()
const mcpTool = mcpTools?.find(t => t.name === toolCall.name)
const { awaitingUserApproval, interrupted } = await this._runToolCall(threadId, toolCall.name, toolCall.id, mcpTool?.mcpServerName, { preapproved: false, unvalidatedToolParams: toolCall.rawParams })
const { awaitingUserApproval, interrupted } = await this._runToolCall(threadId, toolCall.name, toolCall.id, mcpTool?.mcpServerName, { preapproved: false, unvalidatedToolParams: toolCall.rawParams, rawParamsStr: toolCall.rawParamsStr })
if (interrupted) {
this._setStreamState(threadId, undefined)
return
@ -1391,7 +1396,19 @@ We only need to do it for files that were edited since `from`, ie files between
const currSelns: StagingSelectionItem[] = _chatSelections ?? thread.state.stagingSelections
const userMessageContent = await chat_userMessageContent(instructions, currSelns, { directoryStrService: this._directoryStringService, fileService: this._fileService }) // user message + names of files (NOT content)
const userHistoryElt: ChatMessage = { role: 'user', content: userMessageContent, displayContent: instructions, selections: currSelns, state: defaultMessageState }
// Snapshot the volatile runtime context (date, open files, active URI,
// directory listing, terminal IDs) into this user message's stored content
// so past turns stay byte-identical across subsequent requests. The volatile
// block goes into `content` (what the LLM sees) but NOT into `displayContent`
// (what the UI renders), so the chat bubble shows only the user's words.
const { chatMode } = this._settingsService.state.globalSettings
const volatileBlock = await this._convertToLLMMessagesService.generateChatVolatileContext({ chatMode })
const contentWithVolatile = volatileBlock
? `${volatileBlock}\n\n${userMessageContent}`
: userMessageContent
const userHistoryElt: ChatMessage = { role: 'user', content: contentWithVolatile, displayContent: instructions, selections: currSelns, state: defaultMessageState }
this._addMessageToThread(threadId, userHistoryElt)
this._setThreadState(threadId, { currCheckpointIdx: null }) // no longer at a checkpoint because started streaming

View file

@ -7,7 +7,7 @@ import { IWorkspaceContextService } from '../../../../platform/workspace/common/
import { IEditorService } from '../../../services/editor/common/editorService.js';
import { ChatMessage } from '../common/chatThreadServiceTypes.js';
import { getIsReasoningEnabledState, getReservedOutputTokenSpace, getModelCapabilities } from '../common/modelCapabilities.js';
import { reParsedToolXMLString, chat_systemMessage } from '../common/prompt/prompts.js';
import { reParsedToolXMLString, chat_systemMessage, chat_volatileContext } from '../common/prompt/prompts.js';
import { AnthropicLLMChatMessage, AnthropicReasoning, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, OpenAILLMChatMessage, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
import { IVoidSettingsService } from '../common/voidSettingsService.js';
import { ChatMode, FeatureName, ModelSelection, ProviderName } from '../common/voidSettingsTypes.js';
@ -29,6 +29,10 @@ type SimpleLLMMessage = {
id: string;
name: ToolName;
rawParams: RawToolParamsObj;
// Original serialized arguments string from the model's tool call (OpenAI-compat
// only). When present, used verbatim on replay to keep the provider's prefix cache
// matching across turns. Falls back to JSON.stringify(rawParams) when absent.
rawParamsStr?: string;
} | {
role: 'user';
content: string;
@ -84,12 +88,17 @@ const prepareMessages_openai_tools = (messages: SimpleLLMMessage[]): AnthropicOr
// edit previous assistant message to have called the tool
const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined
if (prevMsg?.role === 'assistant') {
// Prefer the model's original serialized argument string when we have it
// (OpenAI-compatible providers expose it in the streaming delta). Sending
// exactly the same bytes back preserves the provider's prefix cache past the
// tool call. Fall back to re-serializing when the raw string is unavailable
// (e.g. conversations from before this field existed, or non-OpenAI provenance).
prevMsg.tool_calls = [{
type: 'function',
id: currMsg.id,
function: {
name: currMsg.name,
arguments: JSON.stringify(currMsg.rawParams)
arguments: currMsg.rawParamsStr ?? JSON.stringify(currMsg.rawParams)
}
}]
}
@ -524,6 +533,12 @@ export interface IConvertToLLMMessageService {
prepareLLMSimpleMessages: (opts: { simpleMessages: SimpleLLMMessage[], systemMessage: string, modelSelection: ModelSelection | null, featureName: FeatureName }) => { messages: LLMChatMessage[], separateSystemMessage: string | undefined }
prepareLLMChatMessages: (opts: { chatMessages: ChatMessage[], chatMode: ChatMode, modelSelection: ModelSelection | null }) => Promise<{ messages: LLMChatMessage[], separateSystemMessage: string | undefined }>
prepareFIMMessage(opts: { messages: LLMFIMMessage, }): { prefix: string, suffix: string, stopTokens: string[] }
// Called by chat creation paths to snapshot runtime grounding (date, open files,
// active URI, directory listing, terminal IDs) into a user message at storage time.
// Baking volatile into the stored content (rather than prepending at send time)
// keeps prior turns byte-identical across requests so the provider's prefix cache
// stays warm turn-over-turn.
generateChatVolatileContext: (opts: { chatMode: ChatMode }) => Promise<string>
}
export const IConvertToLLMMessageService = createDecorator<IConvertToLLMMessageService>('ConvertToLLMMessageService');
@ -575,26 +590,30 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
}
// system message
private _generateChatMessagesSystemMessage = async (chatMode: ChatMode, specialToolFormat: 'openai-style' | 'anthropic-style' | 'gemini-style' | undefined) => {
const workspaceFolders = this.workspaceContextService.getWorkspace().folders.map(f => f.uri.fsPath)
// Builds the stable system message only: cacheable content such as persona, rules
// and tool definitions. XML tool definitions are requested exactly when the model
// has no special tool format (`specialToolFormat` is undefined).
// The volatile block (runtime grounding: date, open files, active URI, directory
// listing, terminal IDs) is intentionally NOT produced here. It is generated
// separately via `generateChatVolatileContext` and baked into the user message at
// storage time, which keeps historical turns byte-identical across requests so the
// provider's prefix cache stays warm.
private _generateChatSystemMessage = (chatMode: ChatMode, specialToolFormat: 'openai-style' | 'anthropic-style' | 'gemini-style' | undefined) => {
	return chat_systemMessage({
		chatMode,
		mcpTools: this.mcpService.getMCPTools(),
		includeXMLToolDefinitions: !specialToolFormat,
	})
}
generateChatVolatileContext: IConvertToLLMMessageService['generateChatVolatileContext'] = async ({ chatMode }) => {
const workspaceFolders = this.workspaceContextService.getWorkspace().folders.map(f => f.uri.fsPath)
const openedURIs = this.modelService.getModels().filter(m => m.isAttachedToEditor()).map(m => m.uri.fsPath) || [];
const activeURI = this.editorService.activeEditor?.resource?.fsPath;
const directoryStr = await this.directoryStrService.getAllDirectoriesStr({
cutOffMessage: chatMode === 'agent' || chatMode === 'gather' ?
`...Directories string cut off, use tools to read more...`
: `...Directories string cut off, ask user for more if necessary...`
})
const includeXMLToolDefinitions = !specialToolFormat
const mcpTools = this.mcpService.getMCPTools()
const persistentTerminalIDs = this.terminalToolService.listPersistentTerminalIds()
const systemMessage = chat_systemMessage({ workspaceFolders, openedURIs, directoryStr, activeURI, persistentTerminalIDs, chatMode, mcpTools, includeXMLToolDefinitions })
return systemMessage
return chat_volatileContext({ workspaceFolders, openedURIs, activeURI, persistentTerminalIDs, directoryStr, chatMode })
}
@ -622,6 +641,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
name: m.name,
id: m.id,
rawParams: m.rawParams,
rawParamsStr: m.rawParamsStr,
})
}
else if (m.role === 'user') {
@ -680,7 +700,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
} = getModelCapabilities(providerName, modelName, overridesOfModel)
const { disableSystemMessage } = this.voidSettingsService.state.globalSettings;
const fullSystemMessage = await this._generateChatMessagesSystemMessage(chatMode, specialToolFormat)
const fullSystemMessage = this._generateChatSystemMessage(chatMode, specialToolFormat)
const systemMessage = disableSystemMessage ? '' : fullSystemMessage;
const modelSelectionOptions = this.voidSettingsService.state.optionsOfModelSelection['Chat'][modelSelection.providerName]?.[modelSelection.modelName]
@ -689,6 +709,11 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
const aiInstructions = this._getCombinedAIInstructions();
const isReasoningEnabled = getIsReasoningEnabledState('Chat', providerName, modelName, modelSelectionOptions, overridesOfModel)
const reservedOutputTokenSpace = getReservedOutputTokenSpace(providerName, modelName, { isReasoningEnabled, overridesOfModel })
// Volatile context is baked into user messages at thread-creation time
// (see `chatThreadService._addUserMessageAndStreamResponse`). At send time
// the stored content is passed through verbatim so each past turn is
// byte-identical to what was sent before, keeping the provider's prefix
// cache warm across turns.
const llmMessages = this._chatMessagesToSimpleMessages(chatMessages)
const { messages, separateSystemMessage } = prepareMessages({

View file

@ -1515,12 +1515,23 @@ const ReasoningWrapper = ({ isDoneReasoning, isStreaming, children }: { isDoneRe
const isDone = isDoneReasoning || !isStreaming
const isWriting = !isDone
const [isOpen, setIsOpen] = useState(isWriting)
const scrollRef = useRef<HTMLDivElement>(null)
useEffect(() => {
if (!isWriting) setIsOpen(false) // if just finished reasoning, close
}, [isWriting])
// While streaming, keep the box pinned to the bottom so the user sees the
// latest thoughts without having to scroll. Once done, respect user scroll.
useEffect(() => {
if (!isWriting || !isOpen) return
const el = scrollRef.current
if (el) el.scrollTop = el.scrollHeight
}, [children, isWriting, isOpen])
return <ToolHeaderWrapper title='Reasoning' desc1={isWriting ? <IconLoading /> : ''} isOpen={isOpen} onClick={() => setIsOpen(v => !v)}>
<ToolChildrenWrapper>
<div className='!select-text cursor-auto'>
<div
ref={scrollRef}
className='!select-text cursor-auto max-h-60 overflow-y-auto'
>
{children}
</div>
</ToolChildrenWrapper>

View file

@ -38,38 +38,60 @@ const validateStr = (argName: string, value: unknown) => {
}
// We are NOT checking to make sure in workspace
const validateURI = (uriStr: unknown) => {
// Reports whether a plain (scheme-less) path string is absolute. A path counts as
// absolute when it matches any of these forms:
// - Unix absolute: leading '/'
// - UNC path: leading '\\' (two backslashes)
// - Windows drive path: one ASCII letter, then ':', then '\' or '/' (e.g. 'C:\...', 'c:/...')
// Everything else (including '', 'C:foo' and a single leading backslash) is relative.
const isAbsolutePathString = (s: string) => {
	const hasUnixRoot = s.startsWith('/')
	const hasUncPrefix = s.startsWith('\\\\')
	const hasDrivePrefix = /^[a-zA-Z]:[\\/]/.test(s)
	return hasUnixRoot || hasUncPrefix || hasDrivePrefix
}
// We are NOT checking to make sure in workspace.
// workspaceRoot is optional; when provided, bare relative paths like "src/foo.ts" or
// "./README.md" are resolved against it. Without it (or when no workspace is open),
// relative paths fall back to URI.file, which resolves them against the filesystem
// root. That matches the legacy behavior, but it is the pathological case this
// function exists to avoid.
// Prefer the workspace-aware `validateURI` bound inside ToolsService; this raw
// version lives at module scope (it is not exported) only for internal re-use.
const validateURIWithRoot = (uriStr: unknown, workspaceRoot?: URI | null) => {
if (uriStr === null) throw new Error(`Invalid LLM output: uri was null.`)
if (typeof uriStr !== 'string') throw new Error(`Invalid LLM output format: Provided uri must be a string, but it's a(n) ${typeof uriStr}. Full value: ${JSON.stringify(uriStr)}.`)
// Check if it's already a full URI with scheme (e.g., vscode-remote://, file://, etc.)
// Look for :// pattern which indicates a scheme is present
// Examples of supported URIs:
// - vscode-remote://wsl+Ubuntu/home/user/file.txt (WSL)
// - vscode-remote://ssh-remote+myserver/home/user/file.txt (SSH)
// - file:///home/user/file.txt (local file with scheme)
// - /home/user/file.txt (local file path, will be converted to file://)
// - C:\Users\file.txt (Windows local path, will be converted to file://)
// Scheme-qualified URI (e.g. vscode-remote://, file://, etc.) — parse as-is.
if (uriStr.includes('://')) {
try {
const uri = URI.parse(uriStr)
return uri
} catch (e) {
// If parsing fails, it's a malformed URI
throw new Error(`Invalid URI format: ${uriStr}. Error: ${e}`)
}
} else {
// No scheme present, treat as file path
// This handles regular file paths like /home/user/file.txt or C:\Users\file.txt
const uri = URI.file(uriStr)
return uri
}
// Absolute path — safe to pass to URI.file.
if (isAbsolutePathString(uriStr)) {
return URI.file(uriStr)
}
// Relative path (e.g. "README.md", "src/foo.ts", "./foo", "../bar").
// Resolve against workspace root when available. This is the critical branch:
// weak models naturally produce bare filenames, and without this resolution
// URI.file("README.md") would become file:///README.md (root of filesystem),
// forcing models to fall back to terminal commands.
if (workspaceRoot) {
return URI.joinPath(workspaceRoot, uriStr)
}
// No workspace — legacy fallback. Will resolve from filesystem root and likely fail,
// but preserves prior behavior for the (rare) no-workspace case.
return URI.file(uriStr)
}
const validateOptionalURI = (uriStr: unknown) => {
const validateOptionalURIWithRoot = (uriStr: unknown, workspaceRoot?: URI | null) => {
if (isFalsy(uriStr)) return null
return validateURI(uriStr)
return validateURIWithRoot(uriStr, workspaceRoot)
}
const validateOptionalStr = (argName: string, str: unknown) => {
@ -156,6 +178,16 @@ export class ToolsService implements IToolsService {
) {
const queryBuilder = instantiationService.createInstance(QueryBuilder);
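// Resolve the current workspace root lazily so that a workspace switch is picked up
// at call time rather than at construction time. Only the first workspace folder is
// used, so in a multi-root workspace relative paths resolve against that folder.
// These wrap the module-level `*WithRoot` helpers under the original `validateURI` /
// `validateOptionalURI` names so the 11+ call sites below stay terse.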
const getWorkspaceRoot = (): URI | null => {
	// Read the folder list on every call instead of caching it.
	const { folders } = workspaceContextService.getWorkspace()
	// No folder open: callers receive null.
	if (folders.length === 0) return null
	// Only the first folder is consulted, even if several are open.
	return folders[0].uri
}
const validateURI = (uriStr: unknown) => validateURIWithRoot(uriStr, getWorkspaceRoot())
const validateOptionalURI = (uriStr: unknown) => validateOptionalURIWithRoot(uriStr, getWorkspaceRoot())
this.validateParams = {
read_file: (params: RawToolParamsObj) => {
const { uri: uriStr, start_line: startLineUnknown, end_line: endLineUnknown, page_number: pageNumberUnknown } = params

View file

@ -13,6 +13,10 @@ export type ToolMessage<T extends ToolName> = {
content: string; // give this result to LLM (string of value)
id: string;
rawParams: RawToolParamsObj;
// Original serialized `arguments` string from the model's tool call (when available
// from the provider stream — OpenAI-compatible only). Used on replay to send
// byte-identical tool_calls back, preserving the provider's prefix cache.
rawParamsStr?: string;
mcpServerName: string | undefined; // the server name at the time of the call
} & (
// in order of events:

View file

@ -227,9 +227,12 @@ type ProviderReasoningIOSettings = {
// include this in payload to get reasoning
input?: { includeInPayload?: (reasoningState: SendableReasoningInfo) => null | { [key: string]: any }, };
// nameOfFieldInDelta: reasoning output is in response.choices[0].delta[deltaReasoningField]
// may be a single field name or a list of candidates tried in order (first non-empty wins) —
// lets one provider entry cover gateways that standardize on different field names
// (e.g. DeepSeek uses `reasoning_content`, OpenRouter uses `reasoning`).
// needsManualParse: whether we must manually parse out the <think> tags
output?:
| { nameOfFieldInDelta?: string, needsManualParse?: undefined, }
| { nameOfFieldInDelta?: string | string[], needsManualParse?: undefined, }
| { nameOfFieldInDelta?: undefined, needsManualParse?: true, };
}
@ -1254,9 +1257,12 @@ const openaiCompatible: VoidStaticProviderInfo = {
modelOptionsFallback: (modelName) => extensiveModelOptionsFallback(modelName),
modelOptions: {},
providerReasoningIOSettings: {
// reasoning: we have no idea what endpoint they used, so we can't consistently parse out reasoning
// reasoning: we have no idea what endpoint they used, so cover the common field names.
// `reasoning_content` — DeepSeek, vLLM, many self-hosted servers
// `reasoning` — OpenRouter, opencode-style gateways
// `thinking` — some Chinese gateways (Moonshot, Zhipu)
input: { includeInPayload: openAICompatIncludeInPayloadReasoning },
output: { nameOfFieldInDelta: 'reasoning_content' },
output: { nameOfFieldInDelta: ['reasoning_content', 'reasoning', 'thinking'] },
},
}

View file

@ -425,17 +425,25 @@ const systemToolsXMLPrompt = (chatMode: ChatMode, mcpTools: InternalToolInfo[] |
// ======================================================== chat (normal, gather, agent) ========================================================
export const chat_systemMessage = ({ workspaceFolders, openedURIs, activeURI, persistentTerminalIDs, directoryStr, chatMode: mode, mcpTools, includeXMLToolDefinitions }: { workspaceFolders: string[], directoryStr: string, openedURIs: string[], activeURI: string | undefined, persistentTerminalIDs: string[], chatMode: ChatMode, mcpTools: InternalToolInfo[] | undefined, includeXMLToolDefinitions: boolean }) => {
const header = (`You are an expert coding ${mode === 'agent' ? 'agent' : 'assistant'} whose job is \
${mode === 'agent' ? `to help the user develop, run, and make changes to their codebase.`
: mode === 'gather' ? `to search, understand, and reference files in the user's codebase.`
: mode === 'normal' ? `to assist the user with their coding tasks.`
: ''}
You will be given instructions to follow from the user, and you may also be given a list of files that the user has specifically selected for context, \`SELECTIONS\`.
Please assist the user with their query.`)
// Shared input type for the two chat prompt builders: the stable system message
// (`chat_systemMessage`) and the volatile runtime context (`chat_volatileContext`).
// Each builder takes a `Pick<>` of the fields it needs; the fields are kept together
// in one type so callers can compute the workspace snapshot once and feed both.
export type ChatPromptContext = {
// ---- picked by `chat_volatileContext` ----
// Workspace folder paths (presumably filesystem paths — confirm against the caller).
workspaceFolders: string[]
// Pre-rendered directory listing text.
directoryStr: string
// Paths of the currently opened files.
openedURIs: string[]
// Path of the active file; undefined is allowed (presumably when no editor is active).
activeURI: string | undefined
// IDs of persistent terminals.
persistentTerminalIDs: string[]
// ---- picked by both builders ----
// Chat mode; `chat_systemMessage` uses it to choose the persona wording.
chatMode: ChatMode
// ---- picked by `chat_systemMessage` ----
// MCP tools passed to the XML tool prompt; may be undefined.
mcpTools: InternalToolInfo[] | undefined
// When true, `chat_systemMessage` includes the XML tool definitions in its output.
includeXMLToolDefinitions: boolean
}
// Returns the volatile runtime-grounding block as a standalone string. Callers
// should prepend this to the latest user message (Phase B caching layout) rather
// than embed it in the system message — keeping it out of the system message lets
// the stable prefix and the full conversation history be prefix-cached across turns.
export const chat_volatileContext = ({ workspaceFolders, openedURIs, activeURI, persistentTerminalIDs, directoryStr, chatMode: mode }: Pick<ChatPromptContext, 'workspaceFolders' | 'directoryStr' | 'openedURIs' | 'activeURI' | 'persistentTerminalIDs' | 'chatMode'>) => {
const sysInfo = (`Here is the user's system information:
<system_info>
- ${os}
@ -459,6 +467,29 @@ ${directoryStr}
</files_overview>`)
// XML tag is self-describing; no narration prefix. Keep field order stable
// so that on turns where volatile fields happen to match the previous turn,
// the cache can extend further into the prefix.
return (`<volatile_context>
Today's date is ${new Date().toDateString()}.
${sysInfo}
${fsInfo}
</volatile_context>`)
}
export const chat_systemMessage = ({ chatMode: mode, mcpTools, includeXMLToolDefinitions }: Pick<ChatPromptContext, 'chatMode' | 'mcpTools' | 'includeXMLToolDefinitions'>) => {
const header = (`You are an expert coding ${mode === 'agent' ? 'agent' : 'assistant'} whose job is \
${mode === 'agent' ? `to help the user develop, run, and make changes to their codebase.`
: mode === 'gather' ? `to search, understand, and reference files in the user's codebase.`
: mode === 'normal' ? `to assist the user with their coding tasks.`
: ''}
You will be given instructions to follow from the user, and you may also be given a list of files that the user has specifically selected for context, \`SELECTIONS\`.
Please assist the user with their query.`)
const toolDefinitions = includeXMLToolDefinitions ? systemToolsXMLPrompt(mode, mcpTools) : null
const details: string[] = []
@ -506,19 +537,21 @@ Here's an example of a good code block:\n${chatSuggestionDiffExample}`)
details.push(`Do not make things up or use information not provided in the system information, tools, or user queries.`)
details.push(`Always use MARKDOWN to format lists, bullet points, etc. Do NOT write tables.`)
details.push(`Today's date is ${new Date().toDateString()}.`)
const importantDetails = (`Important notes:
${details.map((d, i) => `${i + 1}. ${d}`).join('\n\n')}`)
// return answer
// System message contains ONLY stable content (persona, rules, tool definitions)
// so the entire system prefix is eligible for cross-turn prefix caching. Anything
// that can change between turns (active file, open tabs, today's date, directory
// listing, terminal IDs) lives in `chat_volatileContext` and is baked into each
// user message's stored content at thread-creation time by chatThreadService.
// That keeps historical turns byte-identical across subsequent requests so the
// provider's prefix cache stays warm as the conversation grows.
const ansStrs: string[] = []
ansStrs.push(header)
ansStrs.push(sysInfo)
if (toolDefinitions) ansStrs.push(toolDefinitions)
ansStrs.push(importantDetails)
ansStrs.push(fsInfo)
if (toolDefinitions) ansStrs.push(toolDefinitions)
const fullSystemMsgStr = ansStrs
.join('\n\n\n')

View file

@ -84,6 +84,12 @@ export type RawToolParamsObj = {
export type RawToolCallObj = {
name: ToolName;
rawParams: RawToolParamsObj;
// Original serialized `arguments` string as the model emitted it (OpenAI-compatible
// path only — Anthropic/Gemini deliver tool input as structured JSON with no raw
// source string). Preserved so that on replay we can send byte-identical content
// back to the provider, which keeps the prefix cache warm past the tool call.
// Absent/undefined when not available; callers should fall back to JSON.stringify(rawParams).
rawParamsStr?: string;
doneParams: ToolParamName<ToolName>[];
id: string;
isDone: boolean;

View file

@ -252,7 +252,10 @@ const rawToolCallObjOfParamsStr = (name: string, toolParamsStr: string, id: stri
if (typeof input !== 'object') return null
const rawParams: RawToolParamsObj = input
return { id, name, rawParams, doneParams: Object.keys(rawParams), isDone: true }
// Preserve the original argument string exactly as the model emitted it. On replay
// we'll send this back verbatim inside `tool_calls[].function.arguments` so the
// provider sees byte-identical content and the prefix cache stays warm.
return { id, name, rawParams, rawParamsStr: toolParamsStr, doneParams: Object.keys(rawParams), isDone: true }
}
@ -365,11 +368,17 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
}
// reasoning
// reasoning — nameOfReasoningFieldInDelta may be a single field or a list of candidates
// (some gateways like OpenRouter use `reasoning`, others like DeepSeek use
// `reasoning_content`). Take the first non-empty one this chunk provides.
let newReasoning = ''
if (nameOfReasoningFieldInDelta) {
// @ts-ignore
newReasoning = (chunk.choices[0]?.delta?.[nameOfReasoningFieldInDelta] || '') + ''
const fields = Array.isArray(nameOfReasoningFieldInDelta) ? nameOfReasoningFieldInDelta : [nameOfReasoningFieldInDelta]
for (const f of fields) {
// @ts-ignore
const val = (chunk.choices[0]?.delta?.[f] || '') + ''
if (val) { newReasoning = val; break }
}
fullReasoningSoFar += newReasoning
}
@ -821,9 +830,19 @@ const sendGeminiChat = async ({
// Process the stream
for await (const chunk of stream) {
// message
const newText = chunk.text ?? ''
fullTextSoFar += newText
// message — split thought-tagged parts from answer parts.
// Gemini 2.5 Pro / Gemma 4 route internal reasoning through parts with
// `thought: true`; the visible answer lives in plain text parts. Using
// `chunk.text` (SDK shortcut) would concatenate both, polluting the
// chat view and the stored message history.
const parts = chunk.candidates?.[0]?.content?.parts
if (parts) {
for (const part of parts) {
if (typeof part.text !== 'string') continue // skip functionCall / inlineData / etc.
if (part.thought === true) fullReasoningSoFar += part.text
else fullTextSoFar += part.text
}
}
// tool call
const functionCalls = chunk.functionCalls
@ -834,17 +853,21 @@ const sendGeminiChat = async ({
toolId = functionCall.id ?? ''
}
// (do not handle reasoning yet)
// usage (Gemini exposes promptTokenCount / candidatesTokenCount / totalTokenCount /
// thoughtsTokenCount via usageMetadata). Only update when the chunk reports it.
// thoughtsTokenCount / cachedContentTokenCount via usageMetadata). Multiple
// chunks can carry usageMetadata during a stream, and the field set is NOT
// consistent across chunks — notably, cachedContentTokenCount often appears
// on an early chunk and is absent from the final summary. Merge per-field
// with `??` so we preserve the best value seen so far instead of flickering
// to `undefined` when Google stops reporting a field.
const usageMetadata = chunk.usageMetadata
if (usageMetadata) {
latestUsage = {
inputTokens: usageMetadata.promptTokenCount,
outputTokens: usageMetadata.candidatesTokenCount,
totalTokens: usageMetadata.totalTokenCount,
reasoningTokens: usageMetadata.thoughtsTokenCount,
inputTokens: usageMetadata.promptTokenCount ?? latestUsage?.inputTokens,
outputTokens: usageMetadata.candidatesTokenCount ?? latestUsage?.outputTokens,
totalTokens: usageMetadata.totalTokenCount ?? latestUsage?.totalTokens,
reasoningTokens: usageMetadata.thoughtsTokenCount ?? latestUsage?.reasoningTokens,
cachedInputTokens: usageMetadata.cachedContentTokenCount ?? latestUsage?.cachedInputTokens,
}
}