mirror of
https://github.com/voideditor/void
synced 2026-05-22 17:08:25 +00:00
Feature/improve chat experience (#7)
* handle relative paths when the agent asks to interact with files or directories
* handle Gemini reasoning output
* move the volatile system message to the last part
* handle reasoning for OpenAI-compatible opencode
* serialize tool call arguments to improve cache hit rate
This commit is contained in:
parent
f4e5b9e91a
commit
7603d8f9a2
9 changed files with 238 additions and 81 deletions
|
|
@ -205,6 +205,7 @@ export type ThreadStreamState = {
|
|||
id: string;
|
||||
content: string;
|
||||
rawParams: RawToolParamsObj;
|
||||
rawParamsStr?: string;
|
||||
mcpServerName: string | undefined;
|
||||
};
|
||||
interrupt: Promise<() => void>;
|
||||
|
|
@ -536,7 +537,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
// if running now but stream state doesn't indicate it (happens if restart Void), cancel that last tool
|
||||
if (lastMessage && lastMessage.role === 'tool' && lastMessage.type === 'running_now') {
|
||||
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', content: lastMessage.content, id: lastMessage.id, rawParams: lastMessage.rawParams, result: null, name: lastMessage.name, params: lastMessage.params, mcpServerName: lastMessage.mcpServerName })
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', content: lastMessage.content, id: lastMessage.id, rawParams: lastMessage.rawParams, rawParamsStr: lastMessage.rawParamsStr, result: null, name: lastMessage.name, params: lastMessage.params, mcpServerName: lastMessage.mcpServerName })
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -681,10 +682,10 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
}
|
||||
else return
|
||||
|
||||
const { name, id, rawParams, mcpServerName } = lastMsg
|
||||
const { name, id, rawParams, rawParamsStr, mcpServerName } = lastMsg
|
||||
|
||||
const errorMessage = this.toolErrMsgs.rejected
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params: params, name: name, content: errorMessage, result: null, id, rawParams, mcpServerName })
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params: params, name: name, content: errorMessage, result: null, id, rawParams, rawParamsStr, mcpServerName })
|
||||
this._setStreamState(threadId, undefined)
|
||||
}
|
||||
|
||||
|
|
@ -704,9 +705,9 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
}
|
||||
// add tool that's running
|
||||
else if (this.streamState[threadId]?.isRunning === 'tool') {
|
||||
const { toolName, toolParams, id, content: content_, rawParams, mcpServerName } = this.streamState[threadId].toolInfo
|
||||
const { toolName, toolParams, id, content: content_, rawParams, rawParamsStr, mcpServerName } = this.streamState[threadId].toolInfo
|
||||
const content = content_ || this.toolErrMsgs.interrupted
|
||||
this._updateLatestTool(threadId, { role: 'tool', name: toolName, params: toolParams, id, content, rawParams, type: 'rejected', result: null, mcpServerName })
|
||||
this._updateLatestTool(threadId, { role: 'tool', name: toolName, params: toolParams, id, content, rawParams, rawParamsStr, type: 'rejected', result: null, mcpServerName })
|
||||
}
|
||||
// reject the tool for the user if relevant
|
||||
else if (this.streamState[threadId]?.isRunning === 'awaiting_user') {
|
||||
|
|
@ -745,8 +746,12 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
toolName: ToolName,
|
||||
toolId: string,
|
||||
mcpServerName: string | undefined,
|
||||
opts: { preapproved: true, unvalidatedToolParams: RawToolParamsObj, validatedParams: ToolCallParams<ToolName> } | { preapproved: false, unvalidatedToolParams: RawToolParamsObj },
|
||||
opts: { preapproved: true, unvalidatedToolParams: RawToolParamsObj, validatedParams: ToolCallParams<ToolName>, rawParamsStr?: string } | { preapproved: false, unvalidatedToolParams: RawToolParamsObj, rawParamsStr?: string },
|
||||
): Promise<{ awaitingUserApproval?: boolean, interrupted?: boolean }> => {
|
||||
// Carry the model's original serialized arguments string (when available) into
|
||||
// every tool message we persist. This lets the replay path send byte-identical
|
||||
// tool_calls back to the provider, preserving the prefix cache across turns.
|
||||
const rawParamsStr = opts.rawParamsStr
|
||||
|
||||
// compute these below
|
||||
let toolParams: ToolCallParams<ToolName>
|
||||
|
|
@ -770,7 +775,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
}
|
||||
catch (error) {
|
||||
const errorMessage = getErrorMessage(error)
|
||||
this._addMessageToThread(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName })
|
||||
this._addMessageToThread(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, rawParamsStr, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName })
|
||||
return {}
|
||||
}
|
||||
// once validated, add checkpoint for edit
|
||||
|
|
@ -783,7 +788,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
if (approvalType) {
|
||||
const autoApprove = this._settingsService.state.globalSettings.autoApprove[approvalType]
|
||||
// add a tool_request because we use it for UI if a tool is loading (this should be improved in the future)
|
||||
this._addMessageToThread(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName })
|
||||
this._addMessageToThread(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
|
||||
if (!autoApprove) {
|
||||
return { awaitingUserApproval: true }
|
||||
}
|
||||
|
|
@ -800,7 +805,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
|
||||
// 3. call the tool
|
||||
// this._setStreamState(threadId, { isRunning: 'tool' }, 'merge')
|
||||
const runningTool = { role: 'tool', type: 'running_now', name: toolName, params: toolParams, content: '(value not received yet...)', result: null, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName } as const
|
||||
const runningTool = { role: 'tool', type: 'running_now', name: toolName, params: toolParams, content: '(value not received yet...)', result: null, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName } as const
|
||||
this._updateLatestTool(threadId, runningTool)
|
||||
|
||||
|
||||
|
|
@ -810,7 +815,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
try {
|
||||
|
||||
// set stream state
|
||||
this._setStreamState(threadId, { isRunning: 'tool', interrupt: interruptorPromise, toolInfo: { toolName, toolParams, id: toolId, content: 'interrupted...', rawParams: opts.unvalidatedToolParams, mcpServerName } })
|
||||
this._setStreamState(threadId, { isRunning: 'tool', interrupt: interruptorPromise, toolInfo: { toolName, toolParams, id: toolId, content: 'interrupted...', rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName } })
|
||||
|
||||
if (isBuiltInTool) {
|
||||
const { result, interruptTool } = await this._toolsService.callTool[toolName](toolParams as any)
|
||||
|
|
@ -840,7 +845,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
if (interrupted) { return { interrupted: true } } // the tool result is added where we interrupt, not here
|
||||
|
||||
const errorMessage = getErrorMessage(error)
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName })
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
|
||||
return {}
|
||||
}
|
||||
|
||||
|
|
@ -855,12 +860,12 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
}
|
||||
} catch (error) {
|
||||
const errorMessage = this.toolErrMsgs.errWhenStringifying(error)
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName })
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'tool_error', params: toolParams, result: errorMessage, name: toolName, content: errorMessage, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
|
||||
return {}
|
||||
}
|
||||
|
||||
// 5. add to history and keep going
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'success', params: toolParams, result: toolResult, name: toolName, content: toolResultStr, id: toolId, rawParams: opts.unvalidatedToolParams, mcpServerName })
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'success', params: toolParams, result: toolResult, name: toolName, content: toolResultStr, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
|
||||
return {}
|
||||
};
|
||||
|
||||
|
|
@ -895,7 +900,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
|
||||
// before enter loop, call tool
|
||||
if (callThisToolFirst) {
|
||||
const { interrupted } = await this._runToolCall(threadId, callThisToolFirst.name, callThisToolFirst.id, callThisToolFirst.mcpServerName, { preapproved: true, unvalidatedToolParams: callThisToolFirst.rawParams, validatedParams: callThisToolFirst.params })
|
||||
const { interrupted } = await this._runToolCall(threadId, callThisToolFirst.name, callThisToolFirst.id, callThisToolFirst.mcpServerName, { preapproved: true, unvalidatedToolParams: callThisToolFirst.rawParams, rawParamsStr: callThisToolFirst.rawParamsStr, validatedParams: callThisToolFirst.params })
|
||||
if (interrupted) {
|
||||
this._setStreamState(threadId, undefined)
|
||||
this._addUserCheckpoint({ threadId })
|
||||
|
|
@ -1026,7 +1031,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
const mcpTools = this._mcpService.getMCPTools()
|
||||
const mcpTool = mcpTools?.find(t => t.name === toolCall.name)
|
||||
|
||||
const { awaitingUserApproval, interrupted } = await this._runToolCall(threadId, toolCall.name, toolCall.id, mcpTool?.mcpServerName, { preapproved: false, unvalidatedToolParams: toolCall.rawParams })
|
||||
const { awaitingUserApproval, interrupted } = await this._runToolCall(threadId, toolCall.name, toolCall.id, mcpTool?.mcpServerName, { preapproved: false, unvalidatedToolParams: toolCall.rawParams, rawParamsStr: toolCall.rawParamsStr })
|
||||
if (interrupted) {
|
||||
this._setStreamState(threadId, undefined)
|
||||
return
|
||||
|
|
@ -1391,7 +1396,19 @@ We only need to do it for files that were edited since `from`, ie files between
|
|||
const currSelns: StagingSelectionItem[] = _chatSelections ?? thread.state.stagingSelections
|
||||
|
||||
const userMessageContent = await chat_userMessageContent(instructions, currSelns, { directoryStrService: this._directoryStringService, fileService: this._fileService }) // user message + names of files (NOT content)
|
||||
const userHistoryElt: ChatMessage = { role: 'user', content: userMessageContent, displayContent: instructions, selections: currSelns, state: defaultMessageState }
|
||||
|
||||
// Snapshot the volatile runtime context (date, open files, active URI,
|
||||
// directory listing, terminal IDs) into this user message's stored content
|
||||
// so past turns stay byte-identical across subsequent requests. The volatile
|
||||
// block goes into `content` (what the LLM sees) but NOT into `displayContent`
|
||||
// (what the UI renders), so the chat bubble shows only the user's words.
|
||||
const { chatMode } = this._settingsService.state.globalSettings
|
||||
const volatileBlock = await this._convertToLLMMessagesService.generateChatVolatileContext({ chatMode })
|
||||
const contentWithVolatile = volatileBlock
|
||||
? `${volatileBlock}\n\n${userMessageContent}`
|
||||
: userMessageContent
|
||||
|
||||
const userHistoryElt: ChatMessage = { role: 'user', content: contentWithVolatile, displayContent: instructions, selections: currSelns, state: defaultMessageState }
|
||||
this._addMessageToThread(threadId, userHistoryElt)
|
||||
|
||||
this._setThreadState(threadId, { currCheckpointIdx: null }) // no longer at a checkpoint because started streaming
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import { IWorkspaceContextService } from '../../../../platform/workspace/common/
|
|||
import { IEditorService } from '../../../services/editor/common/editorService.js';
|
||||
import { ChatMessage } from '../common/chatThreadServiceTypes.js';
|
||||
import { getIsReasoningEnabledState, getReservedOutputTokenSpace, getModelCapabilities } from '../common/modelCapabilities.js';
|
||||
import { reParsedToolXMLString, chat_systemMessage } from '../common/prompt/prompts.js';
|
||||
import { reParsedToolXMLString, chat_systemMessage, chat_volatileContext } from '../common/prompt/prompts.js';
|
||||
import { AnthropicLLMChatMessage, AnthropicReasoning, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, OpenAILLMChatMessage, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
|
||||
import { IVoidSettingsService } from '../common/voidSettingsService.js';
|
||||
import { ChatMode, FeatureName, ModelSelection, ProviderName } from '../common/voidSettingsTypes.js';
|
||||
|
|
@ -29,6 +29,10 @@ type SimpleLLMMessage = {
|
|||
id: string;
|
||||
name: ToolName;
|
||||
rawParams: RawToolParamsObj;
|
||||
// Original serialized arguments string from the model's tool call (OpenAI-compat
|
||||
// only). When present, used verbatim on replay to keep the provider's prefix cache
|
||||
// matching across turns. Falls back to JSON.stringify(rawParams) when absent.
|
||||
rawParamsStr?: string;
|
||||
} | {
|
||||
role: 'user';
|
||||
content: string;
|
||||
|
|
@ -84,12 +88,17 @@ const prepareMessages_openai_tools = (messages: SimpleLLMMessage[]): AnthropicOr
|
|||
// edit previous assistant message to have called the tool
|
||||
const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined
|
||||
if (prevMsg?.role === 'assistant') {
|
||||
// Prefer the model's original serialized argument string when we have it
|
||||
// (OpenAI-compatible providers expose it in the streaming delta). Sending
|
||||
// byte-identical bytes back preserves the provider's prefix cache past the
|
||||
// tool call. Fall back to re-serializing when the raw string is unavailable
|
||||
// (e.g. conversations from before this field existed, or non-OpenAI provenance).
|
||||
prevMsg.tool_calls = [{
|
||||
type: 'function',
|
||||
id: currMsg.id,
|
||||
function: {
|
||||
name: currMsg.name,
|
||||
arguments: JSON.stringify(currMsg.rawParams)
|
||||
arguments: currMsg.rawParamsStr ?? JSON.stringify(currMsg.rawParams)
|
||||
}
|
||||
}]
|
||||
}
|
||||
|
|
@ -524,6 +533,12 @@ export interface IConvertToLLMMessageService {
|
|||
prepareLLMSimpleMessages: (opts: { simpleMessages: SimpleLLMMessage[], systemMessage: string, modelSelection: ModelSelection | null, featureName: FeatureName }) => { messages: LLMChatMessage[], separateSystemMessage: string | undefined }
|
||||
prepareLLMChatMessages: (opts: { chatMessages: ChatMessage[], chatMode: ChatMode, modelSelection: ModelSelection | null }) => Promise<{ messages: LLMChatMessage[], separateSystemMessage: string | undefined }>
|
||||
prepareFIMMessage(opts: { messages: LLMFIMMessage, }): { prefix: string, suffix: string, stopTokens: string[] }
|
||||
// Called by chat creation paths to snapshot runtime grounding (date, open files,
|
||||
// active URI, directory listing, terminal IDs) into a user message at storage time.
|
||||
// Baking volatile into the stored content (rather than prepending at send time)
|
||||
// keeps prior turns byte-identical across requests so the provider's prefix cache
|
||||
// stays warm turn-over-turn.
|
||||
generateChatVolatileContext: (opts: { chatMode: ChatMode }) => Promise<string>
|
||||
}
|
||||
|
||||
export const IConvertToLLMMessageService = createDecorator<IConvertToLLMMessageService>('ConvertToLLMMessageService');
|
||||
|
|
@ -575,26 +590,30 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
|
|||
}
|
||||
|
||||
|
||||
// system message
|
||||
private _generateChatMessagesSystemMessage = async (chatMode: ChatMode, specialToolFormat: 'openai-style' | 'anthropic-style' | 'gemini-style' | undefined) => {
|
||||
const workspaceFolders = this.workspaceContextService.getWorkspace().folders.map(f => f.uri.fsPath)
|
||||
// Builds the stable system message only; the volatile-context block is not computed here.
|
||||
// The stable system message contains only cacheable content (persona, rules, tool
|
||||
// definitions). The volatile block (runtime grounding: date, open files, active
|
||||
// URI, directory listing, terminal IDs) is generated separately via
|
||||
// `generateChatVolatileContext` and baked into the user message at storage time
|
||||
// by the chat thread creation path — that keeps historical turns byte-identical
|
||||
// across requests so the provider's prefix cache stays warm.
|
||||
private _generateChatSystemMessage = (chatMode: ChatMode, specialToolFormat: 'openai-style' | 'anthropic-style' | 'gemini-style' | undefined) => {
	// XML tool definitions are only embedded in the prompt when the model has no
	// special tool-call format of its own.
	const includeXMLToolDefinitions = !specialToolFormat
	return chat_systemMessage({
		chatMode,
		mcpTools: this.mcpService.getMCPTools(),
		includeXMLToolDefinitions,
	})
}
|
||||
|
||||
generateChatVolatileContext: IConvertToLLMMessageService['generateChatVolatileContext'] = async ({ chatMode }) => {
|
||||
const workspaceFolders = this.workspaceContextService.getWorkspace().folders.map(f => f.uri.fsPath)
|
||||
const openedURIs = this.modelService.getModels().filter(m => m.isAttachedToEditor()).map(m => m.uri.fsPath) || [];
|
||||
const activeURI = this.editorService.activeEditor?.resource?.fsPath;
|
||||
|
||||
const directoryStr = await this.directoryStrService.getAllDirectoriesStr({
|
||||
cutOffMessage: chatMode === 'agent' || chatMode === 'gather' ?
|
||||
`...Directories string cut off, use tools to read more...`
|
||||
: `...Directories string cut off, ask user for more if necessary...`
|
||||
})
|
||||
|
||||
const includeXMLToolDefinitions = !specialToolFormat
|
||||
|
||||
const mcpTools = this.mcpService.getMCPTools()
|
||||
|
||||
const persistentTerminalIDs = this.terminalToolService.listPersistentTerminalIds()
|
||||
const systemMessage = chat_systemMessage({ workspaceFolders, openedURIs, directoryStr, activeURI, persistentTerminalIDs, chatMode, mcpTools, includeXMLToolDefinitions })
|
||||
return systemMessage
|
||||
return chat_volatileContext({ workspaceFolders, openedURIs, activeURI, persistentTerminalIDs, directoryStr, chatMode })
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -622,6 +641,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
|
|||
name: m.name,
|
||||
id: m.id,
|
||||
rawParams: m.rawParams,
|
||||
rawParamsStr: m.rawParamsStr,
|
||||
})
|
||||
}
|
||||
else if (m.role === 'user') {
|
||||
|
|
@ -680,7 +700,7 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
|
|||
} = getModelCapabilities(providerName, modelName, overridesOfModel)
|
||||
|
||||
const { disableSystemMessage } = this.voidSettingsService.state.globalSettings;
|
||||
const fullSystemMessage = await this._generateChatMessagesSystemMessage(chatMode, specialToolFormat)
|
||||
const fullSystemMessage = this._generateChatSystemMessage(chatMode, specialToolFormat)
|
||||
const systemMessage = disableSystemMessage ? '' : fullSystemMessage;
|
||||
|
||||
const modelSelectionOptions = this.voidSettingsService.state.optionsOfModelSelection['Chat'][modelSelection.providerName]?.[modelSelection.modelName]
|
||||
|
|
@ -689,6 +709,11 @@ class ConvertToLLMMessageService extends Disposable implements IConvertToLLMMess
|
|||
const aiInstructions = this._getCombinedAIInstructions();
|
||||
const isReasoningEnabled = getIsReasoningEnabledState('Chat', providerName, modelName, modelSelectionOptions, overridesOfModel)
|
||||
const reservedOutputTokenSpace = getReservedOutputTokenSpace(providerName, modelName, { isReasoningEnabled, overridesOfModel })
|
||||
// Volatile context is baked into user messages at thread-creation time
|
||||
// (see `chatThreadService._addUserMessageAndStreamResponse`). At send time
|
||||
// the stored content is passed through verbatim so each past turn is
|
||||
// byte-identical to what was sent before, keeping the provider's prefix
|
||||
// cache warm across turns.
|
||||
const llmMessages = this._chatMessagesToSimpleMessages(chatMessages)
|
||||
|
||||
const { messages, separateSystemMessage } = prepareMessages({
|
||||
|
|
|
|||
|
|
@ -1515,12 +1515,23 @@ const ReasoningWrapper = ({ isDoneReasoning, isStreaming, children }: { isDoneRe
|
|||
const isDone = isDoneReasoning || !isStreaming
|
||||
const isWriting = !isDone
|
||||
const [isOpen, setIsOpen] = useState(isWriting)
|
||||
const scrollRef = useRef<HTMLDivElement>(null)
|
||||
useEffect(() => {
|
||||
if (!isWriting) setIsOpen(false) // if just finished reasoning, close
|
||||
}, [isWriting])
|
||||
// While streaming, keep the box pinned to the bottom so the user sees the
|
||||
// latest thoughts without having to scroll. Once done, respect user scroll.
|
||||
useEffect(() => {
|
||||
if (!isWriting || !isOpen) return
|
||||
const el = scrollRef.current
|
||||
if (el) el.scrollTop = el.scrollHeight
|
||||
}, [children, isWriting, isOpen])
|
||||
return <ToolHeaderWrapper title='Reasoning' desc1={isWriting ? <IconLoading /> : ''} isOpen={isOpen} onClick={() => setIsOpen(v => !v)}>
|
||||
<ToolChildrenWrapper>
|
||||
<div className='!select-text cursor-auto'>
|
||||
<div
|
||||
ref={scrollRef}
|
||||
className='!select-text cursor-auto max-h-60 overflow-y-auto'
|
||||
>
|
||||
{children}
|
||||
</div>
|
||||
</ToolChildrenWrapper>
|
||||
|
|
|
|||
|
|
@ -38,38 +38,60 @@ const validateStr = (argName: string, value: unknown) => {
|
|||
}
|
||||
|
||||
|
||||
// We are NOT checking to make sure in workspace
|
||||
const validateURI = (uriStr: unknown) => {
|
||||
// Detects whether a plain path string is absolute.
|
||||
// - Unix absolute: starts with '/'
|
||||
// - Windows absolute: drive letter followed by ':\' or ':/' (e.g. 'C:\...', 'c:/...')
|
||||
// - UNC path: starts with '\\'
|
||||
const isAbsolutePathString = (s: string) => {
	// POSIX-style root, e.g. '/home/user/file.txt'
	const isPosixRooted = s.startsWith('/')
	// UNC share, e.g. '\\server\share'
	const isUncPath = s.startsWith('\\\\')
	// Windows drive root with either separator, e.g. 'C:\...' or 'c:/...'
	const isDriveRooted = /^[a-zA-Z]:[\\/]/.test(s)
	return isPosixRooted || isUncPath || isDriveRooted
}
|
||||
|
||||
// We are NOT checking to make sure in workspace.
|
||||
// workspaceRoot is optional; when provided, bare relative paths like "src/foo.ts" or
|
||||
// "./README.md" are resolved against it. Without it (or when no workspace is open),
|
||||
// we fall back to URI.file which resolves relative paths against the filesystem root —
|
||||
// same as the legacy behavior, but that's the pathological case we want to avoid.
|
||||
// Prefer the workspace-aware `validateURI` bound inside ToolsService; this raw
|
||||
// version is module-private and exists only for internal re-use.
|
||||
const validateURIWithRoot = (uriStr: unknown, workspaceRoot?: URI | null) => {
|
||||
if (uriStr === null) throw new Error(`Invalid LLM output: uri was null.`)
|
||||
if (typeof uriStr !== 'string') throw new Error(`Invalid LLM output format: Provided uri must be a string, but it's a(n) ${typeof uriStr}. Full value: ${JSON.stringify(uriStr)}.`)
|
||||
|
||||
// Check if it's already a full URI with scheme (e.g., vscode-remote://, file://, etc.)
|
||||
// Look for :// pattern which indicates a scheme is present
|
||||
// Examples of supported URIs:
|
||||
// - vscode-remote://wsl+Ubuntu/home/user/file.txt (WSL)
|
||||
// - vscode-remote://ssh-remote+myserver/home/user/file.txt (SSH)
|
||||
// - file:///home/user/file.txt (local file with scheme)
|
||||
// - /home/user/file.txt (local file path, will be converted to file://)
|
||||
// - C:\Users\file.txt (Windows local path, will be converted to file://)
|
||||
// Scheme-qualified URI (e.g. vscode-remote://, file://, etc.) — parse as-is.
|
||||
if (uriStr.includes('://')) {
|
||||
try {
|
||||
const uri = URI.parse(uriStr)
|
||||
return uri
|
||||
} catch (e) {
|
||||
// If parsing fails, it's a malformed URI
|
||||
throw new Error(`Invalid URI format: ${uriStr}. Error: ${e}`)
|
||||
}
|
||||
} else {
|
||||
// No scheme present, treat as file path
|
||||
// This handles regular file paths like /home/user/file.txt or C:\Users\file.txt
|
||||
const uri = URI.file(uriStr)
|
||||
return uri
|
||||
}
|
||||
|
||||
// Absolute path — safe to pass to URI.file.
|
||||
if (isAbsolutePathString(uriStr)) {
|
||||
return URI.file(uriStr)
|
||||
}
|
||||
|
||||
// Relative path (e.g. "README.md", "src/foo.ts", "./foo", "../bar").
|
||||
// Resolve against workspace root when available. This is the critical branch:
|
||||
// weak models naturally produce bare filenames, and without this resolution
|
||||
// URI.file("README.md") would become file:///README.md (root of filesystem),
|
||||
// forcing models to fall back to terminal commands.
|
||||
if (workspaceRoot) {
|
||||
return URI.joinPath(workspaceRoot, uriStr)
|
||||
}
|
||||
|
||||
// No workspace — legacy fallback. Will resolve from filesystem root and likely fail,
|
||||
// but preserves prior behavior for the (rare) no-workspace case.
|
||||
return URI.file(uriStr)
|
||||
}
|
||||
|
||||
const validateOptionalURI = (uriStr: unknown) => {
|
||||
const validateOptionalURIWithRoot = (uriStr: unknown, workspaceRoot?: URI | null) => {
|
||||
if (isFalsy(uriStr)) return null
|
||||
return validateURI(uriStr)
|
||||
return validateURIWithRoot(uriStr, workspaceRoot)
|
||||
}
|
||||
|
||||
const validateOptionalStr = (argName: string, str: unknown) => {
|
||||
|
|
@ -156,6 +178,16 @@ export class ToolsService implements IToolsService {
|
|||
) {
|
||||
const queryBuilder = instantiationService.createInstance(QueryBuilder);
|
||||
|
||||
// The workspace root is looked up on every call rather than captured at construction
// time, so a workspace switch is reflected the next time a tool validates a path.
// Only the first workspace folder is used; with no folder open the root is null.
const getWorkspaceRoot = (): URI | null => {
	const { folders } = workspaceContextService.getWorkspace()
	if (folders.length === 0) return null
	return folders[0].uri
}
// Workspace-aware wrappers around the module-level `*WithRoot` helpers, kept under the
// short names so the many call sites below stay terse.
const validateURI = (uriStr: unknown) => {
	return validateURIWithRoot(uriStr, getWorkspaceRoot())
}
const validateOptionalURI = (uriStr: unknown) => {
	return validateOptionalURIWithRoot(uriStr, getWorkspaceRoot())
}
|
||||
|
||||
this.validateParams = {
|
||||
read_file: (params: RawToolParamsObj) => {
|
||||
const { uri: uriStr, start_line: startLineUnknown, end_line: endLineUnknown, page_number: pageNumberUnknown } = params
|
||||
|
|
|
|||
|
|
@ -13,6 +13,10 @@ export type ToolMessage<T extends ToolName> = {
|
|||
content: string; // give this result to LLM (string of value)
|
||||
id: string;
|
||||
rawParams: RawToolParamsObj;
|
||||
// Original serialized `arguments` string from the model's tool call (when available
|
||||
// from the provider stream — OpenAI-compatible only). Used on replay to send
|
||||
// byte-identical tool_calls back, preserving the provider's prefix cache.
|
||||
rawParamsStr?: string;
|
||||
mcpServerName: string | undefined; // the server name at the time of the call
|
||||
} & (
|
||||
// in order of events:
|
||||
|
|
|
|||
|
|
@ -227,9 +227,12 @@ type ProviderReasoningIOSettings = {
|
|||
// include this in payload to get reasoning
|
||||
input?: { includeInPayload?: (reasoningState: SendableReasoningInfo) => null | { [key: string]: any }, };
|
||||
// nameOfFieldInDelta: reasoning output is in response.choices[0].delta[deltaReasoningField]
|
||||
// may be a single field name or a list of candidates tried in order (first non-empty wins) —
|
||||
// lets one provider entry cover gateways that standardize on different field names
|
||||
// (e.g. DeepSeek uses `reasoning_content`, OpenRouter uses `reasoning`).
|
||||
// needsManualParse: whether we must manually parse out the <think> tags
|
||||
output?:
|
||||
| { nameOfFieldInDelta?: string, needsManualParse?: undefined, }
|
||||
| { nameOfFieldInDelta?: string | string[], needsManualParse?: undefined, }
|
||||
| { nameOfFieldInDelta?: undefined, needsManualParse?: true, };
|
||||
}
|
||||
|
||||
|
|
@ -1254,9 +1257,12 @@ const openaiCompatible: VoidStaticProviderInfo = {
|
|||
modelOptionsFallback: (modelName) => extensiveModelOptionsFallback(modelName),
|
||||
modelOptions: {},
|
||||
providerReasoningIOSettings: {
|
||||
// reasoning: we have no idea what endpoint they used, so we can't consistently parse out reasoning
|
||||
// reasoning: we have no idea what endpoint they used, so cover the common field names.
|
||||
// `reasoning_content` — DeepSeek, vLLM, many self-hosted servers
|
||||
// `reasoning` — OpenRouter, opencode-style gateways
|
||||
// `thinking` — some Chinese gateways (Moonshot, Zhipu)
|
||||
input: { includeInPayload: openAICompatIncludeInPayloadReasoning },
|
||||
output: { nameOfFieldInDelta: 'reasoning_content' },
|
||||
output: { nameOfFieldInDelta: ['reasoning_content', 'reasoning', 'thinking'] },
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -425,17 +425,25 @@ const systemToolsXMLPrompt = (chatMode: ChatMode, mcpTools: InternalToolInfo[] |
|
|||
// ======================================================== chat (normal, gather, agent) ========================================================
|
||||
|
||||
|
||||
export const chat_systemMessage = ({ workspaceFolders, openedURIs, activeURI, persistentTerminalIDs, directoryStr, chatMode: mode, mcpTools, includeXMLToolDefinitions }: { workspaceFolders: string[], directoryStr: string, openedURIs: string[], activeURI: string | undefined, persistentTerminalIDs: string[], chatMode: ChatMode, mcpTools: InternalToolInfo[] | undefined, includeXMLToolDefinitions: boolean }) => {
|
||||
const header = (`You are an expert coding ${mode === 'agent' ? 'agent' : 'assistant'} whose job is \
|
||||
${mode === 'agent' ? `to help the user develop, run, and make changes to their codebase.`
|
||||
: mode === 'gather' ? `to search, understand, and reference files in the user's codebase.`
|
||||
: mode === 'normal' ? `to assist the user with their coding tasks.`
|
||||
: ''}
|
||||
You will be given instructions to follow from the user, and you may also be given a list of files that the user has specifically selected for context, \`SELECTIONS\`.
|
||||
Please assist the user with their query.`)
|
||||
|
||||
// Shared input type between the stable system message and the volatile context.
|
||||
// Kept together so callers can compute the workspace snapshot once and feed both.
|
||||
export type ChatPromptContext = {
	/** Paths of the workspace folders. */
	workspaceFolders: string[];
	/** Pre-rendered directory listing string for the workspace. */
	directoryStr: string;
	/** Paths of the files currently opened in an editor. */
	openedURIs: string[];
	/** Path of the active editor's file, or undefined when there is none. */
	activeURI: string | undefined;
	/** IDs of the persistent terminals. */
	persistentTerminalIDs: string[];
	/** Chat mode the prompt is being built for. */
	chatMode: ChatMode;
	/** MCP tools to describe in the prompt, if any. */
	mcpTools: InternalToolInfo[] | undefined;
	/** Whether XML tool definitions are written into the system message. */
	includeXMLToolDefinitions: boolean;
};
|
||||
|
||||
|
||||
// Returns the volatile runtime-grounding block as a standalone string. Callers
|
||||
// should prepend this to the latest user message (Phase B caching layout) rather
|
||||
// than embed it in the system message — keeping it out of the system message lets
|
||||
// the stable prefix and the full conversation history be prefix-cached across turns.
|
||||
export const chat_volatileContext = ({ workspaceFolders, openedURIs, activeURI, persistentTerminalIDs, directoryStr, chatMode: mode }: Pick<ChatPromptContext, 'workspaceFolders' | 'directoryStr' | 'openedURIs' | 'activeURI' | 'persistentTerminalIDs' | 'chatMode'>) => {
|
||||
const sysInfo = (`Here is the user's system information:
|
||||
<system_info>
|
||||
- ${os}
|
||||
|
|
@ -459,6 +467,29 @@ ${directoryStr}
|
|||
</files_overview>`)
|
||||
|
||||
|
||||
// XML tag is self-describing; no narration prefix. Keep field order stable
|
||||
// so that on turns where volatile fields happen to match the previous turn,
|
||||
// the cache can extend further into the prefix.
|
||||
return (`<volatile_context>
|
||||
Today's date is ${new Date().toDateString()}.
|
||||
|
||||
${sysInfo}
|
||||
|
||||
${fsInfo}
|
||||
</volatile_context>`)
|
||||
}
|
||||
|
||||
|
||||
export const chat_systemMessage = ({ chatMode: mode, mcpTools, includeXMLToolDefinitions }: Pick<ChatPromptContext, 'chatMode' | 'mcpTools' | 'includeXMLToolDefinitions'>) => {
|
||||
const header = (`You are an expert coding ${mode === 'agent' ? 'agent' : 'assistant'} whose job is \
|
||||
${mode === 'agent' ? `to help the user develop, run, and make changes to their codebase.`
|
||||
: mode === 'gather' ? `to search, understand, and reference files in the user's codebase.`
|
||||
: mode === 'normal' ? `to assist the user with their coding tasks.`
|
||||
: ''}
|
||||
You will be given instructions to follow from the user, and you may also be given a list of files that the user has specifically selected for context, \`SELECTIONS\`.
|
||||
Please assist the user with their query.`)
|
||||
|
||||
|
||||
const toolDefinitions = includeXMLToolDefinitions ? systemToolsXMLPrompt(mode, mcpTools) : null
|
||||
|
||||
const details: string[] = []
|
||||
|
|
@ -506,19 +537,21 @@ Here's an example of a good code block:\n${chatSuggestionDiffExample}`)
|
|||
|
||||
details.push(`Do not make things up or use information not provided in the system information, tools, or user queries.`)
|
||||
details.push(`Always use MARKDOWN to format lists, bullet points, etc. Do NOT write tables.`)
|
||||
details.push(`Today's date is ${new Date().toDateString()}.`)
|
||||
|
||||
const importantDetails = (`Important notes:
|
||||
${details.map((d, i) => `${i + 1}. ${d}`).join('\n\n')}`)
|
||||
|
||||
|
||||
// return answer
|
||||
// System message contains ONLY stable content (persona, rules, tool definitions)
|
||||
// so the entire system prefix is eligible for cross-turn prefix caching. Anything
|
||||
// that can change between turns (active file, open tabs, today's date, directory
|
||||
// listing, terminal IDs) lives in `chat_volatileContext` and is baked into each
|
||||
// user message's stored content at thread-creation time by chatThreadService.
|
||||
// That keeps historical turns byte-identical across subsequent requests so the
|
||||
// provider's prefix cache stays warm as the conversation grows.
|
||||
const ansStrs: string[] = []
|
||||
ansStrs.push(header)
|
||||
ansStrs.push(sysInfo)
|
||||
if (toolDefinitions) ansStrs.push(toolDefinitions)
|
||||
ansStrs.push(importantDetails)
|
||||
ansStrs.push(fsInfo)
|
||||
if (toolDefinitions) ansStrs.push(toolDefinitions)
|
||||
|
||||
const fullSystemMsgStr = ansStrs
|
||||
.join('\n\n\n')
|
||||
|
|
|
|||
|
|
@ -84,6 +84,12 @@ export type RawToolParamsObj = {
|
|||
export type RawToolCallObj = {
|
||||
name: ToolName;
|
||||
rawParams: RawToolParamsObj;
|
||||
// Original serialized `arguments` string as the model emitted it (OpenAI-compatible
|
||||
// path only — Anthropic/Gemini deliver tool input as structured JSON with no raw
|
||||
// source string). Preserved so that on replay we can send byte-identical content
|
||||
// back to the provider, which keeps the prefix cache warm past the tool call.
|
||||
// Absent/undefined when not available; callers should fall back to JSON.stringify(rawParams).
|
||||
rawParamsStr?: string;
|
||||
doneParams: ToolParamName<ToolName>[];
|
||||
id: string;
|
||||
isDone: boolean;
|
||||
|
|
|
|||
|
|
@ -252,7 +252,10 @@ const rawToolCallObjOfParamsStr = (name: string, toolParamsStr: string, id: stri
|
|||
if (typeof input !== 'object') return null
|
||||
|
||||
const rawParams: RawToolParamsObj = input
|
||||
return { id, name, rawParams, doneParams: Object.keys(rawParams), isDone: true }
|
||||
// Preserve the original argument string exactly as the model emitted it. On replay
|
||||
// we'll send this back verbatim inside `tool_calls[].function.arguments` so the
|
||||
// provider sees byte-identical content and the prefix cache stays warm.
|
||||
return { id, name, rawParams, rawParamsStr: toolParamsStr, doneParams: Object.keys(rawParams), isDone: true }
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -365,11 +368,17 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
|
|||
}
|
||||
|
||||
|
||||
// reasoning
|
||||
// reasoning — nameOfFieldInDelta may be a single field or a list of candidates
|
||||
// (some gateways like OpenRouter use `reasoning`, others like DeepSeek use
|
||||
// `reasoning_content`). Take the first non-empty one this chunk provides.
|
||||
let newReasoning = ''
|
||||
if (nameOfReasoningFieldInDelta) {
|
||||
// @ts-ignore
|
||||
newReasoning = (chunk.choices[0]?.delta?.[nameOfReasoningFieldInDelta] || '') + ''
|
||||
const fields = Array.isArray(nameOfReasoningFieldInDelta) ? nameOfReasoningFieldInDelta : [nameOfReasoningFieldInDelta]
|
||||
for (const f of fields) {
|
||||
// @ts-ignore
|
||||
const val = (chunk.choices[0]?.delta?.[f] || '') + ''
|
||||
if (val) { newReasoning = val; break }
|
||||
}
|
||||
fullReasoningSoFar += newReasoning
|
||||
}
|
||||
|
||||
|
|
@ -821,9 +830,19 @@ const sendGeminiChat = async ({
|
|||
|
||||
// Process the stream
|
||||
for await (const chunk of stream) {
|
||||
// message
|
||||
const newText = chunk.text ?? ''
|
||||
fullTextSoFar += newText
|
||||
// message — split thought-tagged parts from answer parts.
|
||||
// Gemini 2.5 Pro / Gemma 4 route internal reasoning through parts with
|
||||
// `thought: true`; the visible answer lives in plain text parts. Using
|
||||
// `chunk.text` (SDK shortcut) would concatenate both, polluting the
|
||||
// chat view and the stored message history.
|
||||
const parts = chunk.candidates?.[0]?.content?.parts
|
||||
if (parts) {
|
||||
for (const part of parts) {
|
||||
if (typeof part.text !== 'string') continue // skip functionCall / inlineData / etc.
|
||||
if (part.thought === true) fullReasoningSoFar += part.text
|
||||
else fullTextSoFar += part.text
|
||||
}
|
||||
}
|
||||
|
||||
// tool call
|
||||
const functionCalls = chunk.functionCalls
|
||||
|
|
@ -834,17 +853,21 @@ const sendGeminiChat = async ({
|
|||
toolId = functionCall.id ?? ''
|
||||
}
|
||||
|
||||
// (do not handle reasoning yet)
|
||||
|
||||
// usage (Gemini exposes promptTokenCount / candidatesTokenCount / totalTokenCount /
|
||||
// thoughtsTokenCount via usageMetadata). Only update when the chunk reports it.
|
||||
// thoughtsTokenCount / cachedContentTokenCount via usageMetadata). Multiple
|
||||
// chunks can carry usageMetadata during a stream, and the field set is NOT
|
||||
// consistent across chunks — notably, cachedContentTokenCount often appears
|
||||
// on an early chunk and is absent from the final summary. Merge per-field
|
||||
// with `??` so we preserve the best value seen so far instead of flickering
|
||||
// to `undefined` when Google stops reporting a field.
|
||||
const usageMetadata = chunk.usageMetadata
|
||||
if (usageMetadata) {
|
||||
latestUsage = {
|
||||
inputTokens: usageMetadata.promptTokenCount,
|
||||
outputTokens: usageMetadata.candidatesTokenCount,
|
||||
totalTokens: usageMetadata.totalTokenCount,
|
||||
reasoningTokens: usageMetadata.thoughtsTokenCount,
|
||||
inputTokens: usageMetadata.promptTokenCount ?? latestUsage?.inputTokens,
|
||||
outputTokens: usageMetadata.candidatesTokenCount ?? latestUsage?.outputTokens,
|
||||
totalTokens: usageMetadata.totalTokenCount ?? latestUsage?.totalTokens,
|
||||
reasoningTokens: usageMetadata.thoughtsTokenCount ?? latestUsage?.reasoningTokens,
|
||||
cachedInputTokens: usageMetadata.cachedContentTokenCount ?? latestUsage?.cachedInputTokens,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue