diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts index 099a276c..9ad0c712 100644 --- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts +++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts @@ -199,7 +199,11 @@ export type ThreadStreamState = { llmInfo: { displayContentSoFar: string; reasoningSoFar: string; - toolCallSoFar: RawToolCallObj | null; + // Ordered list of tool calls being streamed from the LLM. Most turns have + // length 0 (pure text) or 1 (single tool call). Providers that support + // parallel tool calling (OpenAI, Anthropic, Gemini) may emit multiple. + // Tools are executed serially by the agent loop in this order. + toolCallsSoFar: RawToolCallObj[]; }; toolInfo?: undefined; interrupt: Promise<() => void>; // calling this should have no effect on state - would be too confusing. it just cancels the tool @@ -714,55 +718,150 @@ class ChatThreadService extends Disposable implements IChatThreadService { - private _swapOutLatestStreamingToolWithResult = (threadId: string, tool: ChatMessage & { role: 'tool' }) => { - const messages = this.state.allThreads[threadId]?.messages - if (!messages) return false - const lastMsg = messages[messages.length - 1] - if (!lastMsg) return false - - if (lastMsg.role === 'tool' && lastMsg.type !== 'invalid_params') { - this._editMessageInThread(threadId, messages.length - 1, tool) - return true - } - return false - } + /** + * Transitions a tool message (by id) to a new state in the thread. Before parallel tool + * calling this just swapped the last message, which worked because a tool was always + * the most recent message at every transition. With batches, tool i may be followed + * in the thread by pre-added tool_requests for tools i+1, i+2..., so we search by id. + * + * If no matching tool is found we append (preserves the original behavior for fresh + * tool_request additions by `_runToolCall`'s non-batch path). 
When a match exists, + * we preserve batchIndex/batchSize from the existing row so the UI's (i/N) prefix + * doesn't drop across state transitions (tool_request → running_now → success). + */ private _updateLatestTool = (threadId: string, tool: ChatMessage & { role: 'tool' }) => { - const swapped = this._swapOutLatestStreamingToolWithResult(threadId, tool) - if (swapped) return + const messages = this.state.allThreads[threadId]?.messages + if (!messages) { this._addMessageToThread(threadId, tool); return } + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i] + if (m.role === 'tool' && m.id === tool.id) { + // Preserve batch metadata from the pre-added row — the transitional updates + // from `_runToolCall` don't know about batchIndex/batchSize. + const merged = { batchIndex: m.batchIndex, batchSize: m.batchSize, ...tool } as ChatMessage & { role: 'tool' } + this._editMessageInThread(threadId, i, merged) + return + } + } this._addMessageToThread(threadId, tool) } + /** + * Returns consecutive trailing `tool_request` messages in the thread — these are the + * not-yet-executed tools in the current batch. The user-facing "awaiting approval" + * tool is always the FIRST of this list (the batch processor runs them in order, so + * any tool before the paused one is already in a terminal state like `success`). + */ + private _getPendingBatchTools = (threadId: string): (ToolMessage & { type: 'tool_request' })[] => { + const messages = this.state.allThreads[threadId]?.messages ?? [] + const pending: (ToolMessage & { type: 'tool_request' })[] = [] + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i] + if (m.role === 'tool' && m.type === 'tool_request') pending.unshift(m) + else break + } + return pending + } + + /** + * Runs all currently-pending tool_requests at the tail of the thread, in order. + * Each call to `_runToolCall` validates, checks approval, and either runs the tool + * or pauses for user approval. 
Returns: + * - 'awaiting_user' if a tool paused for approval (remaining tools stay pending) + * - 'interrupted' if a tool was interrupted (agent should terminate) + * - 'done' if all pending tools ran to a terminal state + */ + private _tryDrainPendingBatch = async (threadId: string): Promise<'done' | 'awaiting_user' | 'interrupted'> => { + while (true) { + const pending = this._getPendingBatchTools(threadId) + if (pending.length === 0) return 'done' + const next = pending[0] + const { awaitingUserApproval, interrupted } = await this._runToolCall( + threadId, next.name, next.id, next.mcpServerName, + { preapproved: false, unvalidatedToolParams: next.rawParams, rawParamsStr: next.rawParamsStr } + ) + if (interrupted) return 'interrupted' + if (awaitingUserApproval) return 'awaiting_user' + } + } + approveLatestToolRequest(threadId: string) { const thread = this.state.allThreads[threadId] if (!thread) return // should never happen - const lastMsg = thread.messages[thread.messages.length - 1] - if (!(lastMsg.role === 'tool' && lastMsg.type === 'tool_request')) return // should never happen - - const callThisToolFirst: ToolMessage = lastMsg + // In batch mode multiple tool_requests can be pending at the tail of the thread — + // the one awaiting approval is the FIRST (tools that already ran have transitioned + // away from tool_request state). Pre-batch code grabbed messages[-1], which silently + // breaks for batches because later not-yet-started tools are newer in the thread. + const pending = this._getPendingBatchTools(threadId) + if (pending.length === 0) return + const callThisToolFirst = pending[0] this._wrapRunAgentToNotify( this._runChatAgent({ callThisToolFirst, threadId, ...this._currentModelSelectionProps() }) , threadId ) } - rejectLatestToolRequest(threadId: string) { + /** + * Reject a pending tool request. 
+ * + * `resumeAgent` controls what happens after the rejection: + * - true (from UI "reject" button): mark this tool + all other pending tools in + * the same batch as `rejected` ("reject-all" semantics), then resume the + * agent loop so the LLM sees the rejections and can react (e.g. ask the + * user what to do next). This keeps the conversation alive. + * - false (from abort/hard-stop path in `abortRunning`): mark rejected and stop. + * The conversation terminates; no further LLM call is made. + * + * Default is true because the common case is the user clicking the UI reject button. + * `abortRunning` explicitly passes false. + */ rejectLatestToolRequest(threadId: string, resumeAgent: boolean = true) { const thread = this.state.allThreads[threadId] if (!thread) return // should never happen - const lastMsg = thread.messages[thread.messages.length - 1] - - let params: ToolCallParams - if (lastMsg.role === 'tool' && lastMsg.type !== 'invalid_params') { - params = lastMsg.params + // Reject-all semantics: if the user rejected any tool in a batch, reject all its + // pending siblings too. Partial execution (run 1 and 2, reject 3, continue to 4) + // is confusing — the model emitted the batch as an atomic plan, so we either run + // it or abort it as a unit. Tools that already completed (success/tool_error) + // retain their terminal state; only pending tool_requests are rejected. + const pending = this._getPendingBatchTools(threadId) + if (pending.length === 0) { + // Fallback to legacy path: the last message must be a tool message of any type + // other than `invalid_params`. Kept for safety when called from unusual contexts. 
+ const lastMsg = thread.messages[thread.messages.length - 1] + if (!(lastMsg.role === 'tool' && lastMsg.type !== 'invalid_params')) return + const { name, id, rawParams, rawParamsStr, mcpServerName, params } = lastMsg + this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params, name, content: this.toolErrMsgs.rejected, result: null, id, rawParams, rawParamsStr, mcpServerName }) + if (!resumeAgent) this._setStreamState(threadId, undefined) + return } - else return - const { name, id, rawParams, rawParamsStr, mcpServerName } = lastMsg + const rejectedCount = pending.length + // Mark every pending tool in the batch as rejected. For the one the user actually + // clicked (the first pending), use the primary rejection message. For the others + // ("cascade rejections"), use a short explanation so the LLM can distinguish direct + // vs. cascade rejection when composing its response. + for (let i = 0; i < pending.length; i++) { + const p = pending[i] + const content = i === 0 ? this.toolErrMsgs.rejected : this.toolErrMsgs.rejectedCascade(rejectedCount) + this._updateLatestTool(threadId, { + role: 'tool', type: 'rejected', + params: p.params, name: p.name, content, result: null, + id: p.id, rawParams: p.rawParams, rawParamsStr: p.rawParamsStr, mcpServerName: p.mcpServerName, + }) + } - const errorMessage = this.toolErrMsgs.rejected - this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params: params, name: name, content: errorMessage, result: null, id, rawParams, rawParamsStr, mcpServerName }) - this._setStreamState(threadId, undefined) + if (resumeAgent) { + // Let the LLM see the rejection(s) and respond. No callThisToolFirst — + // _runChatAgent will loop straight into a new LLM call with the rejected + // tool results in context. 
+ this._wrapRunAgentToNotify( + this._runChatAgent({ threadId, ...this._currentModelSelectionProps() }) + , threadId + ) + } else { + this._setStreamState(threadId, undefined) + } } private _computeMCPServerOfToolName = (toolName: string) => { @@ -775,9 +874,14 @@ class ChatThreadService extends Disposable implements IChatThreadService { // add assistant message if (this.streamState[threadId]?.isRunning === 'LLM') { - const { displayContentSoFar, reasoningSoFar, toolCallSoFar } = this.streamState[threadId].llmInfo + const { displayContentSoFar, reasoningSoFar, toolCallsSoFar } = this.streamState[threadId].llmInfo this._addMessageToThread(threadId, { role: 'assistant', displayContent: displayContentSoFar, reasoning: reasoningSoFar, anthropicReasoning: null }) - if (toolCallSoFar) this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: toolCallSoFar.name, mcpServerName: this._computeMCPServerOfToolName(toolCallSoFar.name) }) + // For each partially-streamed tool call interrupted mid-flight, add a decorative + // "interrupted_streaming_tool" marker. Pre-batch this only handled one tool; + // now we iterate the full list so the UI shows all tools the model was planning. + for (const tc of toolCallsSoFar) { + this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: tc.name, mcpServerName: this._computeMCPServerOfToolName(tc.name) }) + } } // add tool that's running else if (this.streamState[threadId]?.isRunning === 'tool') { @@ -785,9 +889,11 @@ class ChatThreadService extends Disposable implements IChatThreadService { const content = content_ || this.toolErrMsgs.interrupted this._updateLatestTool(threadId, { role: 'tool', name: toolName, params: toolParams, id, content, rawParams, rawParamsStr, type: 'rejected', result: null, mcpServerName }) } - // reject the tool for the user if relevant + // reject the tool for the user if relevant. 
`resumeAgent: false` — abortRunning is + // a hard stop from the user; we don't want to restart the LLM loop with rejection + // feedback (which is what the normal reject-button path does). else if (this.streamState[threadId]?.isRunning === 'awaiting_user') { - this.rejectLatestToolRequest(threadId) + this.rejectLatestToolRequest(threadId, false) } else if (this.streamState[threadId]?.isRunning === 'idle') { // do nothing @@ -807,7 +913,16 @@ class ChatThreadService extends Disposable implements IChatThreadService { private readonly toolErrMsgs = { - rejected: 'Tool call was rejected by the user.', + // Phrased to discourage the model from immediately retrying the same tool. "Rejected" + // alone tends to trigger LLMs into "let me try again" behavior, which wastes tokens + // and annoys the user. Framing it as a signal to pause and consult the user breaks + // that pattern. + rejected: 'The user rejected this tool call. Do not retry the same action. Acknowledge the rejection, ask the user what they want you to do differently, or propose an alternative approach.', + // Used for the "cascade" rejections when the user rejects one tool in a multi-tool + // batch and reject-all semantics propagates the rejection to its siblings. Tells + // the model that not running the rest was a side effect of one rejection, not a + // per-tool decision, so it doesn't over-apologize for each. + rejectedCascade: (batchSize: number) => `The user rejected the tool batch (${batchSize} tools). This specific tool was skipped as part of that rejection, not individually rejected. 
See the primary rejection for the user's reasoning.`, interrupted: 'Tool call was interrupted by the user.', errWhenStringifying: (error: any) => `Tool call succeeded, but there was an error stringifying the output.\n${getErrorMessage(error)}` } @@ -851,7 +966,10 @@ class ChatThreadService extends Disposable implements IChatThreadService { } catch (error) { const errorMessage = getErrorMessage(error) - this._addMessageToThread(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, rawParamsStr, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName }) + // Use _updateLatestTool (not _addMessageToThread) so that when this tool was + // pre-added as a `tool_request` by the batch processor, we transition that + // row in place (preserving batchIndex/batchSize) instead of appending a new one. + this._updateLatestTool(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, rawParamsStr, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName }) return {} } // once validated, add checkpoint for edit @@ -883,8 +1001,13 @@ class ChatThreadService extends Disposable implements IChatThreadService { } } - // add a tool_request because we use it for UI if a tool is loading (this should be improved in the future) - this._addMessageToThread(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName }) + // Transition (or create) the tool_request row. _updateLatestTool finds the + // row by id: for solo tool calls there's no pre-added row and it appends one + // (same as the old behavior). 
For batched tool calls, the batch processor + // pre-added a tool_request with batchIndex/batchSize, and this call now + // replaces its placeholder unvalidated params with the validated ones while + // preserving the batch metadata. + this._updateLatestTool(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName }) if (!autoApprove) { return { awaitingUserApproval: true } } @@ -996,12 +1119,29 @@ class ChatThreadService extends Disposable implements IChatThreadService { // before enter loop, call tool if (callThisToolFirst) { + // Run the just-approved tool, then drain any remaining pending batch siblings + // (tools pre-added when the batch started and not yet run). Each drained tool + // may pause for its own approval — we stop the agent in that case and return. const { interrupted } = await this._runToolCall(threadId, callThisToolFirst.name, callThisToolFirst.id, callThisToolFirst.mcpServerName, { preapproved: true, unvalidatedToolParams: callThisToolFirst.rawParams, rawParamsStr: callThisToolFirst.rawParamsStr, validatedParams: callThisToolFirst.params }) if (interrupted) { this._setStreamState(threadId, undefined) this._addUserCheckpoint({ threadId }) - + return } + // Drain the remaining pending batch (if there are other tools from this turn + // that still need to run). If any of them pauses for approval, stop here — the + // agent will resume when the user next approves or rejects. + const drainRes = await this._tryDrainPendingBatch(threadId) + if (drainRes === 'interrupted') { + this._setStreamState(threadId, undefined) + this._addUserCheckpoint({ threadId }) + return + } + if (drainRes === 'awaiting_user') { + this._setStreamState(threadId, { isRunning: 'awaiting_user' }) + return + } + // drainRes === 'done': fall through to the main LLM loop below. 
} this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative, for clarity @@ -1034,7 +1174,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { nAttempts += 1 type ResTypes = - | { type: 'llmDone', toolCall?: RawToolCallObj, info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null, finishReason?: string } } + | { type: 'llmDone', toolCalls: RawToolCallObj[], info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null, finishReason?: string } } | { type: 'llmError', error?: { message: string; fullError: Error | null; } } | { type: 'llmAborted' } @@ -1050,16 +1190,16 @@ class ChatThreadService extends Disposable implements IChatThreadService { overridesOfModel, logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } }, separateSystemMessage: separateSystemMessage, - onText: ({ fullText, fullReasoning, toolCall, usage }) => { + onText: ({ fullText, fullReasoning, toolCalls, usage }) => { if (usage) this._setLatestUsage(threadId, usage) - this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) }) + this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallsSoFar: toolCalls ?? 
[] }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) }) }, - onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage, finishReason }) => { + onFinalMessage: async ({ fullText, fullReasoning, toolCalls, anthropicReasoning, usage, finishReason }) => { if (usage) this._setLatestUsage(threadId, usage) // Lock in this request's usage so the next loop iteration's // running total is added to (not replacing) what we already counted. this._lockInCurrentRequestUsage(threadId) - resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning, finishReason } }) // resolve with tool calls + resMessageIsDonePromise({ type: 'llmDone', toolCalls: toolCalls ?? [], info: { fullText, fullReasoning, anthropicReasoning, finishReason } }) // resolve with tool calls }, onError: async (error) => { resMessageIsDonePromise({ type: 'llmError', error: error }) @@ -1077,7 +1217,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { break } - this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: '', reasoningSoFar: '', toolCallSoFar: null }, interrupt: Promise.resolve(() => this._llmMessageService.abort(llmCancelToken)) }) + this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: '', reasoningSoFar: '', toolCallsSoFar: [] }, interrupt: Promise.resolve(() => this._llmMessageService.abort(llmCancelToken)) }) const llmRes = await messageIsDonePromise // wait for message to complete // if something else started running in the meantime @@ -1108,9 +1248,13 @@ class ChatThreadService extends Disposable implements IChatThreadService { // error, but too many attempts else { const { error } = llmRes - const { displayContentSoFar, reasoningSoFar, toolCallSoFar } = this.streamState[threadId].llmInfo + const { displayContentSoFar, reasoningSoFar, toolCallsSoFar } = this.streamState[threadId].llmInfo 
this._addMessageToThread(threadId, { role: 'assistant', displayContent: displayContentSoFar, reasoning: reasoningSoFar, anthropicReasoning: null }) - if (toolCallSoFar) this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: toolCallSoFar.name, mcpServerName: this._computeMCPServerOfToolName(toolCallSoFar.name) }) + // Record an interrupted-streaming marker for every tool the LLM was + // mid-way through emitting. Pre-batch this only handled the first tool. + for (const tc of toolCallsSoFar) { + this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: tc.name, mcpServerName: this._computeMCPServerOfToolName(tc.name) }) + } this._setStreamState(threadId, { isRunning: undefined, error }) this._addUserCheckpoint({ threadId }) @@ -1119,23 +1263,52 @@ class ChatThreadService extends Disposable implements IChatThreadService { } // llm res success - const { toolCall, info } = llmRes + const { toolCalls, info } = llmRes this._addMessageToThread(threadId, { role: 'assistant', displayContent: info.fullText, reasoning: info.fullReasoning, anthropicReasoning: info.anthropicReasoning, finishReason: info.finishReason }) this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative for clarity - // call tool if there is one - if (toolCall) { + // call tool(s) if there are any. Batched / parallel tool emissions are handled + // by pre-adding every tool as a `tool_request` (with batchIndex/batchSize so the + // UI can render "(1/N)" prefixes), then running them serially. Any tool may pause + // for user approval; if that happens the remaining tools in the batch stay as + // pending tool_requests, visible to the user as stacked progress rows. 
+ if (toolCalls.length > 0) { const mcpTools = this._mcpService.getMCPTools() - const mcpTool = mcpTools?.find(t => t.name === toolCall.name) + const batchSize = toolCalls.length + for (let i = 0; i < batchSize; i++) { + const tc = toolCalls[i] + const mcpServerName = mcpTools?.find(t => t.name === tc.name)?.mcpServerName + this._addMessageToThread(threadId, { + role: 'tool', + type: 'tool_request', + content: '(Pending...)', + result: null, + name: tc.name, + // Placeholder unvalidated params — `_runToolCall` will validate and + // replace via `_updateLatestTool` before the tool runs. The cast is + // safe because the UI only reads validated `params` on tool_requests + // once they've transitioned past the placeholder phase (which happens + // synchronously when `_tryDrainPendingBatch` hits this tool). + params: tc.rawParams as unknown as ToolCallParams, + id: tc.id, + rawParams: tc.rawParams, + rawParamsStr: tc.rawParamsStr, + mcpServerName, + // Only stamp batch metadata when there's actually more than one tool — + // a solo tool call shouldn't render "(1/1)" in the UI. + batchIndex: batchSize > 1 ? i : undefined, + batchSize: batchSize > 1 ? 
batchSize : undefined, + }) + } - const { awaitingUserApproval, interrupted } = await this._runToolCall(threadId, toolCall.name, toolCall.id, mcpTool?.mcpServerName, { preapproved: false, unvalidatedToolParams: toolCall.rawParams, rawParamsStr: toolCall.rawParamsStr }) - if (interrupted) { + const batchRes = await this._tryDrainPendingBatch(threadId) + if (batchRes === 'interrupted') { this._setStreamState(threadId, undefined) return } - if (awaitingUserApproval) { isRunningWhenEnd = 'awaiting_user' } + if (batchRes === 'awaiting_user') { isRunningWhenEnd = 'awaiting_user' } else { shouldSendAnotherMessage = true } this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative, for clarity diff --git a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts index 74d0c0c5..c78193ee 100644 --- a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts +++ b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts @@ -85,22 +85,35 @@ const prepareMessages_openai_tools = (messages: SimpleLLMMessage[]): AnthropicOr continue } - // edit previous assistant message to have called the tool - const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined - if (prevMsg?.role === 'assistant') { + // Walk back through newMessages to find the assistant that called this tool. For a + // solo tool this is always the immediately-prior message; for a batched response + // (N parallel tool calls) we need to append to the same assistant across N tool + // messages — the previous implementation overwrote tool_calls each time and only + // the LAST tool in a batch survived, corrupting replay bytes + the provider's cache. 
+ let assistantIdx = -1 + for (let j = newMessages.length - 1; j >= 0; j--) { + const m = newMessages[j] + if (m.role === 'assistant') { assistantIdx = j; break } + // Stop at any non-tool, non-assistant message (should never happen since we only + // push assistant/tool/user through here in order, but keep the safety rail). + if (m.role !== 'tool') break + } + if (assistantIdx >= 0) { + const asstMsg = newMessages[assistantIdx] as OpenAILLMChatMessage & { role: 'assistant' } // Prefer the model's original serialized argument string when we have it // (OpenAI-compatible providers expose it in the streaming delta). Sending // byte-identical bytes back preserves the provider's prefix cache past the // tool call. Fall back to re-serializing when the raw string is unavailable // (e.g. conversations from before this field existed, or non-OpenAI provenance). - prevMsg.tool_calls = [{ - type: 'function', + const newCall = { + type: 'function' as const, id: currMsg.id, function: { name: currMsg.name, arguments: currMsg.rawParamsStr ?? JSON.stringify(currMsg.rawParams) } - }] + } + asstMsg.tool_calls = [...(asstMsg.tool_calls ?? []), newCall] } // add the tool @@ -181,13 +194,27 @@ const prepareMessages_anthropic_tools = (messages: SimpleLLMMessage[], supportsA } if (currMsg.role === 'tool') { - // add anthropic tools - const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined - - // make it so the assistant called the tool - if (prevMsg?.role === 'assistant') { - if (typeof prevMsg.content === 'string') prevMsg.content = [{ type: 'text', text: prevMsg.content }] - prevMsg.content.push({ type: 'tool_use', id: currMsg.id, name: currMsg.name, input: currMsg.rawParams }) + // Walk back to the assistant that owned this tool call. 
For a batched turn + // (multiple parallel tool calls on one assistant), each tool message appends + // its own `tool_use` block to the same assistant's content array, and Anthropic + // sees the full batch as one assistant turn. Previously only the first tool + // was attached (prevMsg check) and the rest silently orphaned, which made + // replay of batched turns fail validation. + let assistantIdx = -1 + for (let j = i - 1; j >= 0; j--) { + const m = newMessages[j] + if (!m) continue + if (m.role === 'assistant') { assistantIdx = j; break } + // Skip over previously-converted tool rows (now user messages with tool_result); + // anything else means we walked past the batch boundary. + if (m.role !== 'user') break + const isToolResultUser = Array.isArray(m.content) && m.content.some(c => c.type === 'tool_result') + if (!isToolResultUser) break + } + if (assistantIdx >= 0) { + const asstMsg = newMessages[assistantIdx] as AnthropicLLMChatMessage & { role: 'assistant' } + if (typeof asstMsg.content === 'string') asstMsg.content = [{ type: 'text', text: asstMsg.content }] + asstMsg.content.push({ type: 'tool_use', id: currMsg.id, name: currMsg.name, input: currMsg.rawParams }) } // turn each tool into a user message with tool results at the end @@ -214,12 +241,20 @@ const prepareMessages_XML_tools = (messages: SimpleLLMMessage[], supportsAnthrop const next = 0 <= i + 1 && i + 1 <= messages.length - 1 ? messages[i + 1] : null if (c.role === 'assistant') { - // if called a tool (message after it), re-add its XML to the message - // alternatively, could just hold onto the original output, but this way requires less piping raw strings everywhere + // Re-serialize every consecutive tool message after this assistant as XML and + // concatenate them back onto the assistant content. Multi-tool batches may land + // in history (e.g. if the user switches from a native-tool-calling model into a + // grammar-based one); only appending `next` would lose tool calls 2..N. 
let content: AnthropicOrOpenAILLMMessage['content'] = c.content - if (next?.role === 'tool') { - content = `${content}\n\n${reParsedToolXMLString(next.name, next.rawParams)}` + for (let k = i + 1; k < messages.length; k++) { + const followUp = messages[k] + if (followUp.role !== 'tool') break + content = `${content}\n\n${reParsedToolXMLString(followUp.name, followUp.rawParams)}` } + // For backward compatibility of the void-format assumption we keep `next` only + // reference intact below (it's still used by the batch-rebuild loop at the + // tool-result step). + void next // anthropic reasoning if (c.anthropicReasoning && supportsAnthropicReasoning) { @@ -454,7 +489,13 @@ const prepareOpenAIOrAnthropicMessages = ({ type GeminiUserPart = (GeminiLLMChatMessage & { role: 'user' })['parts'][0] type GeminiModelPart = (GeminiLLMChatMessage & { role: 'model' })['parts'][0] const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => { - let latestToolName: ToolName | undefined = undefined + // Map tool_use id → tool name, populated as we encounter `tool_use` parts on + // assistant turns. functionResponse entries later (on user turns) look their name up + // by id so batched turns resolve each response to the correct call. Previously a + // single `latestToolName` was tracked, which broke when one assistant emitted N + // parallel tools: the Nth name won, and all earlier functionResponse parts were + // mislabeled (Gemini rejects these with "function name mismatch"). 
+ const toolNameById = new Map() const messages2: GeminiLLMChatMessage[] = messages.map((m): GeminiLLMChatMessage | null => { if (m.role === 'assistant') { if (typeof m.content === 'string') { @@ -466,7 +507,7 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => { return { text: c.text } } else if (c.type === 'tool_use') { - latestToolName = c.name + toolNameById.set(c.id, c.name) return { functionCall: { id: c.id, name: c.name, args: c.input } } } else return null @@ -484,8 +525,9 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => { return { text: c.text } } else if (c.type === 'tool_result') { - if (!latestToolName) return null - return { functionResponse: { id: c.tool_use_id, name: latestToolName, response: { output: c.content } } } + const resolvedName = toolNameById.get(c.tool_use_id) + if (!resolvedName) return null + return { functionResponse: { id: c.tool_use_id, name: resolvedName, response: { output: c.content } } } } else return null }).filter(m => !!m) diff --git a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx index f59074fa..f9d3206e 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx @@ -1621,8 +1621,20 @@ const titleOfBuiltinToolName = { } as const satisfies Record -const getTitle = (toolMessage: Pick): React.ReactNode => { +// Prefix like "(1/2) " when this tool is part of a multi-tool batch emitted in one +// assistant turn. The prefix is purely decorative (helps the user see that one reply +// contains multiple tools and track how many are done) and is omitted for solo tools +// or when the message predates parallel tool support (batchIndex/batchSize undefined). 
+const batchPrefix = (m: Pick): string => { + if (m.batchIndex === undefined || m.batchSize === undefined) return '' + if (m.batchSize <= 1) return '' + // batchIndex is 0-based internally but we render as 1-based for humans. + return `(${m.batchIndex + 1}/${m.batchSize}) ` +} + +const getTitle = (toolMessage: Pick): React.ReactNode => { const t = toolMessage + const prefix = batchPrefix(t) // non-built-in title if (!builtinToolNames.includes(t.name as BuiltinToolName)) { @@ -1637,7 +1649,7 @@ const getTitle = (toolMessage: Pick void) | null, + // Index of the message that currently owns the approve/reject prompt (the earliest + // tool_request in the consecutive trailing batch). When a multi-tool batch is + // pre-added, all queued tool_requests share the same status but only the first one + // should render the buttons; the others are "waiting their turn". undefined = no + // pending approval anywhere in the thread. + firstPendingToolRequestIdx?: number, } const ChatBubble = (props: ChatBubbleProps) => { @@ -2707,7 +2727,7 @@ const ChatBubble = (props: ChatBubbleProps) => { } -const _ChatBubble = ({ threadId, chatMessage, currCheckpointIdx, isCommitted, messageIdx, chatIsRunning, _scrollToBottom }: ChatBubbleProps) => { +const _ChatBubble = ({ threadId, chatMessage, currCheckpointIdx, isCommitted, messageIdx, chatIsRunning, _scrollToBottom, firstPendingToolRequestIdx }: ChatBubbleProps) => { const role = chatMessage.role const isCheckpointGhost = messageIdx > (currCheckpointIdx ?? Infinity) && !chatIsRunning // whether to show as gray (if chat is running, for good measure just dont show any ghosts) @@ -2751,7 +2771,7 @@ const _ChatBubble = ({ threadId, chatMessage, currCheckpointIdx, isCommitted, me threadId={threadId} /> - {chatMessage.type === 'tool_request' ? + {chatMessage.type === 'tool_request' && messageIdx === firstPendingToolRequestIdx ?
: null} @@ -3102,8 +3122,14 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: { const streamState = useChatThreadsStreamState(threadId) const isRunning = streamState?.isRunning const latestError = streamState?.error - const { displayContentSoFar, toolCallSoFar, reasoningSoFar } = streamState?.llmInfo ?? {} - const toolIsGenerating = toolCallSoFar && !toolCallSoFar.isDone + const { displayContentSoFar, toolCallsSoFar, reasoningSoFar } = streamState?.llmInfo ?? {} + // During streaming the "currently being written" tool is the last one in the array + // (indices are emitted in order). Earlier tools in the batch may already be complete + // (their argument JSON fully streamed) but their persisted tool_request rows only + // show up in `thread.messages` once onFinalMessage fires and the batch is committed. + // For the live preview here we just show the latest in-flight tool. + const currentInFlightTool = toolCallsSoFar && toolCallsSoFar.length > 0 ? toolCallsSoFar[toolCallsSoFar.length - 1] : undefined + const toolIsGenerating = currentInFlightTool && !currentInFlightTool.isDone const currCheckpointIdx = thread?.state?.currCheckpointIdx ?? undefined @@ -3118,6 +3144,22 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: { } }, [isActive, scrollContainerRef]) + // Index of the "currently awaiting approval" tool request — the earliest of the + // consecutive trailing tool_request messages. Matches _getPendingBatchTools() in + // the service. For a solo tool call this is just the last message (same as the + // pre-batch behavior). For a multi-tool batch, it's the first pending one; later + // queued tool_requests render as stacked progress rows without approve/reject + // buttons. 
+ const firstPendingToolRequestIdx = useMemo(() => { + let earliest: number | undefined + for (let i = previousMessages.length - 1; i >= 0; i--) { + const m = previousMessages[i] + if (m.role === 'tool' && m.type === 'tool_request') earliest = i + else break + } + return earliest + }, [previousMessages]) + const previousMessagesHTML = useMemo(() => { return previousMessages.map((message, i) => { return scrollToBottom(scrollContainerRef)} + firstPendingToolRequestIdx={firstPendingToolRequestIdx} /> }) - }, [previousMessages, threadId, currCheckpointIdx, isRunning, scrollContainerRef]) + }, [previousMessages, threadId, currCheckpointIdx, isRunning, scrollContainerRef, firstPendingToolRequestIdx]) const streamingChatIdx = previousMessagesHTML.length const currStreamingMessageHTML = reasoningSoFar || displayContentSoFar || isRunning ? @@ -3151,10 +3194,10 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: { _scrollToBottom={null} /> : null - const generatingTool = toolIsGenerating ? - toolCallSoFar.name === 'edit_file' || toolCallSoFar.name === 'rewrite_file' ? : null : null @@ -3230,10 +3273,13 @@ export const SidebarChat = () => { const currThreadStreamState = useChatThreadsStreamState(chatThreadsState.currentThreadId) const isRunning = currThreadStreamState?.isRunning const latestError = currThreadStreamState?.error - const { displayContentSoFar, toolCallSoFar, reasoningSoFar } = currThreadStreamState?.llmInfo ?? {} + const { displayContentSoFar, toolCallsSoFar, reasoningSoFar } = currThreadStreamState?.llmInfo ?? {} + // See ThreadMessagesView comment: the last tool in the array is the one still + // being streamed; earlier batch siblings may already have complete argument JSON. + const currentInFlightTool = toolCallsSoFar && toolCallsSoFar.length > 0 ? 
toolCallsSoFar[toolCallsSoFar.length - 1] : undefined // this is just if it's currently being generated, NOT if it's currently running - const toolIsGenerating = toolCallSoFar && !toolCallSoFar.isDone // show loading for slow tools (right now just edit) + const toolIsGenerating = currentInFlightTool && !currentInFlightTool.isDone // show loading for slow tools (right now just edit) // ----- SIDEBAR CHAT state (local) ----- diff --git a/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts b/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts index 3a027208..a4762d84 100644 --- a/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts +++ b/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts @@ -18,6 +18,14 @@ export type ToolMessage = { // byte-identical tool_calls back, preserving the provider's prefix cache. rawParamsStr?: string; mcpServerName: string | undefined; // the server name at the time of the call + // Position of this tool within its assistant-turn batch. When a model emits multiple + // parallel tool calls in one response, each tool message stores its 0-based index + // (`batchIndex`) and the total count (`batchSize`). The UI uses these to render a + // "(1/2)"-style prefix so the user can see tool grouping at a glance. Both are + // optional — legacy single-tool responses and persisted history from before this + // field existed simply omit them (UI treats that as a solo call, no prefix shown). 
+ batchIndex?: number; + batchSize?: number; } & ( // in order of events: | { type: 'invalid_params', result: null, name: T, } diff --git a/src/vs/workbench/contrib/void/common/prompt/prompts.ts b/src/vs/workbench/contrib/void/common/prompt/prompts.ts index e23ef2db..53a532ea 100644 --- a/src/vs/workbench/contrib/void/common/prompt/prompts.ts +++ b/src/vs/workbench/contrib/void/common/prompt/prompts.ts @@ -507,7 +507,12 @@ You will be given instructions from the user, and may also receive a list of fil if (mode === 'agent' || mode === 'gather') { details.push(`Only call tools if they help you accomplish the user's goal. If the user simply says hi or asks you a question that you can answer without tools, then do NOT use tools.`) details.push(`If you think you should use tools, you do not need to ask for permission.`) - details.push('Only use ONE tool call at a time.') + // Parallel tool calls are OK (and encouraged) when the operations are independent + // — e.g. reading several files, searching several patterns. A single assistant + // turn that batches N reads costs one round-trip instead of N, and prefix caching + // stays warm across the whole batch. Keep sequential tools for dependent steps + // where later arguments require earlier results. + details.push(`You can call multiple tools in a single turn when the operations are independent (e.g. reading several files, searching several patterns). Prefer batching reads/searches together rather than issuing them one-at-a-time across turns. Use separate turns when a later tool's arguments depend on an earlier tool's result.`) details.push(`NEVER say something like "I'm going to use \`tool_name\`". 
Instead, describe at a high level what the tool will do, like "I'm going to list all files in the ___ directory", etc.`) details.push(`Many tools only work if the user has a workspace open.`) } diff --git a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts index 7a692845..dc9b0622 100644 --- a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts +++ b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts @@ -112,7 +112,12 @@ export type LLMUsage = { cachedInputTokens?: number; } -export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; usage?: LLMUsage }) => void +// `toolCalls` is an ordered list. Providers that support parallel/batched tool calling +// (OpenAI, Anthropic, Gemini) may emit multiple tools in a single assistant turn. A +// single-tool response is represented as a length-1 array; no tools as an empty array +// (or `undefined` for brevity). The ordering is preserved from the provider — Void +// executes them serially in that order. +export type OnText = (p: { fullText: string; fullReasoning: string; toolCalls?: RawToolCallObj[]; usage?: LLMUsage }) => void // `finishReason` is the provider's own reason for ending the stream. OpenAI-compatible // servers return one of `stop` / `tool_calls` / `function_call` / `length` / `content_filter` @@ -122,7 +127,9 @@ export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: R // clips against `max_tokens`, but also `content_filter` or unknown gateway-specific values). // Populated only by OAI-compatible providers right now — Anthropic / Gemini paths leave this // undefined, which renders as "no warning" (the same as before this was added). 
-export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage; finishReason?: string }) => void // id is tool_use_id +// +// `toolCalls` — see `OnText` above. Empty/undefined on pure-text responses. +export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCalls?: RawToolCallObj[]; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage; finishReason?: string }) => void export type OnError = (p: { message: string; fullError: Error | null }) => void export type OnAbort = () => void export type AbortRef = { current: (() => void) | null } diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts index 66e16791..73d8161a 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts @@ -334,10 +334,15 @@ export const extractXMLToolsWrapper = ( ) } + // Grammar-based tool extraction only surfaces one tool at a time (XML tags are parsed + // sequentially out of the text stream), so the array is always length 0 or 1 on this + // path. Models that use this wrapper (local models, pseudo-tool-use via text) don't + // produce parallel tool calls — that capability is exclusive to providers with native + // tool-calling (OpenAI-compatible, Anthropic, Gemini). onText({ ...params, fullText, - toolCall: latestToolCall, + toolCalls: latestToolCall ? 
[latestToolCall] : undefined, }); }; @@ -349,12 +354,7 @@ export const extractXMLToolsWrapper = ( fullText = fullText.trimEnd() const toolCall = latestToolCall - // console.log('final message!!!', trueFullText) - // console.log('----- returning ----\n', fullText) - // console.log('----- tools ----\n', JSON.stringify(firstToolCallRef.current, null, 2)) - // console.log('----- toolCall ----\n', JSON.stringify(toolCall, null, 2)) - - onFinalMessage({ ...params, fullText, toolCall: toolCall }) + onFinalMessage({ ...params, fullText, toolCalls: toolCall ? [toolCall] : undefined }) } return { newOnText, newOnFinalMessage }; } diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts index 445cc234..e93860c9 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts @@ -15,6 +15,7 @@ import { GoogleAuth } from 'google-auth-library' /* eslint-enable */ import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, type LLMUsage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js'; +import type { ToolName } from '../../common/toolsServiceTypes.js'; import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js'; import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js'; import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js'; @@ -339,9 +340,19 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE let fullReasoningSoFar = '' let fullTextSoFar = '' - let 
toolName = '' - let toolId = '' - let toolParamsStr = '' + // Tool-call buffers keyed by `tool_calls[].index` from the delta. OpenAI's streaming spec + // allows multiple tool calls in one assistant turn, each identified by its own numeric index, + // with chunks interleaved arbitrarily (index=0 chunk, index=1 chunk, index=0 chunk again...). + // We previously dropped everything past index 0, which silently corrupted parallel tool-call + // responses from GPT-4+, MiniMax, and other providers that batch. Using a Map keyed by index + // handles out-of-order chunks correctly. On final, we sort by index to preserve the + // provider's intended execution order. + const toolBuffers = new Map() + const getOrCreateToolBuffer = (index: number) => { + let buf = toolBuffers.get(index) + if (!buf) { buf = { name: '', argsStr: '', id: '' }; toolBuffers.set(index, buf) } + return buf + } // Usage only arrives in the final chunk (and only if the server honored // stream_options.include_usage). `chunk.usage` is typed as `| null` there. @@ -374,14 +385,17 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE const chunkFinishReason = chunk.choices[0]?.finish_reason if (chunkFinishReason) lastFinishReason = chunkFinishReason - // tool call + // tool calls — aggregate by index. A single chunk may include deltas for multiple + // indices (rare but valid), and a single index's pieces may arrive across many + // chunks (the common case). `id` is typically present only on the first chunk + // for a given index; `arguments` streams incrementally. for (const tool of chunk.choices[0]?.delta?.tool_calls ?? []) { const index = tool.index - if (index !== 0) continue - - toolName += tool.function?.name ?? '' - toolParamsStr += tool.function?.arguments ?? ''; - toolId += tool.id ?? '' + if (index === undefined) continue + const buf = getOrCreateToolBuffer(index) + buf.name += tool.function?.name ?? '' + buf.argsStr += tool.function?.arguments ?? 
'' + buf.id += tool.id ?? '' } @@ -413,23 +427,44 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE } } + // Build the in-progress toolCalls snapshot for UI streaming. We only emit entries + // for buffers that have at least a name (argument-only deltas for an as-yet- + // unnamed tool are still accumulating). Indices are sorted so the UI's rendered + // order matches the provider's intended execution order. + const inProgressToolCalls: RawToolCallObj[] = Array.from(toolBuffers.entries()) + .filter(([_i, buf]) => !!buf.name) + .sort(([a], [b]) => a - b) + .map(([_i, buf]) => ({ name: buf.name as ToolName, rawParams: {}, isDone: false, doneParams: [], id: buf.id })) + // call onText onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, - toolCall: !toolName ? undefined : { name: toolName, rawParams: {}, isDone: false, doneParams: [], id: toolId }, + toolCalls: inProgressToolCalls.length > 0 ? inProgressToolCalls : undefined, usage: latestUsage, }) } - // on final - if (!fullTextSoFar && !fullReasoningSoFar && !toolName) { + // on final: parse each completed tool buffer. `rawToolCallObjOfParamsStr` returns + // null on malformed JSON or non-object inputs — we skip those rather than crashing + // the whole turn, but log for diagnosis. + const finalToolCalls: RawToolCallObj[] = Array.from(toolBuffers.entries()) + .sort(([a], [b]) => a - b) + .map(([_i, buf]) => rawToolCallObjOfParamsStr(buf.name, buf.argsStr, buf.id)) + .filter((t): t is RawToolCallObj => t !== null) + + if (!fullTextSoFar && !fullReasoningSoFar && finalToolCalls.length === 0) { onError({ message: 'Void: Response from model was empty.', fullError: null }) } else { - const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId) - const toolCallObj = toolCall ? 
{ toolCall } : {} - onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, finishReason: lastFinishReason, ...toolCallObj }); + onFinalMessage({ + fullText: fullTextSoFar, + fullReasoning: fullReasoningSoFar, + anthropicReasoning: null, + usage: latestUsage, + finishReason: lastFinishReason, + toolCalls: finalToolCalls.length > 0 ? finalToolCalls : undefined, + }); } }) // when error/fail - this catches errors of both .create() and .then(for await) @@ -557,15 +592,27 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag let fullText = '' let fullReasoning = '' - let fullToolName = '' - let fullToolParams = '' - + // Tool-call buffers keyed by Anthropic's content-block `index`. Anthropic streams each + // tool as its own `content_block_start` (with name+id) followed by `content_block_delta` + // events carrying `input_json_delta` chunks — both tagged with the same numeric `index`. + // We previously only kept the first tool (`tools[0]` at finalMessage), silently dropping + // any parallel tool_use blocks. Map preserves ordering and the per-tool id. + const anthropicToolBuffers = new Map() + const getOrCreateAnthropicTool = (index: number) => { + let buf = anthropicToolBuffers.get(index) + if (!buf) { buf = { name: '', argsStr: '', id: '' }; anthropicToolBuffers.set(index, buf) } + return buf + } const runOnText = () => { + const inProgressToolCalls: RawToolCallObj[] = Array.from(anthropicToolBuffers.entries()) + .filter(([_i, buf]) => !!buf.name) + .sort(([a], [b]) => a - b) + .map(([_i, buf]) => ({ name: buf.name as ToolName, rawParams: {}, isDone: false, doneParams: [], id: buf.id || 'dummy' })) onText({ fullText, fullReasoning, - toolCall: !fullToolName ? undefined : { name: fullToolName, rawParams: {}, isDone: false, doneParams: [], id: 'dummy' }, + toolCalls: inProgressToolCalls.length > 0 ? 
inProgressToolCalls : undefined, }) } // there are no events for tool_use, it comes in at the end @@ -589,7 +636,11 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag runOnText() } else if (e.content_block.type === 'tool_use') { - fullToolName += e.content_block.name ?? '' // anthropic gives us the tool name in the start block + // Anthropic gives the tool name+id in the start block and the JSON input in + // subsequent input_json_delta events keyed to the same `e.index`. + const buf = getOrCreateAnthropicTool(e.index) + buf.name += e.content_block.name ?? '' + buf.id += e.content_block.id ?? '' runOnText() } } @@ -605,7 +656,10 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag runOnText() } else if (e.delta.type === 'input_json_delta') { // tool use - fullToolParams += e.delta.partial_json ?? '' // anthropic gives us the partial delta (string) here - https://docs.anthropic.com/en/api/messages-streaming + // partial_json is a string delta scoped to the current content block (e.index). + // See https://docs.anthropic.com/en/api/messages-streaming + const buf = getOrCreateAnthropicTool(e.index) + buf.argsStr += e.delta.partial_json ?? '' runOnText() } } @@ -614,13 +668,19 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag // on done - (or when error/fail) - this is called AFTER last streamEvent stream.on('finalMessage', (response) => { const anthropicReasoning = response.content.filter(c => c.type === 'thinking' || c.type === 'redacted_thinking') + // Iterate ALL tool_use blocks in document order (response.content preserves ordering). + // Previous behavior only used `tools[0]`, which silently dropped parallel tool calls. 
const tools = response.content.filter(c => c.type === 'tool_use') - // console.log('TOOLS!!!!!!', JSON.stringify(tools, null, 2)) - // console.log('TOOLS!!!!!!', JSON.stringify(response, null, 2)) - const toolCall = tools[0] && rawToolCallObjOfAnthropicParams(tools[0]) - const toolCallObj = toolCall ? { toolCall } : {} + const finalToolCalls: RawToolCallObj[] = tools + .map(t => rawToolCallObjOfAnthropicParams(t)) + .filter((t): t is RawToolCallObj => t !== null) - onFinalMessage({ fullText, fullReasoning, anthropicReasoning, ...toolCallObj }) + onFinalMessage({ + fullText, + fullReasoning, + anthropicReasoning, + toolCalls: finalToolCalls.length > 0 ? finalToolCalls : undefined, + }) }) // on error stream.on('error', (error) => { @@ -825,9 +885,14 @@ const sendGeminiChat = async ({ let fullReasoningSoFar = '' let fullTextSoFar = '' - let toolName = '' - let toolParamsStr = '' - let toolId = '' + // Tool-call buffer — Gemini emits each functionCall as a fully-formed object (not a + // streamed partial like OpenAI/Anthropic), so we just accumulate them. Each chunk's + // `chunk.functionCalls` may contain zero or more calls. We track by (name + JSON args) + // to dedupe in case a later chunk repeats an earlier call (the SDK occasionally does + // this in the final summary chunk). Ordering is preserved by first-appearance. + type GeminiToolBuf = { name: string; argsStr: string; id: string } + const geminiToolCalls: GeminiToolBuf[] = [] + const geminiToolSeen = new Set() // Gemini reports token usage via chunk.usageMetadata. It typically appears in the last // chunk(s), but we keep the latest seen so we always forward the freshest values. @@ -861,13 +926,20 @@ const sendGeminiChat = async ({ } } - // tool call + // tool calls — iterate ALL functionCalls in the chunk. Previously we only kept + // `functionCalls[0]`, silently dropping any parallel tool emission (e.g. a model + // asking to read three files at once). Dedupe across chunks by (id || name+args). 
const functionCalls = chunk.functionCalls if (functionCalls && functionCalls.length > 0) { - const functionCall = functionCalls[0] // Get the first function call - toolName = functionCall.name ?? '' - toolParamsStr = JSON.stringify(functionCall.args ?? {}) - toolId = functionCall.id ?? '' + for (const fc of functionCalls) { + const name = fc.name ?? '' + const argsStr = JSON.stringify(fc.args ?? {}) + const id = fc.id ?? '' + const key = id || `${name}::${argsStr}` + if (geminiToolSeen.has(key)) continue + geminiToolSeen.add(key) + geminiToolCalls.push({ name, argsStr, id }) + } } // usage (Gemini exposes promptTokenCount / candidatesTokenCount / totalTokenCount / @@ -888,23 +960,43 @@ const sendGeminiChat = async ({ } } + // Build the in-progress tool-call snapshot for UI streaming. Gemini tool calls + // are already complete when they appear in a chunk, but we still surface them + // via onText so the UI can render them as they arrive rather than only at end. + const inProgressToolCalls: RawToolCallObj[] = geminiToolCalls.map(buf => ({ + name: buf.name as ToolName, + rawParams: {}, + isDone: false, + doneParams: [], + id: buf.id, + })) + // call onText onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, - toolCall: !toolName ? undefined : { name: toolName, rawParams: {}, isDone: false, doneParams: [], id: toolId }, + toolCalls: inProgressToolCalls.length > 0 ? inProgressToolCalls : undefined, usage: latestUsage, }) } - // on final - if (!fullTextSoFar && !fullReasoningSoFar && !toolName) { + // on final — parse each accumulated tool buffer into a full RawToolCallObj. + // Empty ids are filled with a UUID so downstream code (which keys tool-result + // messages by id) doesn't collide across tools. Malformed JSON args are skipped. 
+ const finalToolCalls: RawToolCallObj[] = geminiToolCalls + .map(buf => rawToolCallObjOfParamsStr(buf.name, buf.argsStr, buf.id || generateUuid())) + .filter((t): t is RawToolCallObj => t !== null) + + if (!fullTextSoFar && !fullReasoningSoFar && finalToolCalls.length === 0) { onError({ message: 'Void: Response from model was empty.', fullError: null }) } else { - if (!toolId) toolId = generateUuid() // ids are empty, but other providers might expect an id - const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId) - const toolCallObj = toolCall ? { toolCall } : {} - onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, ...toolCallObj }); + onFinalMessage({ + fullText: fullTextSoFar, + fullReasoning: fullReasoningSoFar, + anthropicReasoning: null, + usage: latestUsage, + toolCalls: finalToolCalls.length > 0 ? finalToolCalls : undefined, + }); } }) .catch(error => { diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts index 27f35ad5..d851f4f7 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts @@ -66,9 +66,17 @@ export const sendLLMMessage = async ({ } const onFinalMessage: OnFinalMessage = (params) => { - const { fullText, fullReasoning, toolCall } = params + const { fullText, fullReasoning, toolCalls } = params if (_didAbort) return - captureLLMEvent(`${loggingName} - Received Full Message`, { messageLength: fullText.length, reasoningLength: fullReasoning?.length, duration: new Date().getMilliseconds() - submit_time.getMilliseconds(), toolCallName: toolCall?.name }) + captureLLMEvent(`${loggingName} - Received Full Message`, { + messageLength: fullText.length, + reasoningLength: fullReasoning?.length, + duration: new Date().getMilliseconds() - 
submit_time.getMilliseconds(), + // Parallel tool calling: capture the number of tools and a comma-joined summary + // so metrics can see how often models emit batches (vs. 0 or 1 tool per turn). + toolCallCount: toolCalls?.length ?? 0, + toolCallNames: toolCalls?.map(t => t.name).join(','), + }) onFinalMessage_(params) }