support multi tool call (#13)

2026-05-22 17:08:25 +00:00 · 2026-04-22 23:12:29 +08:00 · 2026-04-22 23:12:29 +08:00 · 79db82e458
commit 79db82e458
parent f9cb764fbc
9 changed files with 524 additions and 143 deletions
--- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts
+++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts
@ -199,7 +199,11 @@ export type ThreadStreamState = {
 		llmInfo: {
 			displayContentSoFar: string;
 			reasoningSoFar: string;
-			toolCallSoFar: RawToolCallObj | null;
+			// Ordered list of tool calls being streamed from the LLM. Most turns have
+			// length 0 (pure text) or 1 (single tool call). Providers that support
+			// parallel tool calling (OpenAI, Anthropic, Gemini) may emit multiple.
+			// Tools are executed serially by the agent loop in this order.
+			toolCallsSoFar: RawToolCallObj[];
 		};
 		toolInfo?: undefined;
 		interrupt: Promise<() => void>; // calling this should have no effect on state - would be too confusing. it just cancels the tool
@ -714,55 +718,150 @@ class ChatThreadService extends Disposable implements IChatThreadService {



-	private _swapOutLatestStreamingToolWithResult = (threadId: string, tool: ChatMessage & { role: 'tool' }) => {
-		const messages = this.state.allThreads[threadId]?.messages
-		if (!messages) return false
-		const lastMsg = messages[messages.length - 1]
-		if (!lastMsg) return false
-
-		if (lastMsg.role === 'tool' && lastMsg.type !== 'invalid_params') {
-			this._editMessageInThread(threadId, messages.length - 1, tool)
-			return true
-		}
-		return false
-	}
+	/**
+	 * Transitions a tool message (by id) to a new state in the thread. Before parallel tool
+	 * calling this just swapped the last message, which worked because a tool was always
+	 * the most recent message at every transition. With batches, tool i may be followed
+	 * in the thread by pre-added tool_requests for tools i+1, i+2..., so we search by id.
+	 *
+	 * If no matching tool is found we append (preserves the original behavior for fresh
+	 * tool_request additions by `_runToolCall`'s non-batch path). When a match exists,
+	 * we preserve batchIndex/batchSize from the existing row so the UI's (i/N) prefix
+	 * doesn't drop across state transitions (tool_request → running_now → success).
+	 */
 	private _updateLatestTool = (threadId: string, tool: ChatMessage & { role: 'tool' }) => {
-		const swapped = this._swapOutLatestStreamingToolWithResult(threadId, tool)
-		if (swapped) return
+		const messages = this.state.allThreads[threadId]?.messages
+		if (!messages) { this._addMessageToThread(threadId, tool); return } // no message list found for this thread: hand off to the append path
+		for (let i = messages.length - 1; i >= 0; i--) { // newest → oldest; stops at the first tool row whose id matches
+			const m = messages[i]
+			if (m.role === 'tool' && m.id === tool.id) {
+				// Preserve batch metadata from the pre-added row — the transitional updates
+				// from `_runToolCall` don't know about batchIndex/batchSize. `...tool` is spread last, so a batch field present on `tool` still wins.
+				const merged = { batchIndex: m.batchIndex, batchSize: m.batchSize, ...tool } as ChatMessage & { role: 'tool' }
+				this._editMessageInThread(threadId, i, merged) // replace the row at index i; its position in the thread is unchanged
+				return
+			}
+		}
 		this._addMessageToThread(threadId, tool)
 	}

+	/**
+	 * Returns consecutive trailing `tool_request` messages in the thread — these are the
+	 * not-yet-executed tools in the current batch. The user-facing "awaiting approval"
+	 * tool is always the FIRST of this list (the batch processor runs them in order, so
+	 * any tool before the paused one is already in a terminal state like `success`).
+	 */
+	private _getPendingBatchTools = (threadId: string): (ToolMessage<ToolName> & { type: 'tool_request' })[] => {
+		const messages = this.state.allThreads[threadId]?.messages ?? [] // unknown thread → empty list → returns []
+		const pending: (ToolMessage<ToolName> & { type: 'tool_request' })[] = []
+		for (let i = messages.length - 1; i >= 0; i--) { // walk backwards from the newest message
+			const m = messages[i]
+			if (m.role === 'tool' && m.type === 'tool_request') pending.unshift(m) // unshift keeps the result in thread (oldest-first) order
+			else break // NOTE(review): ANY other row at the tail ends the scan — confirm nothing (e.g. the edit checkpoint added after validation) is appended behind pre-added requests, or they become invisible here
+		}
+		return pending
+	}
+
+	/**
+	 * Runs all currently-pending tool_requests at the tail of the thread, in order.
+	 * Each call to `_runToolCall` validates, checks approval, and either runs the tool
+	 * or pauses for user approval. Returns:
+	 *   - 'awaiting_user' if a tool paused for approval (remaining tools stay pending)
+	 *   - 'interrupted' if a tool was interrupted (agent should terminate)
+	 *   - 'done' if all pending tools ran to a terminal state
+	 */
+	private _tryDrainPendingBatch = async (threadId: string): Promise<'done' | 'awaiting_user' | 'interrupted'> => {
+		while (true) {
+			const pending = this._getPendingBatchTools(threadId) // re-read every iteration: the previous `_runToolCall` may have changed the thread
+			if (pending.length === 0) return 'done'
+			const next = pending[0] // oldest pending request runs first
+			const { awaitingUserApproval, interrupted } = await this._runToolCall(
+				threadId, next.name, next.id, next.mcpServerName,
+				{ preapproved: false, unvalidatedToolParams: next.rawParams, rawParamsStr: next.rawParamsStr }
+			)
+			if (interrupted) return 'interrupted' // checked first, so an interrupt takes precedence if both flags are set
+			if (awaitingUserApproval) return 'awaiting_user' // otherwise loop again. NOTE(review): this only terminates if `_runToolCall` moved `next` out of `tool_request` — confirm every return path does
+		}
+	}
+
 	approveLatestToolRequest(threadId: string) {
 		const thread = this.state.allThreads[threadId]
 		if (!thread) return // should never happen

-		const lastMsg = thread.messages[thread.messages.length - 1]
-		if (!(lastMsg.role === 'tool' && lastMsg.type === 'tool_request')) return // should never happen
-
-		const callThisToolFirst: ToolMessage<ToolName> = lastMsg
+		// In batch mode multiple tool_requests can be pending at the tail of the thread —
+		// the one awaiting approval is the FIRST (tools that already ran have transitioned
+		// away from tool_request state). Pre-batch code grabbed messages[-1], which silently
+		// breaks for batches because later not-yet-started tools are newer in the thread.
+		const pending = this._getPendingBatchTools(threadId)
+		if (pending.length === 0) return // nothing at the tail is waiting for approval: no-op
+		const callThisToolFirst = pending[0] // oldest pending request = the one being approved; `_runChatAgent` receives it as `callThisToolFirst`

 		this._wrapRunAgentToNotify(
 			this._runChatAgent({ callThisToolFirst, threadId, ...this._currentModelSelectionProps() })
 			, threadId
 		)
 	}
-	rejectLatestToolRequest(threadId: string) {
+	/**
+	 * Reject a pending tool request.
+	 *
+	 * `resumeAgent` controls what happens after the rejection:
+	 *   - true  (from UI "reject" button): mark this tool + all other pending tools in
+	 *           the same batch as `rejected` ("reject-all" semantic), then resume the
+	 *           agent loop so the LLM sees the rejections and can react (e.g. ask the
+	 *           user what to do next). This keeps the conversation alive.
+	 *   - false (from abort/hard-stop path in `abortRunning`): mark rejected and stop.
+	 *           The conversation terminates; no further LLM call is made.
+	 *
+	 * Default is true because the common case is the user clicking the UI reject button.
+	 * `abortRunning` explicitly passes false. With no pending tool_request, the legacy fallback ignores `resumeAgent` and always stops.
+	 */
+	rejectLatestToolRequest(threadId: string, resumeAgent: boolean = true) {
 		const thread = this.state.allThreads[threadId]
 		if (!thread) return // should never happen

-		const lastMsg = thread.messages[thread.messages.length - 1]
-
-		let params: ToolCallParams<ToolName>
-		if (lastMsg.role === 'tool' && lastMsg.type !== 'invalid_params') {
-			params = lastMsg.params
+		// Reject-all semantics: if the user rejected any tool in a batch, reject all its
+		// pending siblings too. Partial execution (run 1 and 2, reject 3, continue to 4)
+		// is confusing — the model emitted the batch as an atomic plan, so we either run
+		// it or abort it as a unit. Tools that already completed (success/tool_error)
+		// retain their terminal state; only pending tool_requests are rejected.
+		const pending = this._getPendingBatchTools(threadId)
+		if (pending.length === 0) {
+			// Fallback to legacy path: last message should be a tool in a non-terminal
+			// state. Kept for safety when called from unusual contexts. No agent run is started here, so the stream state is always cleared (as the legacy code did).
+			const lastMsg = thread.messages[thread.messages.length - 1]
+			if (!lastMsg || !(lastMsg.role === 'tool' && lastMsg.type !== 'invalid_params')) return // `!lastMsg`: empty thread, nothing to reject
+			const { name, id, rawParams, rawParamsStr, mcpServerName, params } = lastMsg
+			this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params, name, content: this.toolErrMsgs.rejected, result: null, id, rawParams, rawParamsStr, mcpServerName })
+			this._setStreamState(threadId, undefined) // unconditional: skipping this when resumeAgent is true left the thread stuck in its previous running state
+			return
 		}
-		else return

-		const { name, id, rawParams, rawParamsStr, mcpServerName } = lastMsg
+		const rejectedCount = pending.length // number of requests rejected together; may be smaller than the original batch if earlier tools already ran
+		// Mark every pending tool in the batch as rejected. For the one the user actually
+		// clicked (the first pending), use the primary rejection message. For the others
+		// ("cascade rejections"), use a short explanation so the LLM can distinguish direct
+		// vs. cascade rejection when composing its response.
+		for (let i = 0; i < pending.length; i++) {
+			const p = pending[i]
+			const content = i === 0 ? this.toolErrMsgs.rejected : this.toolErrMsgs.rejectedCascade(rejectedCount)
+			this._updateLatestTool(threadId, {
+				role: 'tool', type: 'rejected',
+				params: p.params, name: p.name, content, result: null, // NOTE(review): for siblings not yet reached by `_runToolCall`, `p.params` is presumably still the unvalidated placeholder — confirm consumers of `rejected` rows tolerate that
+				id: p.id, rawParams: p.rawParams, rawParamsStr: p.rawParamsStr, mcpServerName: p.mcpServerName,
+			})
+		}

-		const errorMessage = this.toolErrMsgs.rejected
-		this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params: params, name: name, content: errorMessage, result: null, id, rawParams, rawParamsStr, mcpServerName })
-		this._setStreamState(threadId, undefined)
+		if (resumeAgent) {
+			// Let the LLM see the rejection(s) and respond. No callThisToolFirst —
+			// _runChatAgent will loop straight into a new LLM call with the rejected
+			// tool results in context.
+			this._wrapRunAgentToNotify(
+				this._runChatAgent({ threadId, ...this._currentModelSelectionProps() })
+				, threadId
+			)
+		} else {
+			this._setStreamState(threadId, undefined)
+		}
 	}

 	private _computeMCPServerOfToolName = (toolName: string) => {
@ -775,9 +874,14 @@ class ChatThreadService extends Disposable implements IChatThreadService {

 		// add assistant message
 		if (this.streamState[threadId]?.isRunning === 'LLM') {
-			const { displayContentSoFar, reasoningSoFar, toolCallSoFar } = this.streamState[threadId].llmInfo
+			const { displayContentSoFar, reasoningSoFar, toolCallsSoFar } = this.streamState[threadId].llmInfo
 			this._addMessageToThread(threadId, { role: 'assistant', displayContent: displayContentSoFar, reasoning: reasoningSoFar, anthropicReasoning: null })
-			if (toolCallSoFar) this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: toolCallSoFar.name, mcpServerName: this._computeMCPServerOfToolName(toolCallSoFar.name) })
+			// For each partially-streamed tool call interrupted mid-flight, add a decorative
+			// "interrupted_streaming_tool" marker. Pre-batch this only handled one tool;
+			// now we iterate the full list so the UI shows all tools the model was planning.
+			for (const tc of toolCallsSoFar) {
+				this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: tc.name, mcpServerName: this._computeMCPServerOfToolName(tc.name) })
+			}
 		}
 		// add tool that's running
 		else if (this.streamState[threadId]?.isRunning === 'tool') {
@ -785,9 +889,11 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 			const content = content_ || this.toolErrMsgs.interrupted
 			this._updateLatestTool(threadId, { role: 'tool', name: toolName, params: toolParams, id, content, rawParams, rawParamsStr, type: 'rejected', result: null, mcpServerName })
 		}
-		// reject the tool for the user if relevant
+		// reject the tool for the user if relevant. `resumeAgent: false` — abortRunning is
+		// a hard stop from the user; we don't want to restart the LLM loop with rejection
+		// feedback (which is what the normal reject-button path does).
 		else if (this.streamState[threadId]?.isRunning === 'awaiting_user') {
-			this.rejectLatestToolRequest(threadId)
+			this.rejectLatestToolRequest(threadId, false)
 		}
 		else if (this.streamState[threadId]?.isRunning === 'idle') {
 			// do nothing
@ -807,7 +913,16 @@ class ChatThreadService extends Disposable implements IChatThreadService {


 	private readonly toolErrMsgs = {
-		rejected: 'Tool call was rejected by the user.',
+		// Phrased to discourage the model from immediately retrying the same tool. "Rejected"
+		// alone tends to trigger LLMs into "let me try again" behavior, which wastes tokens
+		// and annoys the user. Framing it as a signal to pause and consult the user breaks
+		// that pattern.
+		rejected: 'The user rejected this tool call. Do not retry the same action. Acknowledge the rejection, ask the user what they want you to do differently, or propose an alternative approach.',
+		// Used for the "cascade" rejections when the user rejects one tool in a multi-tool
+		// batch and reject-all semantics propagates the rejection to its siblings. Tells
+		// the model that not running the rest was a side effect of one rejection, not a
+		// per-tool decision, so it doesn't over-apologize for each.
+		rejectedCascade: (batchSize: number) => `The user rejected the tool batch (${batchSize} tools). This specific tool was skipped as part of that rejection, not individually rejected. See the primary rejection for the user's reasoning.`, // `rejectLatestToolRequest` passes the number of still-pending requests it rejected together
 		interrupted: 'Tool call was interrupted by the user.',
 		errWhenStringifying: (error: any) => `Tool call succeeded, but there was an error stringifying the output.\n${getErrorMessage(error)}`
 	}
@ -851,7 +966,10 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 			}
 			catch (error) {
 				const errorMessage = getErrorMessage(error)
-				this._addMessageToThread(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, rawParamsStr, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName })
+				// Use _updateLatestTool (not _addMessageToThread) so that when this tool was
+				// pre-added as a `tool_request` by the batch processor, we transition that
+				// row in place (preserving batchIndex/batchSize) instead of appending a new one.
+				this._updateLatestTool(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, rawParamsStr, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName })
 				return {}
 			}
 			// once validated, add checkpoint for edit
@ -883,8 +1001,13 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 					}
 				}

-				// add a tool_request because we use it for UI if a tool is loading (this should be improved in the future)
-				this._addMessageToThread(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
+				// Transition (or create) the tool_request row. _updateLatestTool finds the
+				// row by id: for solo tool calls there's no pre-added row and it appends one
+				// (same as the old behavior). For batched tool calls, the batch processor
+				// pre-added a tool_request with batchIndex/batchSize, and this call now
+				// replaces its placeholder unvalidated params with the validated ones while
+				// preserving the batch metadata.
+				this._updateLatestTool(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
 				if (!autoApprove) {
 					return { awaitingUserApproval: true }
 				}
@ -996,12 +1119,29 @@ class ChatThreadService extends Disposable implements IChatThreadService {

 		// before enter loop, call tool
 		if (callThisToolFirst) {
+			// Run the just-approved tool, then drain any remaining pending batch siblings
+			// (tools pre-added when the batch started and not yet run). Each drained tool
+			// may pause for its own approval — we stop the agent in that case and return.
 			const { interrupted } = await this._runToolCall(threadId, callThisToolFirst.name, callThisToolFirst.id, callThisToolFirst.mcpServerName, { preapproved: true, unvalidatedToolParams: callThisToolFirst.rawParams, rawParamsStr: callThisToolFirst.rawParamsStr, validatedParams: callThisToolFirst.params })
 			if (interrupted) {
 				this._setStreamState(threadId, undefined)
 				this._addUserCheckpoint({ threadId })
-
+				return
 			}
+			// Drain the remaining pending batch (if there are other tools from this turn
+			// that still need to run). If any of them pauses for approval, stop here — the
+			// agent will resume when the user next approves or rejects.
+			const drainRes = await this._tryDrainPendingBatch(threadId)
+			if (drainRes === 'interrupted') {
+				this._setStreamState(threadId, undefined)
+				this._addUserCheckpoint({ threadId })
+				return
+			}
+			if (drainRes === 'awaiting_user') {
+				this._setStreamState(threadId, { isRunning: 'awaiting_user' })
+				return
+			}
+			// drainRes === 'done': fall through to the main LLM loop below.
 		}
 		this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' })  // just decorative, for clarity

@ -1034,7 +1174,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 				nAttempts += 1

 				type ResTypes =
-					| { type: 'llmDone', toolCall?: RawToolCallObj, info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null, finishReason?: string } }
+					| { type: 'llmDone', toolCalls: RawToolCallObj[], info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null, finishReason?: string } }
 					| { type: 'llmError', error?: { message: string; fullError: Error | null; } }
 					| { type: 'llmAborted' }

@ -1050,16 +1190,16 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 					overridesOfModel,
 					logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } },
 					separateSystemMessage: separateSystemMessage,
-					onText: ({ fullText, fullReasoning, toolCall, usage }) => {
+					onText: ({ fullText, fullReasoning, toolCalls, usage }) => {
 						if (usage) this._setLatestUsage(threadId, usage)
-						this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) })
+						this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallsSoFar: toolCalls ?? [] }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) })
 					},
-					onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage, finishReason }) => {
+					onFinalMessage: async ({ fullText, fullReasoning, toolCalls, anthropicReasoning, usage, finishReason }) => {
 						if (usage) this._setLatestUsage(threadId, usage)
 						// Lock in this request's usage so the next loop iteration's
 						// running total is added to (not replacing) what we already counted.
 						this._lockInCurrentRequestUsage(threadId)
-						resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning, finishReason } }) // resolve with tool calls
+						resMessageIsDonePromise({ type: 'llmDone', toolCalls: toolCalls ?? [], info: { fullText, fullReasoning, anthropicReasoning, finishReason } }) // resolve with tool calls
 					},
 					onError: async (error) => {
 						resMessageIsDonePromise({ type: 'llmError', error: error })
@ -1077,7 +1217,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 					break
 				}

-				this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: '', reasoningSoFar: '', toolCallSoFar: null }, interrupt: Promise.resolve(() => this._llmMessageService.abort(llmCancelToken)) })
+				this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: '', reasoningSoFar: '', toolCallsSoFar: [] }, interrupt: Promise.resolve(() => this._llmMessageService.abort(llmCancelToken)) })
 				const llmRes = await messageIsDonePromise // wait for message to complete

 				// if something else started running in the meantime
@ -1108,9 +1248,13 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 					// error, but too many attempts
 					else {
 						const { error } = llmRes
-						const { displayContentSoFar, reasoningSoFar, toolCallSoFar } = this.streamState[threadId].llmInfo
+						const { displayContentSoFar, reasoningSoFar, toolCallsSoFar } = this.streamState[threadId].llmInfo
 						this._addMessageToThread(threadId, { role: 'assistant', displayContent: displayContentSoFar, reasoning: reasoningSoFar, anthropicReasoning: null })
-						if (toolCallSoFar) this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: toolCallSoFar.name, mcpServerName: this._computeMCPServerOfToolName(toolCallSoFar.name) })
+						// Record an interrupted-streaming marker for every tool the LLM was
+						// mid-way through emitting. Pre-batch this only handled the first tool.
+						for (const tc of toolCallsSoFar) {
+							this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: tc.name, mcpServerName: this._computeMCPServerOfToolName(tc.name) })
+						}

 						this._setStreamState(threadId, { isRunning: undefined, error })
 						this._addUserCheckpoint({ threadId })
@ -1119,23 +1263,52 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 				}

 				// llm res success
-				const { toolCall, info } = llmRes
+				const { toolCalls, info } = llmRes

 				this._addMessageToThread(threadId, { role: 'assistant', displayContent: info.fullText, reasoning: info.fullReasoning, anthropicReasoning: info.anthropicReasoning, finishReason: info.finishReason })

 				this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative for clarity

-				// call tool if there is one
-				if (toolCall) {
+				// call tool(s) if there are any. Batched / parallel tool emissions are handled
+				// by pre-adding every tool as a `tool_request` (with batchIndex/batchSize so the
+				// UI can render "(1/N)" prefixes), then running them serially. Any tool may pause
+				// for user approval; if that happens the remaining tools in the batch stay as
+				// pending tool_requests, visible to the user as stacked progress rows.
+				if (toolCalls.length > 0) {
 					const mcpTools = this._mcpService.getMCPTools()
-					const mcpTool = mcpTools?.find(t => t.name === toolCall.name)
+					const batchSize = toolCalls.length
+					for (let i = 0; i < batchSize; i++) {
+						const tc = toolCalls[i]
+						const mcpServerName = mcpTools?.find(t => t.name === tc.name)?.mcpServerName
+						this._addMessageToThread(threadId, {
+							role: 'tool',
+							type: 'tool_request',
+							content: '(Pending...)',
+							result: null,
+							name: tc.name,
+							// Placeholder unvalidated params — `_runToolCall` will validate and
+							// replace via `_updateLatestTool` before the tool runs. The cast is
+							// safe because the UI only reads validated `params` on tool_requests
+							// once they've transitioned past the placeholder phase (which happens
+							// synchronously when `_tryDrainPendingBatch` hits this tool).
+							params: tc.rawParams as unknown as ToolCallParams<ToolName>,
+							id: tc.id,
+							rawParams: tc.rawParams,
+							rawParamsStr: tc.rawParamsStr,
+							mcpServerName,
+							// Only stamp batch metadata when there's actually more than one tool —
+							// a solo tool call shouldn't render "(1/1)" in the UI.
+							batchIndex: batchSize > 1 ? i : undefined,
+							batchSize: batchSize > 1 ? batchSize : undefined,
+						})
+					}

-					const { awaitingUserApproval, interrupted } = await this._runToolCall(threadId, toolCall.name, toolCall.id, mcpTool?.mcpServerName, { preapproved: false, unvalidatedToolParams: toolCall.rawParams, rawParamsStr: toolCall.rawParamsStr })
-					if (interrupted) {
+					const batchRes = await this._tryDrainPendingBatch(threadId)
+					if (batchRes === 'interrupted') {
 						this._setStreamState(threadId, undefined)
 						return
 					}
-					if (awaitingUserApproval) { isRunningWhenEnd = 'awaiting_user' }
+					if (batchRes === 'awaiting_user') { isRunningWhenEnd = 'awaiting_user' }
 					else { shouldSendAnotherMessage = true }

 					this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative, for clarity
--- a/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts
+++ b/src/vs/workbench/contrib/void/browser/convertToLLMMessageService.ts
@ -85,22 +85,35 @@ const prepareMessages_openai_tools = (messages: SimpleLLMMessage[]): AnthropicOr
 			continue
 		}

-		// edit previous assistant message to have called the tool
-		const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined
-		if (prevMsg?.role === 'assistant') {
+		// Walk back through newMessages to find the assistant that called this tool. For a
+		// solo tool this is always the immediately-prior message; for a batched response
+		// (N parallel tool calls) we need to append to the same assistant across N tool
+		// messages — the previous implementation overwrote tool_calls each time and only
+		// the LAST tool in a batch survived, corrupting replay bytes + the provider's cache.
+		let assistantIdx = -1
+		for (let j = newMessages.length - 1; j >= 0; j--) {
+			const m = newMessages[j]
+			if (m.role === 'assistant') { assistantIdx = j; break }
+			// Stop at any non-tool, non-assistant message (should never happen since we only
+			// push assistant/tool/user through here in order, but keep the safety rail).
+			if (m.role !== 'tool') break
+		}
+		if (assistantIdx >= 0) {
+			const asstMsg = newMessages[assistantIdx] as OpenAILLMChatMessage & { role: 'assistant' }
 			// Prefer the model's original serialized argument string when we have it
 			// (OpenAI-compatible providers expose it in the streaming delta). Sending
 			// byte-identical bytes back preserves the provider's prefix cache past the
 			// tool call. Fall back to re-serializing when the raw string is unavailable
 			// (e.g. conversations from before this field existed, or non-OpenAI provenance).
-			prevMsg.tool_calls = [{
-				type: 'function',
+			const newCall = {
+				type: 'function' as const,
 				id: currMsg.id,
 				function: {
 					name: currMsg.name,
 					arguments: currMsg.rawParamsStr ?? JSON.stringify(currMsg.rawParams)
 				}
-			}]
+			}
+			asstMsg.tool_calls = [...(asstMsg.tool_calls ?? []), newCall]
 		}

 		// add the tool
@ -181,13 +194,27 @@ const prepareMessages_anthropic_tools = (messages: SimpleLLMMessage[], supportsA
 		}

 		if (currMsg.role === 'tool') {
-			// add anthropic tools
-			const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined
-
-			// make it so the assistant called the tool
-			if (prevMsg?.role === 'assistant') {
-				if (typeof prevMsg.content === 'string') prevMsg.content = [{ type: 'text', text: prevMsg.content }]
-				prevMsg.content.push({ type: 'tool_use', id: currMsg.id, name: currMsg.name, input: currMsg.rawParams })
+			// Walk back to the assistant that owned this tool call. For a batched turn
+			// (multiple parallel tool calls on one assistant), each tool message appends
+			// its own `tool_use` block to the same assistant's content array, and Anthropic
+			// sees the full batch as one assistant turn. Previously only the first tool
+			// was attached (prevMsg check) and the rest silently orphaned, which made
+			// replay of batched turns fail validation.
+			let assistantIdx = -1
+			for (let j = i - 1; j >= 0; j--) {
+				const m = newMessages[j]
+				if (!m) continue
+				if (m.role === 'assistant') { assistantIdx = j; break }
+				// Skip over previously-converted tool rows (now user messages with tool_result);
+				// anything else means we walked past the batch boundary.
+				if (m.role !== 'user') break
+				const isToolResultUser = Array.isArray(m.content) && m.content.some(c => c.type === 'tool_result')
+				if (!isToolResultUser) break
+			}
+			if (assistantIdx >= 0) {
+				const asstMsg = newMessages[assistantIdx] as AnthropicLLMChatMessage & { role: 'assistant' }
+				if (typeof asstMsg.content === 'string') asstMsg.content = [{ type: 'text', text: asstMsg.content }]
+				asstMsg.content.push({ type: 'tool_use', id: currMsg.id, name: currMsg.name, input: currMsg.rawParams })
 			}

 			// turn each tool into a user message with tool results at the end
@ -214,12 +241,20 @@ const prepareMessages_XML_tools = (messages: SimpleLLMMessage[], supportsAnthrop
 		const next = 0 <= i + 1 && i + 1 <= messages.length - 1 ? messages[i + 1] : null

 		if (c.role === 'assistant') {
-			// if called a tool (message after it), re-add its XML to the message
-			// alternatively, could just hold onto the original output, but this way requires less piping raw strings everywhere
+			// Re-serialize every consecutive tool message after this assistant as XML and
+			// concatenate them back onto the assistant content. Multi-tool batches may land
+			// in history (e.g. if the user switches from a native-tool-calling model into a
+			// grammar-based one); only appending `next` would lose tool calls 2..N.
 			let content: AnthropicOrOpenAILLMMessage['content'] = c.content
-			if (next?.role === 'tool') {
-				content = `${content}\n\n${reParsedToolXMLString(next.name, next.rawParams)}`
+			for (let k = i + 1; k < messages.length; k++) {
+				const followUp = messages[k]
+				if (followUp.role !== 'tool') break
+				content = `${content}\n\n${reParsedToolXMLString(followUp.name, followUp.rawParams)}`
 			}
+			// For backward compatibility with the void-format assumption, we keep the
+			// reference to `next` intact below (it's still used by the batch-rebuild loop at
+			// the tool-result step).
+			void next

 			// anthropic reasoning
 			if (c.anthropicReasoning && supportsAnthropicReasoning) {
@ -454,7 +489,13 @@ const prepareOpenAIOrAnthropicMessages = ({
 type GeminiUserPart = (GeminiLLMChatMessage & { role: 'user' })['parts'][0]
 type GeminiModelPart = (GeminiLLMChatMessage & { role: 'model' })['parts'][0]
 const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => {
-	let latestToolName: ToolName | undefined = undefined
+	// Map tool_use id → tool name, populated as we encounter `tool_use` parts on
+	// assistant turns. functionResponse entries later (on user turns) look their name up
+	// by id so batched turns resolve each response to the correct call. Previously a
+	// single `latestToolName` was tracked, which broke when one assistant emitted N
+	// parallel tools: the Nth name won, and all earlier functionResponse parts were
+	// mislabeled (Gemini rejects these with "function name mismatch").
+	const toolNameById = new Map<string, ToolName>()
 	const messages2: GeminiLLMChatMessage[] = messages.map((m): GeminiLLMChatMessage | null => {
 		if (m.role === 'assistant') {
 			if (typeof m.content === 'string') {
@ -466,7 +507,7 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => {
 						return { text: c.text }
 					}
 					else if (c.type === 'tool_use') {
-						latestToolName = c.name
+						toolNameById.set(c.id, c.name)
 						return { functionCall: { id: c.id, name: c.name, args: c.input } }
 					}
 					else return null
@ -484,8 +525,9 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => {
 						return { text: c.text }
 					}
 					else if (c.type === 'tool_result') {
-						if (!latestToolName) return null
-						return { functionResponse: { id: c.tool_use_id, name: latestToolName, response: { output: c.content } } }
+						const resolvedName = toolNameById.get(c.tool_use_id)
+						if (!resolvedName) return null
+						return { functionResponse: { id: c.tool_use_id, name: resolvedName, response: { output: c.content } } }
 					}
 					else return null
 				}).filter(m => !!m)
--- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
@ -1621,8 +1621,20 @@ const titleOfBuiltinToolName = {
 } as const satisfies Record<BuiltinToolName, { done: any, proposed: any, running: any }>


-const getTitle = (toolMessage: Pick<ChatMessage & { role: 'tool' }, 'name' | 'type' | 'mcpServerName'>): React.ReactNode => {
+// Prefix like "(1/2) " when this tool is part of a multi-tool batch emitted in one
+// assistant turn. The prefix is purely decorative (helps the user see that one reply
+// contains multiple tools and track how many are done) and is omitted for solo tools
+// or when the message predates parallel tool support (batchIndex/batchSize undefined).
+const batchPrefix = (m: Pick<ChatMessage & { role: 'tool' }, 'batchIndex' | 'batchSize'>): string => {
+	if (m.batchIndex === undefined || m.batchSize === undefined) return ''
+	if (m.batchSize <= 1) return ''
+	// batchIndex is 0-based internally but we render as 1-based for humans.
+	return `(${m.batchIndex + 1}/${m.batchSize}) `
+}
+
+const getTitle = (toolMessage: Pick<ChatMessage & { role: 'tool' }, 'name' | 'type' | 'mcpServerName' | 'batchIndex' | 'batchSize'>): React.ReactNode => {
 	const t = toolMessage
+	const prefix = batchPrefix(t)

 	// non-built-in title
 	if (!builtinToolNames.includes(t.name as BuiltinToolName)) {
@ -1637,7 +1649,7 @@ const getTitle = (toolMessage: Pick<ChatMessage & { role: 'tool' }, 'name' | 'ty
 									: 'Call'


-		const title = `${descriptor} ${toolMessage.mcpServerName || 'MCP'}`
+		const title = `${prefix}${descriptor} ${toolMessage.mcpServerName || 'MCP'}`
 		if (t.type === 'running_now' || t.type === 'tool_request')
 			return loadingTitleWrapper(title)
 		return title
@ -1646,9 +1658,11 @@ const getTitle = (toolMessage: Pick<ChatMessage & { role: 'tool' }, 'name' | 'ty
 	// built-in title
 	else {
 		const toolName = t.name as BuiltinToolName
-		if (t.type === 'success') return titleOfBuiltinToolName[toolName].done
-		if (t.type === 'running_now') return titleOfBuiltinToolName[toolName].running
-		return titleOfBuiltinToolName[toolName].proposed
+		const base =
+			t.type === 'success' ? titleOfBuiltinToolName[toolName].done
+				: t.type === 'running_now' ? titleOfBuiltinToolName[toolName].running
+					: titleOfBuiltinToolName[toolName].proposed
+		return prefix ? `${prefix}${base}` : base
 	}
 }

@ -2699,6 +2713,12 @@ type ChatBubbleProps = {
 	threadId: string,
 	currCheckpointIdx: number | undefined,
 	_scrollToBottom: (() => void) | null,
+	// Index of the message that currently owns the approve/reject prompt (the earliest
+	// tool_request in the consecutive trailing batch). When a multi-tool batch is
+	// pre-added, all queued tool_requests share the same status but only the first one
+	// should render the buttons; the others are "waiting their turn". undefined = no
+	// pending approval anywhere in the thread.
+	firstPendingToolRequestIdx?: number,
 }

 const ChatBubble = (props: ChatBubbleProps) => {
@ -2707,7 +2727,7 @@ const ChatBubble = (props: ChatBubbleProps) => {
 	</ErrorBoundary>
 }

-const _ChatBubble = ({ threadId, chatMessage, currCheckpointIdx, isCommitted, messageIdx, chatIsRunning, _scrollToBottom }: ChatBubbleProps) => {
+const _ChatBubble = ({ threadId, chatMessage, currCheckpointIdx, isCommitted, messageIdx, chatIsRunning, _scrollToBottom, firstPendingToolRequestIdx }: ChatBubbleProps) => {
 	const role = chatMessage.role

 	const isCheckpointGhost = messageIdx > (currCheckpointIdx ?? Infinity) && !chatIsRunning // whether to show as gray (if chat is running, for good measure just dont show any ghosts)
@ -2751,7 +2771,7 @@ const _ChatBubble = ({ threadId, chatMessage, currCheckpointIdx, isCommitted, me
 						threadId={threadId}
 					/>
 				</div>
-				{chatMessage.type === 'tool_request' ?
+				{chatMessage.type === 'tool_request' && messageIdx === firstPendingToolRequestIdx ?
 					<div className={`${isCheckpointGhost ? 'opacity-50 pointer-events-none' : ''}`}>
 						<ToolRequestAcceptRejectButtons toolName={chatMessage.name} />
 					</div> : null}
@ -3102,8 +3122,14 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: {
 	const streamState = useChatThreadsStreamState(threadId)
 	const isRunning = streamState?.isRunning
 	const latestError = streamState?.error
-	const { displayContentSoFar, toolCallSoFar, reasoningSoFar } = streamState?.llmInfo ?? {}
-	const toolIsGenerating = toolCallSoFar && !toolCallSoFar.isDone
+	const { displayContentSoFar, toolCallsSoFar, reasoningSoFar } = streamState?.llmInfo ?? {}
+	// During streaming the "currently being written" tool is the last one in the array
+	// (indices are emitted in order). Earlier tools in the batch may already be complete
+	// (their argument JSON fully streamed) but their persisted tool_request rows only
+	// show up in `thread.messages` once onFinalMessage fires and the batch is committed.
+	// For the live preview here we just show the latest in-flight tool.
+	const currentInFlightTool = toolCallsSoFar && toolCallsSoFar.length > 0 ? toolCallsSoFar[toolCallsSoFar.length - 1] : undefined
+	const toolIsGenerating = currentInFlightTool && !currentInFlightTool.isDone

 	const currCheckpointIdx = thread?.state?.currCheckpointIdx ?? undefined

@ -3118,6 +3144,22 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: {
 		}
 	}, [isActive, scrollContainerRef])

+	// Index of the "currently awaiting approval" tool request — the earliest of the
+	// consecutive trailing tool_request messages. Matches _getPendingBatchTools() in
+	// the service. For a solo tool call this is just the last message (same as the
+	// pre-batch behavior). For a multi-tool batch, it's the first pending one; later
+	// queued tool_requests render as stacked progress rows without approve/reject
+	// buttons.
+	const firstPendingToolRequestIdx = useMemo(() => {
+		let earliest: number | undefined
+		for (let i = previousMessages.length - 1; i >= 0; i--) {
+			const m = previousMessages[i]
+			if (m.role === 'tool' && m.type === 'tool_request') earliest = i
+			else break
+		}
+		return earliest
+	}, [previousMessages])
+
 	const previousMessagesHTML = useMemo(() => {
 		return previousMessages.map((message, i) => {
 			return <ChatBubble
@ -3129,9 +3171,10 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: {
 				chatIsRunning={isRunning}
 				threadId={threadId}
 				_scrollToBottom={() => scrollToBottom(scrollContainerRef)}
+				firstPendingToolRequestIdx={firstPendingToolRequestIdx}
 			/>
 		})
-	}, [previousMessages, threadId, currCheckpointIdx, isRunning, scrollContainerRef])
+	}, [previousMessages, threadId, currCheckpointIdx, isRunning, scrollContainerRef, firstPendingToolRequestIdx])

 	const streamingChatIdx = previousMessagesHTML.length
 	const currStreamingMessageHTML = reasoningSoFar || displayContentSoFar || isRunning ?
@ -3151,10 +3194,10 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: {
 			_scrollToBottom={null}
 		/> : null

-	const generatingTool = toolIsGenerating ?
-		toolCallSoFar.name === 'edit_file' || toolCallSoFar.name === 'rewrite_file' ? <EditToolSoFar
+	const generatingTool = toolIsGenerating && currentInFlightTool ?
+		currentInFlightTool.name === 'edit_file' || currentInFlightTool.name === 'rewrite_file' ? <EditToolSoFar
 			key={'curr-streaming-tool'}
-			toolCallSoFar={toolCallSoFar}
+			toolCallSoFar={currentInFlightTool}
 		/>
 			: null
 		: null
@ -3230,10 +3273,13 @@ export const SidebarChat = () => {
 	const currThreadStreamState = useChatThreadsStreamState(chatThreadsState.currentThreadId)
 	const isRunning = currThreadStreamState?.isRunning
 	const latestError = currThreadStreamState?.error
-	const { displayContentSoFar, toolCallSoFar, reasoningSoFar } = currThreadStreamState?.llmInfo ?? {}
+	const { displayContentSoFar, toolCallsSoFar, reasoningSoFar } = currThreadStreamState?.llmInfo ?? {}
+	// See ThreadMessagesView comment: the last tool in the array is the one still
+	// being streamed; earlier batch siblings may already have complete argument JSON.
+	const currentInFlightTool = toolCallsSoFar && toolCallsSoFar.length > 0 ? toolCallsSoFar[toolCallsSoFar.length - 1] : undefined

 	// this is just if it's currently being generated, NOT if it's currently running
-	const toolIsGenerating = toolCallSoFar && !toolCallSoFar.isDone // show loading for slow tools (right now just edit)
+	const toolIsGenerating = currentInFlightTool && !currentInFlightTool.isDone // show loading for slow tools (right now just edit)

 	// ----- SIDEBAR CHAT state (local) -----

--- a/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts
+++ b/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts
@ -18,6 +18,14 @@ export type ToolMessage<T extends ToolName> = {
 	// byte-identical tool_calls back, preserving the provider's prefix cache.
 	rawParamsStr?: string;
 	mcpServerName: string | undefined; // the server name at the time of the call
+	// Position of this tool within its assistant-turn batch. When a model emits multiple
+	// parallel tool calls in one response, each tool message stores its 0-based index
+	// (`batchIndex`) and the total count (`batchSize`). The UI uses these to render a
+	// "(1/2)"-style prefix so the user can see tool grouping at a glance. Both are
+	// optional — legacy single-tool responses and persisted history from before this
+	// field existed simply omit them (UI treats that as a solo call, no prefix shown).
+	batchIndex?: number;
+	batchSize?: number;
 } & (
 		// in order of events:
 		| { type: 'invalid_params', result: null, name: T, }
--- a/src/vs/workbench/contrib/void/common/prompt/prompts.ts
+++ b/src/vs/workbench/contrib/void/common/prompt/prompts.ts
@ -507,7 +507,12 @@ You will be given instructions from the user, and may also receive a list of fil
 	if (mode === 'agent' || mode === 'gather') {
 		details.push(`Only call tools if they help you accomplish the user's goal. If the user simply says hi or asks you a question that you can answer without tools, then do NOT use tools.`)
 		details.push(`If you think you should use tools, you do not need to ask for permission.`)
-		details.push('Only use ONE tool call at a time.')
+		// Parallel tool calls are OK (and encouraged) when the operations are independent
+		// — e.g. reading several files, searching several patterns. A single assistant
+		// turn that batches N reads costs one round-trip instead of N, and prefix caching
+		// stays warm across the whole batch. Keep sequential tools for dependent steps
+		// where later arguments require earlier results.
+		details.push(`You can call multiple tools in a single turn when the operations are independent (e.g. reading several files, searching several patterns). Prefer batching reads/searches together rather than issuing them one-at-a-time across turns. Use separate turns when a later tool's arguments depend on an earlier tool's result.`)
 		details.push(`NEVER say something like "I'm going to use \`tool_name\`". Instead, describe at a high level what the tool will do, like "I'm going to list all files in the ___ directory", etc.`)
 		details.push(`Many tools only work if the user has a workspace open.`)
 	}
--- a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts
+++ b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts
@ -112,7 +112,12 @@ export type LLMUsage = {
 	cachedInputTokens?: number;
 }

-export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; usage?: LLMUsage }) => void
+// `toolCalls` is an ordered list. Providers that support parallel/batched tool calling
+// (OpenAI, Anthropic, Gemini) may emit multiple tools in a single assistant turn. A
+// single-tool response is represented as a length-1 array; no tools as an empty array
+// (or `undefined` for brevity). The ordering is preserved from the provider — Void
+// executes them serially in that order.
+export type OnText = (p: { fullText: string; fullReasoning: string; toolCalls?: RawToolCallObj[]; usage?: LLMUsage }) => void

 // `finishReason` is the provider's own reason for ending the stream. OpenAI-compatible
 // servers return one of `stop` / `tool_calls` / `function_call` / `length` / `content_filter`
@ -122,7 +127,9 @@ export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: R
 // clips against `max_tokens`, but also `content_filter` or unknown gateway-specific values).
 // Populated only by OAI-compatible providers right now — Anthropic / Gemini paths leave this
 // undefined, which renders as "no warning" (the same as before this was added).
-export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage; finishReason?: string }) => void // id is tool_use_id
+//
+// `toolCalls` — see `OnText` above. Empty/undefined on pure-text responses.
+export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCalls?: RawToolCallObj[]; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage; finishReason?: string }) => void
 export type OnError = (p: { message: string; fullError: Error | null }) => void
 export type OnAbort = () => void
 export type AbortRef = { current: (() => void) | null }
--- a/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts
+++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/extractGrammar.ts
@ -334,10 +334,15 @@ export const extractXMLToolsWrapper = (
 			)
 		}

+		// Grammar-based tool extraction only surfaces one tool at a time (XML tags are parsed
+		// sequentially out of the text stream), so the array is always length 0 or 1 on this
+		// path. Models that use this wrapper (local models, pseudo-tool-use via text) don't
+		// produce parallel tool calls — that capability is exclusive to providers with native
+		// tool-calling (OpenAI-compatible, Anthropic, Gemini).
 		onText({
 			...params,
 			fullText,
-			toolCall: latestToolCall,
+			toolCalls: latestToolCall ? [latestToolCall] : undefined,
 		});
 	};

@ -349,12 +354,7 @@ export const extractXMLToolsWrapper = (
 		fullText = fullText.trimEnd()
 		const toolCall = latestToolCall

-		// console.log('final message!!!', trueFullText)
-		// console.log('----- returning ----\n', fullText)
-		// console.log('----- tools ----\n', JSON.stringify(firstToolCallRef.current, null, 2))
-		// console.log('----- toolCall ----\n', JSON.stringify(toolCall, null, 2))
-
-		onFinalMessage({ ...params, fullText, toolCall: toolCall })
+		onFinalMessage({ ...params, fullText, toolCalls: toolCall ? [toolCall] : undefined })
 	}
 	return { newOnText, newOnFinalMessage };
 }
--- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
+++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
@ -15,6 +15,7 @@ import { GoogleAuth } from 'google-auth-library'
 /* eslint-enable */

 import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, type LLMUsage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
+import type { ToolName } from '../../common/toolsServiceTypes.js';
 import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
 import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js';
 import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js';
@ -339,9 +340,19 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
 	let fullReasoningSoFar = ''
 	let fullTextSoFar = ''

-	let toolName = ''
-	let toolId = ''
-	let toolParamsStr = ''
+	// Tool-call buffers keyed by `tool_calls[].index` from the delta. OpenAI's streaming spec
+	// allows multiple tool calls in one assistant turn, each identified by its own numeric index,
+	// with chunks interleaved arbitrarily (index=0 chunk, index=1 chunk, index=0 chunk again...).
+	// We previously dropped everything past index 0, which silently corrupted parallel tool-call
+	// responses from GPT-4+, MiniMax, and other providers that batch. Using a Map keyed by index
+	// handles out-of-order chunks correctly. On final, we sort by index to preserve the
+	// provider's intended execution order.
+	const toolBuffers = new Map<number, { name: string; argsStr: string; id: string }>()
+	const getOrCreateToolBuffer = (index: number) => {
+		let buf = toolBuffers.get(index)
+		if (!buf) { buf = { name: '', argsStr: '', id: '' }; toolBuffers.set(index, buf) }
+		return buf
+	}

 	// Usage only arrives in the final chunk (and only if the server honored
 	// stream_options.include_usage). `chunk.usage` is typed as `| null` there.
@ -374,14 +385,17 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
 				const chunkFinishReason = chunk.choices[0]?.finish_reason
 				if (chunkFinishReason) lastFinishReason = chunkFinishReason

-				// tool call
+				// tool calls — aggregate by index. A single chunk may include deltas for multiple
+				// indices (rare but valid), and a single index's pieces may arrive across many
+				// chunks (the common case). `id` is typically present only on the first chunk
+				// for a given index; `arguments` streams incrementally.
 				for (const tool of chunk.choices[0]?.delta?.tool_calls ?? []) {
 					const index = tool.index
-					if (index !== 0) continue
-
-					toolName += tool.function?.name ?? ''
-					toolParamsStr += tool.function?.arguments ?? '';
-					toolId += tool.id ?? ''
+					if (index === undefined) continue
+					const buf = getOrCreateToolBuffer(index)
+					buf.name += tool.function?.name ?? ''
+					buf.argsStr += tool.function?.arguments ?? ''
+					buf.id += tool.id ?? ''
 				}


@ -413,23 +427,44 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
 					}
 				}

+				// Build the in-progress toolCalls snapshot for UI streaming. We only emit entries
+				// for buffers that have at least a name (argument-only deltas for an as-yet-
+				// unnamed tool are still accumulating). Indices are sorted so the UI's rendered
+				// order matches the provider's intended execution order.
+				const inProgressToolCalls: RawToolCallObj[] = Array.from(toolBuffers.entries())
+					.filter(([_i, buf]) => !!buf.name)
+					.sort(([a], [b]) => a - b)
+					.map(([_i, buf]) => ({ name: buf.name as ToolName, rawParams: {}, isDone: false, doneParams: [], id: buf.id }))
+
 				// call onText
 				onText({
 					fullText: fullTextSoFar,
 					fullReasoning: fullReasoningSoFar,
-					toolCall: !toolName ? undefined : { name: toolName, rawParams: {}, isDone: false, doneParams: [], id: toolId },
+					toolCalls: inProgressToolCalls.length > 0 ? inProgressToolCalls : undefined,
 					usage: latestUsage,
 				})

 			}
-			// on final
-			if (!fullTextSoFar && !fullReasoningSoFar && !toolName) {
+			// on final: parse each completed tool buffer. `rawToolCallObjOfParamsStr` returns
+			// null on malformed JSON or non-object inputs — we drop those entries rather than
+			// crashing the whole turn. NOTE: nothing is logged here when an entry is dropped.
+			const finalToolCalls: RawToolCallObj[] = Array.from(toolBuffers.entries())
+				.sort(([a], [b]) => a - b)
+				.map(([_i, buf]) => rawToolCallObjOfParamsStr(buf.name, buf.argsStr, buf.id))
+				.filter((t): t is RawToolCallObj => t !== null)
+
+			if (!fullTextSoFar && !fullReasoningSoFar && finalToolCalls.length === 0) {
 				onError({ message: 'Void: Response from model was empty.', fullError: null })
 			}
 			else {
-				const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId)
-				const toolCallObj = toolCall ? { toolCall } : {}
-				onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, finishReason: lastFinishReason, ...toolCallObj });
+				onFinalMessage({
+					fullText: fullTextSoFar,
+					fullReasoning: fullReasoningSoFar,
+					anthropicReasoning: null,
+					usage: latestUsage,
+					finishReason: lastFinishReason,
+					toolCalls: finalToolCalls.length > 0 ? finalToolCalls : undefined,
+				});
 			}
 		})
 		// when error/fail - this catches errors of both .create() and .then(for await)
@ -557,15 +592,27 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
 	let fullText = ''
 	let fullReasoning = ''

-	let fullToolName = ''
-	let fullToolParams = ''
-
+	// Tool-call buffers keyed by Anthropic's content-block `index`. Anthropic streams each
+	// tool as its own `content_block_start` (with name+id) followed by `content_block_delta`
+	// events carrying `input_json_delta` chunks — both tagged with the same numeric `index`.
+	// We previously only kept the first tool (`tools[0]` at finalMessage), silently dropping
+	// any parallel tool_use blocks. Map<index, ...> preserves ordering and the per-tool id.
+	const anthropicToolBuffers = new Map<number, { name: string; argsStr: string; id: string }>()
+	const getOrCreateAnthropicTool = (index: number) => {
+		let buf = anthropicToolBuffers.get(index)
+		if (!buf) { buf = { name: '', argsStr: '', id: '' }; anthropicToolBuffers.set(index, buf) }
+		return buf
+	}

 	const runOnText = () => {
+		const inProgressToolCalls: RawToolCallObj[] = Array.from(anthropicToolBuffers.entries())
+			.filter(([_i, buf]) => !!buf.name)
+			.sort(([a], [b]) => a - b)
+			.map(([_i, buf]) => ({ name: buf.name as ToolName, rawParams: {}, isDone: false, doneParams: [], id: buf.id || 'dummy' }))
 		onText({
 			fullText,
 			fullReasoning,
-			toolCall: !fullToolName ? undefined : { name: fullToolName, rawParams: {}, isDone: false, doneParams: [], id: 'dummy' },
+			toolCalls: inProgressToolCalls.length > 0 ? inProgressToolCalls : undefined,
 		})
 	}
 	// there are no events for tool_use, it comes in at the end
@ -589,7 +636,11 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
 				runOnText()
 			}
 			else if (e.content_block.type === 'tool_use') {
-				fullToolName += e.content_block.name ?? '' // anthropic gives us the tool name in the start block
+				// Anthropic gives the tool name+id in the start block and the JSON input in
+				// subsequent input_json_delta events keyed to the same `e.index`.
+				const buf = getOrCreateAnthropicTool(e.index)
+				buf.name += e.content_block.name ?? ''
+				buf.id += e.content_block.id ?? ''
 				runOnText()
 			}
 		}
@ -605,7 +656,10 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
 				runOnText()
 			}
 			else if (e.delta.type === 'input_json_delta') { // tool use
-				fullToolParams += e.delta.partial_json ?? '' // anthropic gives us the partial delta (string) here - https://docs.anthropic.com/en/api/messages-streaming
+				// partial_json is a string delta scoped to the current content block (e.index).
+				// See https://docs.anthropic.com/en/api/messages-streaming
+				const buf = getOrCreateAnthropicTool(e.index)
+				buf.argsStr += e.delta.partial_json ?? ''
 				runOnText()
 			}
 		}
@ -614,13 +668,19 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
 	// on done - (or when error/fail) - this is called AFTER last streamEvent
 	stream.on('finalMessage', (response) => {
 		const anthropicReasoning = response.content.filter(c => c.type === 'thinking' || c.type === 'redacted_thinking')
+		// Iterate ALL tool_use blocks in document order (response.content preserves ordering).
+		// Previous behavior only used `tools[0]`, which silently dropped parallel tool calls.
 		const tools = response.content.filter(c => c.type === 'tool_use')
-		// console.log('TOOLS!!!!!!', JSON.stringify(tools, null, 2))
-		// console.log('TOOLS!!!!!!', JSON.stringify(response, null, 2))
-		const toolCall = tools[0] && rawToolCallObjOfAnthropicParams(tools[0])
-		const toolCallObj = toolCall ? { toolCall } : {}
+		const finalToolCalls: RawToolCallObj[] = tools
+			.map(t => rawToolCallObjOfAnthropicParams(t))
+			.filter((t): t is RawToolCallObj => t !== null)

-		onFinalMessage({ fullText, fullReasoning, anthropicReasoning, ...toolCallObj })
+		onFinalMessage({
+			fullText,
+			fullReasoning,
+			anthropicReasoning,
+			toolCalls: finalToolCalls.length > 0 ? finalToolCalls : undefined,
+		})
 	})
 	// on error
 	stream.on('error', (error) => {
@ -825,9 +885,14 @@ const sendGeminiChat = async ({
 	let fullReasoningSoFar = ''
 	let fullTextSoFar = ''

-	let toolName = ''
-	let toolParamsStr = ''
-	let toolId = ''
+	// Tool-call buffer — Gemini emits each functionCall as a fully-formed object (not a
+	// streamed partial like OpenAI/Anthropic), so we just accumulate them. Each chunk's
+	// `chunk.functionCalls` may contain zero or more calls. We dedupe by id when one is
+	// present, otherwise by (name + JSON args), in case a later chunk repeats an earlier call
+	// (the SDK occasionally does this in the final summary chunk). Order is first-appearance.
+	type GeminiToolBuf = { name: string; argsStr: string; id: string }
+	const geminiToolCalls: GeminiToolBuf[] = []
+	const geminiToolSeen = new Set<string>()

 	// Gemini reports token usage via chunk.usageMetadata. It typically appears in the last
 	// chunk(s), but we keep the latest seen so we always forward the freshest values.
@ -861,13 +926,20 @@ const sendGeminiChat = async ({
 					}
 				}

-				// tool call
+				// tool calls — iterate ALL functionCalls in the chunk. Previously we only kept
+				// `functionCalls[0]`, silently dropping any parallel tool emission (e.g. a model
+				// asking to read three files at once). Dedupe across chunks by (id || name+args).
 				const functionCalls = chunk.functionCalls
 				if (functionCalls && functionCalls.length > 0) {
-					const functionCall = functionCalls[0] // Get the first function call
-					toolName = functionCall.name ?? ''
-					toolParamsStr = JSON.stringify(functionCall.args ?? {})
-					toolId = functionCall.id ?? ''
+					for (const fc of functionCalls) {
+						const name = fc.name ?? ''
+						const argsStr = JSON.stringify(fc.args ?? {})
+						const id = fc.id ?? ''
+						const key = id || `${name}::${argsStr}`
+						if (geminiToolSeen.has(key)) continue
+						geminiToolSeen.add(key)
+						geminiToolCalls.push({ name, argsStr, id })
+					}
 				}

 				// usage (Gemini exposes promptTokenCount / candidatesTokenCount / totalTokenCount /
@ -888,23 +960,43 @@ const sendGeminiChat = async ({
 					}
 				}

+				// Build the in-progress tool-call snapshot for UI streaming. Gemini tool calls
+				// are already complete when they appear in a chunk, but we still surface them
+				// via onText so the UI can render them as they arrive rather than only at end.
+				const inProgressToolCalls: RawToolCallObj[] = geminiToolCalls.map(buf => ({
+					name: buf.name as ToolName,
+					rawParams: {},
+					isDone: false,
+					doneParams: [],
+					id: buf.id,
+				}))
+
 				// call onText
 				onText({
 					fullText: fullTextSoFar,
 					fullReasoning: fullReasoningSoFar,
-					toolCall: !toolName ? undefined : { name: toolName, rawParams: {}, isDone: false, doneParams: [], id: toolId },
+					toolCalls: inProgressToolCalls.length > 0 ? inProgressToolCalls : undefined,
 					usage: latestUsage,
 				})
 			}

-			// on final
-			if (!fullTextSoFar && !fullReasoningSoFar && !toolName) {
+			// on final — parse each accumulated tool buffer into a full RawToolCallObj.
+			// Empty ids are filled with a UUID so downstream code (which keys tool-result
+			// messages by id) doesn't collide across tools. Malformed JSON args are skipped.
+			const finalToolCalls: RawToolCallObj[] = geminiToolCalls
+				.map(buf => rawToolCallObjOfParamsStr(buf.name, buf.argsStr, buf.id || generateUuid()))
+				.filter((t): t is RawToolCallObj => t !== null)
+
+			if (!fullTextSoFar && !fullReasoningSoFar && finalToolCalls.length === 0) {
 				onError({ message: 'Void: Response from model was empty.', fullError: null })
 			} else {
-				if (!toolId) toolId = generateUuid() // ids are empty, but other providers might expect an id
-				const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId)
-				const toolCallObj = toolCall ? { toolCall } : {}
-				onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, ...toolCallObj });
+				onFinalMessage({
+					fullText: fullTextSoFar,
+					fullReasoning: fullReasoningSoFar,
+					anthropicReasoning: null,
+					usage: latestUsage,
+					toolCalls: finalToolCalls.length > 0 ? finalToolCalls : undefined,
+				});
 			}
 		})
 		.catch(error => {
--- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts
+++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts
@ -66,9 +66,17 @@ export const sendLLMMessage = async ({
 	}

 	const onFinalMessage: OnFinalMessage = (params) => {
-		const { fullText, fullReasoning, toolCall } = params
+		const { fullText, fullReasoning, toolCalls } = params
 		if (_didAbort) return
-		captureLLMEvent(`${loggingName} - Received Full Message`, { messageLength: fullText.length, reasoningLength: fullReasoning?.length, duration: new Date().getMilliseconds() - submit_time.getMilliseconds(), toolCallName: toolCall?.name })
+		captureLLMEvent(`${loggingName} - Received Full Message`, {
+			messageLength: fullText.length,
+			reasoningLength: fullReasoning?.length,
+			duration: new Date().getMilliseconds() - submit_time.getMilliseconds(),
+			// Parallel tool calling: capture the number of tools and a comma-joined summary
+			// so metrics can see how often models emit batches (vs. 0 or 1 tool per turn).
+			toolCallCount: toolCalls?.length ?? 0,
+			toolCallNames: toolCalls?.map(t => t.name).join(','),
+		})
 		onFinalMessage_(params)
 	}