mirror of
https://github.com/voideditor/void
synced 2026-05-22 17:08:25 +00:00
support multi tool call (#13)
This commit is contained in:
parent
f9cb764fbc
commit
79db82e458
9 changed files with 524 additions and 143 deletions
|
|
@ -199,7 +199,11 @@ export type ThreadStreamState = {
|
|||
llmInfo: {
|
||||
displayContentSoFar: string;
|
||||
reasoningSoFar: string;
|
||||
toolCallSoFar: RawToolCallObj | null;
|
||||
// Ordered list of tool calls being streamed from the LLM. Most turns have
|
||||
// length 0 (pure text) or 1 (single tool call). Providers that support
|
||||
// parallel tool calling (OpenAI, Anthropic, Gemini) may emit multiple.
|
||||
// Tools are executed serially by the agent loop in this order.
|
||||
toolCallsSoFar: RawToolCallObj[];
|
||||
};
|
||||
toolInfo?: undefined;
|
||||
interrupt: Promise<() => void>; // calling this should have no effect on state - would be too confusing. it just cancels the tool
|
||||
|
|
@ -714,55 +718,150 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
|
||||
|
||||
|
||||
/**
 * Transitions a tool message to a new state, locating the row by `tool.id`.
 *
 * The row is always looked up by id rather than by swapping the last message of the
 * thread: in a batch, tool i may be followed by pre-added `tool_request` rows for
 * tools i+1, i+2, ..., so overwriting the tail would clobber a sibling's row.
 *
 * - If a tool message with the same id exists, it is replaced in place. `batchIndex`
 *   and `batchSize` are carried over from the existing row unless `tool` supplies
 *   its own, so batch metadata survives state transitions.
 * - If the thread has no messages array, or no row matches, `tool` is appended.
 *
 * @param threadId Thread whose messages are updated.
 * @param tool The new tool message state to write.
 */
private _updateLatestTool = (threadId: string, tool: ChatMessage & { role: 'tool' }) => {
	const messages = this.state.allThreads[threadId]?.messages
	if (!messages) { this._addMessageToThread(threadId, tool); return }

	// Search newest-first so the most recent row with this id is the one transitioned.
	for (let i = messages.length - 1; i >= 0; i--) {
		const m = messages[i]
		if (m.role === 'tool' && m.id === tool.id) {
			// Existing batch metadata goes first so any value on `tool` still wins.
			const merged = { batchIndex: m.batchIndex, batchSize: m.batchSize, ...tool } as ChatMessage & { role: 'tool' }
			this._editMessageInThread(threadId, i, merged)
			return
		}
	}

	// No row with this id yet: append a new one.
	this._addMessageToThread(threadId, tool)
}
|
||||
|
||||
/**
 * Collects the unbroken run of `tool_request` messages at the tail of the thread.
 *
 * The scan starts at the last message and stops at the first message that is not a
 * tool message of type `tool_request`. The result is returned in thread order
 * (oldest first), so element 0 is the earliest request in the trailing run.
 *
 * @param threadId Thread to inspect; an unknown thread yields an empty array.
 * @returns The trailing `tool_request` messages, oldest first.
 */
private _getPendingBatchTools = (threadId: string): (ToolMessage<ToolName> & { type: 'tool_request' })[] => {
	const history = this.state.allThreads[threadId]?.messages ?? []
	const newestFirst: (ToolMessage<ToolName> & { type: 'tool_request' })[] = []

	let idx = history.length - 1
	while (idx >= 0) {
		const msg = history[idx]
		// The first non-request message ends the trailing run.
		if (!(msg.role === 'tool' && msg.type === 'tool_request')) break
		newestFirst.push(msg)
		idx -= 1
	}

	// Collected tail-to-head; flip back into thread order.
	return newestFirst.reverse()
}
|
||||
|
||||
/**
 * Repeatedly runs the first pending `tool_request` at the tail of the thread until
 * none remain or a run asks to stop.
 *
 * The pending list is recomputed from the thread before every run, and each tool is
 * started via `_runToolCall` with `preapproved: false`.
 *
 * @param threadId Thread whose pending tool requests are processed.
 * @returns
 *  - `'interrupted'` as soon as a run reports `interrupted`
 *  - `'awaiting_user'` as soon as a run reports `awaitingUserApproval`
 *  - `'done'` once no pending tool requests are left
 */
private _tryDrainPendingBatch = async (threadId: string): Promise<'done' | 'awaiting_user' | 'interrupted'> => {
	for (
		let queue = this._getPendingBatchTools(threadId);
		queue.length > 0;
		queue = this._getPendingBatchTools(threadId)
	) {
		const head = queue[0]
		const { awaitingUserApproval, interrupted } = await this._runToolCall(
			threadId, head.name, head.id, head.mcpServerName,
			{ preapproved: false, unvalidatedToolParams: head.rawParams, rawParamsStr: head.rawParamsStr }
		)
		// Interruption is checked before approval, matching the reporting priority above.
		if (interrupted) return 'interrupted'
		if (awaitingUserApproval) return 'awaiting_user'
	}
	return 'done'
}
|
||||
|
||||
/**
 * Approves the tool request that is awaiting approval and resumes the agent with it.
 *
 * Several `tool_request` rows can be pending at the tail of the thread at once. The
 * one to run is the FIRST of them, not the last message in the thread: later rows
 * are siblings that have not started yet.
 *
 * Does nothing if the thread does not exist or has no pending tool request.
 *
 * @param threadId Thread whose pending tool request is approved.
 */
approveLatestToolRequest(threadId: string) {
	const thread = this.state.allThreads[threadId]
	if (!thread) return // should never happen

	const pending = this._getPendingBatchTools(threadId)
	if (pending.length === 0) return // nothing is awaiting approval

	const callThisToolFirst = pending[0]

	this._wrapRunAgentToNotify(
		this._runChatAgent({ callThisToolFirst, threadId, ...this._currentModelSelectionProps() })
		, threadId
	)
}
|
||||
/**
 * Reject the tool request(s) pending at the tail of the thread.
 *
 * Reject-all semantics: every pending `tool_request` in the trailing run is marked
 * `rejected`. The first pending row (the one awaiting approval) gets the primary
 * rejection message; the remaining rows get the cascade message so the two cases
 * can be told apart. Rows that are no longer `tool_request` are left untouched.
 *
 * `resumeAgent` controls what happens after a batch rejection:
 *  - true (default): the agent loop is restarted without `callThisToolFirst`, so
 *    the next LLM call runs with the rejected tool results in the thread.
 *  - false: the stream state is cleared and no further LLM call is made.
 *
 * Fallback: when there is no pending `tool_request`, the last message is rejected
 * if it is a tool message other than `invalid_params`, and the stream state is
 * always cleared. The agent is never resumed from this path.
 *
 * @param threadId Thread whose pending tool request(s) are rejected.
 * @param resumeAgent Whether to restart the agent loop after rejecting a batch.
 */
rejectLatestToolRequest(threadId: string, resumeAgent: boolean = true) {
	const thread = this.state.allThreads[threadId]
	if (!thread) return // should never happen

	const pending = this._getPendingBatchTools(threadId)

	if (pending.length === 0) {
		// Legacy path: no tool_request is pending, so reject the trailing tool row if any.
		const lastMsg = thread.messages[thread.messages.length - 1]
		if (!lastMsg || !(lastMsg.role === 'tool' && lastMsg.type !== 'invalid_params')) return
		const { name, id, rawParams, rawParamsStr, mcpServerName, params } = lastMsg
		this._updateLatestTool(threadId, { role: 'tool', type: 'rejected', params, name, content: this.toolErrMsgs.rejected, result: null, id, rawParams, rawParamsStr, mcpServerName })
		// Always clear the stream state here: this path never resumes the agent, so
		// leaving the previous state in place would strand the thread in it.
		this._setStreamState(threadId, undefined)
		return
	}

	const rejectedCount = pending.length
	for (let i = 0; i < pending.length; i++) {
		const p = pending[i]
		// Index 0 is the directly rejected tool; the rest are cascade rejections.
		const content = i === 0 ? this.toolErrMsgs.rejected : this.toolErrMsgs.rejectedCascade(rejectedCount)
		this._updateLatestTool(threadId, {
			role: 'tool', type: 'rejected',
			params: p.params, name: p.name, content, result: null,
			id: p.id, rawParams: p.rawParams, rawParamsStr: p.rawParamsStr, mcpServerName: p.mcpServerName,
		})
	}

	if (resumeAgent) {
		// No callThisToolFirst: the agent goes straight to a new LLM call with the
		// rejected tool results already in the thread.
		this._wrapRunAgentToNotify(
			this._runChatAgent({ threadId, ...this._currentModelSelectionProps() })
			, threadId
		)
	} else {
		this._setStreamState(threadId, undefined)
	}
}
|
||||
|
||||
private _computeMCPServerOfToolName = (toolName: string) => {
|
||||
|
|
@ -775,9 +874,14 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
|
||||
// add assistant message
|
||||
if (this.streamState[threadId]?.isRunning === 'LLM') {
|
||||
const { displayContentSoFar, reasoningSoFar, toolCallSoFar } = this.streamState[threadId].llmInfo
|
||||
const { displayContentSoFar, reasoningSoFar, toolCallsSoFar } = this.streamState[threadId].llmInfo
|
||||
this._addMessageToThread(threadId, { role: 'assistant', displayContent: displayContentSoFar, reasoning: reasoningSoFar, anthropicReasoning: null })
|
||||
if (toolCallSoFar) this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: toolCallSoFar.name, mcpServerName: this._computeMCPServerOfToolName(toolCallSoFar.name) })
|
||||
// For each partially-streamed tool call interrupted mid-flight, add a decorative
|
||||
// "interrupted_streaming_tool" marker. Pre-batch this only handled one tool;
|
||||
// now we iterate the full list so the UI shows all tools the model was planning.
|
||||
for (const tc of toolCallsSoFar) {
|
||||
this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: tc.name, mcpServerName: this._computeMCPServerOfToolName(tc.name) })
|
||||
}
|
||||
}
|
||||
// add tool that's running
|
||||
else if (this.streamState[threadId]?.isRunning === 'tool') {
|
||||
|
|
@ -785,9 +889,11 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
const content = content_ || this.toolErrMsgs.interrupted
|
||||
this._updateLatestTool(threadId, { role: 'tool', name: toolName, params: toolParams, id, content, rawParams, rawParamsStr, type: 'rejected', result: null, mcpServerName })
|
||||
}
|
||||
// reject the tool for the user if relevant
|
||||
// reject the tool for the user if relevant. `resumeAgent: false` — abortRunning is
|
||||
// a hard stop from the user; we don't want to restart the LLM loop with rejection
|
||||
// feedback (which is what the normal reject-button path does).
|
||||
else if (this.streamState[threadId]?.isRunning === 'awaiting_user') {
|
||||
this.rejectLatestToolRequest(threadId)
|
||||
this.rejectLatestToolRequest(threadId, false)
|
||||
}
|
||||
else if (this.streamState[threadId]?.isRunning === 'idle') {
|
||||
// do nothing
|
||||
|
|
@ -807,7 +913,16 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
|
||||
|
||||
/**
 * Message strings used as the `content` of tool messages that did not complete
 * normally.
 */
private readonly toolErrMsgs = {
	// Primary rejection message. Worded to tell the model not to retry the same
	// action and to consult the user instead.
	rejected: 'The user rejected this tool call. Do not retry the same action. Acknowledge the rejection, ask the user what they want you to do differently, or propose an alternative approach.',
	// Cascade rejection message, for sibling tools rejected along with the primary
	// one in a multi-tool batch. `batchSize` is interpolated into the text.
	rejectedCascade: (batchSize: number) => `The user rejected the tool batch (${batchSize} tools). This specific tool was skipped as part of that rejection, not individually rejected. See the primary rejection for the user's reasoning.`,
	interrupted: 'Tool call was interrupted by the user.',
	// Produces a message from the error raised while stringifying a tool's output.
	errWhenStringifying: (error: any) => `Tool call succeeded, but there was an error stringifying the output.\n${getErrorMessage(error)}`
}
|
||||
|
|
@ -851,7 +966,10 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
}
|
||||
catch (error) {
|
||||
const errorMessage = getErrorMessage(error)
|
||||
this._addMessageToThread(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, rawParamsStr, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName })
|
||||
// Use _updateLatestTool (not _addMessageToThread) so that when this tool was
|
||||
// pre-added as a `tool_request` by the batch processor, we transition that
|
||||
// row in place (preserving batchIndex/batchSize) instead of appending a new one.
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'invalid_params', rawParams: opts.unvalidatedToolParams, rawParamsStr, result: null, name: toolName, content: errorMessage, id: toolId, mcpServerName })
|
||||
return {}
|
||||
}
|
||||
// once validated, add checkpoint for edit
|
||||
|
|
@ -883,8 +1001,13 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
}
|
||||
}
|
||||
|
||||
// add a tool_request because we use it for UI if a tool is loading (this should be improved in the future)
|
||||
this._addMessageToThread(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
|
||||
// Transition (or create) the tool_request row. _updateLatestTool finds the
|
||||
// row by id: for solo tool calls there's no pre-added row and it appends one
|
||||
// (same as the old behavior). For batched tool calls, the batch processor
|
||||
// pre-added a tool_request with batchIndex/batchSize, and this call now
|
||||
// replaces its placeholder unvalidated params with the validated ones while
|
||||
// preserving the batch metadata.
|
||||
this._updateLatestTool(threadId, { role: 'tool', type: 'tool_request', content: '(Awaiting user permission...)', result: null, name: toolName, params: toolParams, id: toolId, rawParams: opts.unvalidatedToolParams, rawParamsStr, mcpServerName })
|
||||
if (!autoApprove) {
|
||||
return { awaitingUserApproval: true }
|
||||
}
|
||||
|
|
@ -996,12 +1119,29 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
|
||||
// before enter loop, call tool
|
||||
if (callThisToolFirst) {
|
||||
// Run the just-approved tool, then drain any remaining pending batch siblings
|
||||
// (tools pre-added when the batch started and not yet run). Each drained tool
|
||||
// may pause for its own approval — we stop the agent in that case and return.
|
||||
const { interrupted } = await this._runToolCall(threadId, callThisToolFirst.name, callThisToolFirst.id, callThisToolFirst.mcpServerName, { preapproved: true, unvalidatedToolParams: callThisToolFirst.rawParams, rawParamsStr: callThisToolFirst.rawParamsStr, validatedParams: callThisToolFirst.params })
|
||||
if (interrupted) {
|
||||
this._setStreamState(threadId, undefined)
|
||||
this._addUserCheckpoint({ threadId })
|
||||
|
||||
return
|
||||
}
|
||||
// Drain the remaining pending batch (if there are other tools from this turn
|
||||
// that still need to run). If any of them pauses for approval, stop here — the
|
||||
// agent will resume when the user next approves or rejects.
|
||||
const drainRes = await this._tryDrainPendingBatch(threadId)
|
||||
if (drainRes === 'interrupted') {
|
||||
this._setStreamState(threadId, undefined)
|
||||
this._addUserCheckpoint({ threadId })
|
||||
return
|
||||
}
|
||||
if (drainRes === 'awaiting_user') {
|
||||
this._setStreamState(threadId, { isRunning: 'awaiting_user' })
|
||||
return
|
||||
}
|
||||
// drainRes === 'done': fall through to the main LLM loop below.
|
||||
}
|
||||
this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative, for clarity
|
||||
|
||||
|
|
@ -1034,7 +1174,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
nAttempts += 1
|
||||
|
||||
type ResTypes =
|
||||
| { type: 'llmDone', toolCall?: RawToolCallObj, info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null, finishReason?: string } }
|
||||
| { type: 'llmDone', toolCalls: RawToolCallObj[], info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null, finishReason?: string } }
|
||||
| { type: 'llmError', error?: { message: string; fullError: Error | null; } }
|
||||
| { type: 'llmAborted' }
|
||||
|
||||
|
|
@ -1050,16 +1190,16 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
overridesOfModel,
|
||||
logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } },
|
||||
separateSystemMessage: separateSystemMessage,
|
||||
onText: ({ fullText, fullReasoning, toolCall, usage }) => {
|
||||
onText: ({ fullText, fullReasoning, toolCalls, usage }) => {
|
||||
if (usage) this._setLatestUsage(threadId, usage)
|
||||
this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) })
|
||||
this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallsSoFar: toolCalls ?? [] }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) })
|
||||
},
|
||||
onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage, finishReason }) => {
|
||||
onFinalMessage: async ({ fullText, fullReasoning, toolCalls, anthropicReasoning, usage, finishReason }) => {
|
||||
if (usage) this._setLatestUsage(threadId, usage)
|
||||
// Lock in this request's usage so the next loop iteration's
|
||||
// running total is added to (not replacing) what we already counted.
|
||||
this._lockInCurrentRequestUsage(threadId)
|
||||
resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning, finishReason } }) // resolve with tool calls
|
||||
resMessageIsDonePromise({ type: 'llmDone', toolCalls: toolCalls ?? [], info: { fullText, fullReasoning, anthropicReasoning, finishReason } }) // resolve with tool calls
|
||||
},
|
||||
onError: async (error) => {
|
||||
resMessageIsDonePromise({ type: 'llmError', error: error })
|
||||
|
|
@ -1077,7 +1217,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
break
|
||||
}
|
||||
|
||||
this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: '', reasoningSoFar: '', toolCallSoFar: null }, interrupt: Promise.resolve(() => this._llmMessageService.abort(llmCancelToken)) })
|
||||
this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: '', reasoningSoFar: '', toolCallsSoFar: [] }, interrupt: Promise.resolve(() => this._llmMessageService.abort(llmCancelToken)) })
|
||||
const llmRes = await messageIsDonePromise // wait for message to complete
|
||||
|
||||
// if something else started running in the meantime
|
||||
|
|
@ -1108,9 +1248,13 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
// error, but too many attempts
|
||||
else {
|
||||
const { error } = llmRes
|
||||
const { displayContentSoFar, reasoningSoFar, toolCallSoFar } = this.streamState[threadId].llmInfo
|
||||
const { displayContentSoFar, reasoningSoFar, toolCallsSoFar } = this.streamState[threadId].llmInfo
|
||||
this._addMessageToThread(threadId, { role: 'assistant', displayContent: displayContentSoFar, reasoning: reasoningSoFar, anthropicReasoning: null })
|
||||
if (toolCallSoFar) this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: toolCallSoFar.name, mcpServerName: this._computeMCPServerOfToolName(toolCallSoFar.name) })
|
||||
// Record an interrupted-streaming marker for every tool the LLM was
|
||||
// mid-way through emitting. Pre-batch this only handled the first tool.
|
||||
for (const tc of toolCallsSoFar) {
|
||||
this._addMessageToThread(threadId, { role: 'interrupted_streaming_tool', name: tc.name, mcpServerName: this._computeMCPServerOfToolName(tc.name) })
|
||||
}
|
||||
|
||||
this._setStreamState(threadId, { isRunning: undefined, error })
|
||||
this._addUserCheckpoint({ threadId })
|
||||
|
|
@ -1119,23 +1263,52 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
}
|
||||
|
||||
// llm res success
|
||||
const { toolCall, info } = llmRes
|
||||
const { toolCalls, info } = llmRes
|
||||
|
||||
this._addMessageToThread(threadId, { role: 'assistant', displayContent: info.fullText, reasoning: info.fullReasoning, anthropicReasoning: info.anthropicReasoning, finishReason: info.finishReason })
|
||||
|
||||
this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative for clarity
|
||||
|
||||
// call tool if there is one
|
||||
if (toolCall) {
|
||||
// call tool(s) if there are any. Batched / parallel tool emissions are handled
|
||||
// by pre-adding every tool as a `tool_request` (with batchIndex/batchSize so the
|
||||
// UI can render "(1/N)" prefixes), then running them serially. Any tool may pause
|
||||
// for user approval; if that happens the remaining tools in the batch stay as
|
||||
// pending tool_requests, visible to the user as stacked progress rows.
|
||||
if (toolCalls.length > 0) {
|
||||
const mcpTools = this._mcpService.getMCPTools()
|
||||
const mcpTool = mcpTools?.find(t => t.name === toolCall.name)
|
||||
const batchSize = toolCalls.length
|
||||
for (let i = 0; i < batchSize; i++) {
|
||||
const tc = toolCalls[i]
|
||||
const mcpServerName = mcpTools?.find(t => t.name === tc.name)?.mcpServerName
|
||||
this._addMessageToThread(threadId, {
|
||||
role: 'tool',
|
||||
type: 'tool_request',
|
||||
content: '(Pending...)',
|
||||
result: null,
|
||||
name: tc.name,
|
||||
// Placeholder unvalidated params — `_runToolCall` will validate and
|
||||
// replace via `_updateLatestTool` before the tool runs. The cast is
|
||||
// safe because the UI only reads validated `params` on tool_requests
|
||||
// once they've transitioned past the placeholder phase (which happens
|
||||
// synchronously when `_tryDrainPendingBatch` hits this tool).
|
||||
params: tc.rawParams as unknown as ToolCallParams<ToolName>,
|
||||
id: tc.id,
|
||||
rawParams: tc.rawParams,
|
||||
rawParamsStr: tc.rawParamsStr,
|
||||
mcpServerName,
|
||||
// Only stamp batch metadata when there's actually more than one tool —
|
||||
// a solo tool call shouldn't render "(1/1)" in the UI.
|
||||
batchIndex: batchSize > 1 ? i : undefined,
|
||||
batchSize: batchSize > 1 ? batchSize : undefined,
|
||||
})
|
||||
}
|
||||
|
||||
const { awaitingUserApproval, interrupted } = await this._runToolCall(threadId, toolCall.name, toolCall.id, mcpTool?.mcpServerName, { preapproved: false, unvalidatedToolParams: toolCall.rawParams, rawParamsStr: toolCall.rawParamsStr })
|
||||
if (interrupted) {
|
||||
const batchRes = await this._tryDrainPendingBatch(threadId)
|
||||
if (batchRes === 'interrupted') {
|
||||
this._setStreamState(threadId, undefined)
|
||||
return
|
||||
}
|
||||
if (awaitingUserApproval) { isRunningWhenEnd = 'awaiting_user' }
|
||||
if (batchRes === 'awaiting_user') { isRunningWhenEnd = 'awaiting_user' }
|
||||
else { shouldSendAnotherMessage = true }
|
||||
|
||||
this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative, for clarity
|
||||
|
|
|
|||
|
|
@ -85,22 +85,35 @@ const prepareMessages_openai_tools = (messages: SimpleLLMMessage[]): AnthropicOr
|
|||
continue
|
||||
}
|
||||
|
||||
// edit previous assistant message to have called the tool
|
||||
const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined
|
||||
if (prevMsg?.role === 'assistant') {
|
||||
// Walk back through newMessages to find the assistant that called this tool. For a
|
||||
// solo tool this is always the immediately-prior message; for a batched response
|
||||
// (N parallel tool calls) we need to append to the same assistant across N tool
|
||||
// messages — the previous implementation overwrote tool_calls each time and only
|
||||
// the LAST tool in a batch survived, corrupting replay bytes + the provider's cache.
|
||||
let assistantIdx = -1
|
||||
for (let j = newMessages.length - 1; j >= 0; j--) {
|
||||
const m = newMessages[j]
|
||||
if (m.role === 'assistant') { assistantIdx = j; break }
|
||||
// Stop at any non-tool, non-assistant message (should never happen since we only
|
||||
// push assistant/tool/user through here in order, but keep the safety rail).
|
||||
if (m.role !== 'tool') break
|
||||
}
|
||||
if (assistantIdx >= 0) {
|
||||
const asstMsg = newMessages[assistantIdx] as OpenAILLMChatMessage & { role: 'assistant' }
|
||||
// Prefer the model's original serialized argument string when we have it
|
||||
// (OpenAI-compatible providers expose it in the streaming delta). Sending
|
||||
// byte-identical bytes back preserves the provider's prefix cache past the
|
||||
// tool call. Fall back to re-serializing when the raw string is unavailable
|
||||
// (e.g. conversations from before this field existed, or non-OpenAI provenance).
|
||||
prevMsg.tool_calls = [{
|
||||
type: 'function',
|
||||
const newCall = {
|
||||
type: 'function' as const,
|
||||
id: currMsg.id,
|
||||
function: {
|
||||
name: currMsg.name,
|
||||
arguments: currMsg.rawParamsStr ?? JSON.stringify(currMsg.rawParams)
|
||||
}
|
||||
}]
|
||||
}
|
||||
asstMsg.tool_calls = [...(asstMsg.tool_calls ?? []), newCall]
|
||||
}
|
||||
|
||||
// add the tool
|
||||
|
|
@ -181,13 +194,27 @@ const prepareMessages_anthropic_tools = (messages: SimpleLLMMessage[], supportsA
|
|||
}
|
||||
|
||||
if (currMsg.role === 'tool') {
|
||||
// add anthropic tools
|
||||
const prevMsg = 0 <= i - 1 && i - 1 <= newMessages.length ? newMessages[i - 1] : undefined
|
||||
|
||||
// make it so the assistant called the tool
|
||||
if (prevMsg?.role === 'assistant') {
|
||||
if (typeof prevMsg.content === 'string') prevMsg.content = [{ type: 'text', text: prevMsg.content }]
|
||||
prevMsg.content.push({ type: 'tool_use', id: currMsg.id, name: currMsg.name, input: currMsg.rawParams })
|
||||
// Walk back to the assistant that owned this tool call. For a batched turn
|
||||
// (multiple parallel tool calls on one assistant), each tool message appends
|
||||
// its own `tool_use` block to the same assistant's content array, and Anthropic
|
||||
// sees the full batch as one assistant turn. Previously only the first tool
|
||||
// was attached (prevMsg check) and the rest silently orphaned, which made
|
||||
// replay of batched turns fail validation.
|
||||
let assistantIdx = -1
|
||||
for (let j = i - 1; j >= 0; j--) {
|
||||
const m = newMessages[j]
|
||||
if (!m) continue
|
||||
if (m.role === 'assistant') { assistantIdx = j; break }
|
||||
// Skip over previously-converted tool rows (now user messages with tool_result);
|
||||
// anything else means we walked past the batch boundary.
|
||||
if (m.role !== 'user') break
|
||||
const isToolResultUser = Array.isArray(m.content) && m.content.some(c => c.type === 'tool_result')
|
||||
if (!isToolResultUser) break
|
||||
}
|
||||
if (assistantIdx >= 0) {
|
||||
const asstMsg = newMessages[assistantIdx] as AnthropicLLMChatMessage & { role: 'assistant' }
|
||||
if (typeof asstMsg.content === 'string') asstMsg.content = [{ type: 'text', text: asstMsg.content }]
|
||||
asstMsg.content.push({ type: 'tool_use', id: currMsg.id, name: currMsg.name, input: currMsg.rawParams })
|
||||
}
|
||||
|
||||
// turn each tool into a user message with tool results at the end
|
||||
|
|
@ -214,12 +241,20 @@ const prepareMessages_XML_tools = (messages: SimpleLLMMessage[], supportsAnthrop
|
|||
const next = 0 <= i + 1 && i + 1 <= messages.length - 1 ? messages[i + 1] : null
|
||||
|
||||
if (c.role === 'assistant') {
|
||||
// if called a tool (message after it), re-add its XML to the message
|
||||
// alternatively, could just hold onto the original output, but this way requires less piping raw strings everywhere
|
||||
// Re-serialize every consecutive tool message after this assistant as XML and
|
||||
// concatenate them back onto the assistant content. Multi-tool batches may land
|
||||
// in history (e.g. if the user switches from a native-tool-calling model into a
|
||||
// grammar-based one); only appending `next` would lose tool calls 2..N.
|
||||
let content: AnthropicOrOpenAILLMMessage['content'] = c.content
|
||||
if (next?.role === 'tool') {
|
||||
content = `${content}\n\n${reParsedToolXMLString(next.name, next.rawParams)}`
|
||||
for (let k = i + 1; k < messages.length; k++) {
|
||||
const followUp = messages[k]
|
||||
if (followUp.role !== 'tool') break
|
||||
content = `${content}\n\n${reParsedToolXMLString(followUp.name, followUp.rawParams)}`
|
||||
}
|
||||
// For backward compatibility of the void-format assumption we keep `next` only
|
||||
// reference intact below (it's still used by the batch-rebuild loop at the
|
||||
// tool-result step).
|
||||
void next
|
||||
|
||||
// anthropic reasoning
|
||||
if (c.anthropicReasoning && supportsAnthropicReasoning) {
|
||||
|
|
@ -454,7 +489,13 @@ const prepareOpenAIOrAnthropicMessages = ({
|
|||
type GeminiUserPart = (GeminiLLMChatMessage & { role: 'user' })['parts'][0]
|
||||
type GeminiModelPart = (GeminiLLMChatMessage & { role: 'model' })['parts'][0]
|
||||
const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => {
|
||||
let latestToolName: ToolName | undefined = undefined
|
||||
// Map tool_use id → tool name, populated as we encounter `tool_use` parts on
|
||||
// assistant turns. functionResponse entries later (on user turns) look their name up
|
||||
// by id so batched turns resolve each response to the correct call. Previously a
|
||||
// single `latestToolName` was tracked, which broke when one assistant emitted N
|
||||
// parallel tools: the Nth name won, and all earlier functionResponse parts were
|
||||
// mislabeled (Gemini rejects these with "function name mismatch").
|
||||
const toolNameById = new Map<string, ToolName>()
|
||||
const messages2: GeminiLLMChatMessage[] = messages.map((m): GeminiLLMChatMessage | null => {
|
||||
if (m.role === 'assistant') {
|
||||
if (typeof m.content === 'string') {
|
||||
|
|
@ -466,7 +507,7 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => {
|
|||
return { text: c.text }
|
||||
}
|
||||
else if (c.type === 'tool_use') {
|
||||
latestToolName = c.name
|
||||
toolNameById.set(c.id, c.name)
|
||||
return { functionCall: { id: c.id, name: c.name, args: c.input } }
|
||||
}
|
||||
else return null
|
||||
|
|
@ -484,8 +525,9 @@ const prepareGeminiMessages = (messages: AnthropicLLMChatMessage[]) => {
|
|||
return { text: c.text }
|
||||
}
|
||||
else if (c.type === 'tool_result') {
|
||||
if (!latestToolName) return null
|
||||
return { functionResponse: { id: c.tool_use_id, name: latestToolName, response: { output: c.content } } }
|
||||
const resolvedName = toolNameById.get(c.tool_use_id)
|
||||
if (!resolvedName) return null
|
||||
return { functionResponse: { id: c.tool_use_id, name: resolvedName, response: { output: c.content } } }
|
||||
}
|
||||
else return null
|
||||
}).filter(m => !!m)
|
||||
|
|
|
|||
|
|
@ -1621,8 +1621,20 @@ const titleOfBuiltinToolName = {
|
|||
} as const satisfies Record<BuiltinToolName, { done: any, proposed: any, running: any }>
|
||||
|
||||
|
||||
const getTitle = (toolMessage: Pick<ChatMessage & { role: 'tool' }, 'name' | 'type' | 'mcpServerName'>): React.ReactNode => {
|
||||
// Prefix like "(1/2) " when this tool is part of a multi-tool batch emitted in one
|
||||
// assistant turn. The prefix is purely decorative (helps the user see that one reply
|
||||
// contains multiple tools and track how many are done) and is omitted for solo tools
|
||||
// or when the message predates parallel tool support (batchIndex/batchSize undefined).
|
||||
const batchPrefix = (m: Pick<ChatMessage & { role: 'tool' }, 'batchIndex' | 'batchSize'>): string => {
|
||||
if (m.batchIndex === undefined || m.batchSize === undefined) return ''
|
||||
if (m.batchSize <= 1) return ''
|
||||
// batchIndex is 0-based internally but we render as 1-based for humans.
|
||||
return `(${m.batchIndex + 1}/${m.batchSize}) `
|
||||
}
|
||||
|
||||
const getTitle = (toolMessage: Pick<ChatMessage & { role: 'tool' }, 'name' | 'type' | 'mcpServerName' | 'batchIndex' | 'batchSize'>): React.ReactNode => {
|
||||
const t = toolMessage
|
||||
const prefix = batchPrefix(t)
|
||||
|
||||
// non-built-in title
|
||||
if (!builtinToolNames.includes(t.name as BuiltinToolName)) {
|
||||
|
|
@ -1637,7 +1649,7 @@ const getTitle = (toolMessage: Pick<ChatMessage & { role: 'tool' }, 'name' | 'ty
|
|||
: 'Call'
|
||||
|
||||
|
||||
const title = `${descriptor} ${toolMessage.mcpServerName || 'MCP'}`
|
||||
const title = `${prefix}${descriptor} ${toolMessage.mcpServerName || 'MCP'}`
|
||||
if (t.type === 'running_now' || t.type === 'tool_request')
|
||||
return loadingTitleWrapper(title)
|
||||
return title
|
||||
|
|
@ -1646,9 +1658,11 @@ const getTitle = (toolMessage: Pick<ChatMessage & { role: 'tool' }, 'name' | 'ty
|
|||
// built-in title
|
||||
else {
|
||||
const toolName = t.name as BuiltinToolName
|
||||
if (t.type === 'success') return titleOfBuiltinToolName[toolName].done
|
||||
if (t.type === 'running_now') return titleOfBuiltinToolName[toolName].running
|
||||
return titleOfBuiltinToolName[toolName].proposed
|
||||
const base =
|
||||
t.type === 'success' ? titleOfBuiltinToolName[toolName].done
|
||||
: t.type === 'running_now' ? titleOfBuiltinToolName[toolName].running
|
||||
: titleOfBuiltinToolName[toolName].proposed
|
||||
return prefix ? `${prefix}${base}` : base
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2699,6 +2713,12 @@ type ChatBubbleProps = {
|
|||
threadId: string,
|
||||
currCheckpointIdx: number | undefined,
|
||||
_scrollToBottom: (() => void) | null,
|
||||
// Index of the message that currently owns the approve/reject prompt (the earliest
|
||||
// tool_request in the consecutive trailing batch). When a multi-tool batch is
|
||||
// pre-added, all queued tool_requests share the same status but only the first one
|
||||
// should render the buttons; the others are "waiting their turn". undefined = no
|
||||
// pending approval anywhere in the thread.
|
||||
firstPendingToolRequestIdx?: number,
|
||||
}
|
||||
|
||||
const ChatBubble = (props: ChatBubbleProps) => {
|
||||
|
|
@ -2707,7 +2727,7 @@ const ChatBubble = (props: ChatBubbleProps) => {
|
|||
</ErrorBoundary>
|
||||
}
|
||||
|
||||
const _ChatBubble = ({ threadId, chatMessage, currCheckpointIdx, isCommitted, messageIdx, chatIsRunning, _scrollToBottom }: ChatBubbleProps) => {
|
||||
const _ChatBubble = ({ threadId, chatMessage, currCheckpointIdx, isCommitted, messageIdx, chatIsRunning, _scrollToBottom, firstPendingToolRequestIdx }: ChatBubbleProps) => {
|
||||
const role = chatMessage.role
|
||||
|
||||
const isCheckpointGhost = messageIdx > (currCheckpointIdx ?? Infinity) && !chatIsRunning // whether to show as gray (if chat is running, for good measure just dont show any ghosts)
|
||||
|
|
@ -2751,7 +2771,7 @@ const _ChatBubble = ({ threadId, chatMessage, currCheckpointIdx, isCommitted, me
|
|||
threadId={threadId}
|
||||
/>
|
||||
</div>
|
||||
{chatMessage.type === 'tool_request' ?
|
||||
{chatMessage.type === 'tool_request' && messageIdx === firstPendingToolRequestIdx ?
|
||||
<div className={`${isCheckpointGhost ? 'opacity-50 pointer-events-none' : ''}`}>
|
||||
<ToolRequestAcceptRejectButtons toolName={chatMessage.name} />
|
||||
</div> : null}
|
||||
|
|
@ -3102,8 +3122,14 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: {
|
|||
const streamState = useChatThreadsStreamState(threadId)
|
||||
const isRunning = streamState?.isRunning
|
||||
const latestError = streamState?.error
|
||||
const { displayContentSoFar, toolCallSoFar, reasoningSoFar } = streamState?.llmInfo ?? {}
|
||||
const toolIsGenerating = toolCallSoFar && !toolCallSoFar.isDone
|
||||
const { displayContentSoFar, toolCallsSoFar, reasoningSoFar } = streamState?.llmInfo ?? {}
|
||||
// During streaming the "currently being written" tool is the last one in the array
|
||||
// (indices are emitted in order). Earlier tools in the batch may already be complete
|
||||
// (their argument JSON fully streamed) but their persisted tool_request rows only
|
||||
// show up in `thread.messages` once onFinalMessage fires and the batch is committed.
|
||||
// For the live preview here we just show the latest in-flight tool.
|
||||
const currentInFlightTool = toolCallsSoFar && toolCallsSoFar.length > 0 ? toolCallsSoFar[toolCallsSoFar.length - 1] : undefined
|
||||
const toolIsGenerating = currentInFlightTool && !currentInFlightTool.isDone
|
||||
|
||||
const currCheckpointIdx = thread?.state?.currCheckpointIdx ?? undefined
|
||||
|
||||
|
|
@ -3118,6 +3144,22 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: {
|
|||
}
|
||||
}, [isActive, scrollContainerRef])
|
||||
|
||||
// Index of the "currently awaiting approval" tool request — the earliest of the
|
||||
// consecutive trailing tool_request messages. Matches _getPendingBatchTools() in
|
||||
// the service. For a solo tool call this is just the last message (same as the
|
||||
// pre-batch behavior). For a multi-tool batch, it's the first pending one; later
|
||||
// queued tool_requests render as stacked progress rows without approve/reject
|
||||
// buttons.
|
||||
const firstPendingToolRequestIdx = useMemo(() => {
|
||||
let earliest: number | undefined
|
||||
for (let i = previousMessages.length - 1; i >= 0; i--) {
|
||||
const m = previousMessages[i]
|
||||
if (m.role === 'tool' && m.type === 'tool_request') earliest = i
|
||||
else break
|
||||
}
|
||||
return earliest
|
||||
}, [previousMessages])
|
||||
|
||||
const previousMessagesHTML = useMemo(() => {
|
||||
return previousMessages.map((message, i) => {
|
||||
return <ChatBubble
|
||||
|
|
@ -3129,9 +3171,10 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: {
|
|||
chatIsRunning={isRunning}
|
||||
threadId={threadId}
|
||||
_scrollToBottom={() => scrollToBottom(scrollContainerRef)}
|
||||
firstPendingToolRequestIdx={firstPendingToolRequestIdx}
|
||||
/>
|
||||
})
|
||||
}, [previousMessages, threadId, currCheckpointIdx, isRunning, scrollContainerRef])
|
||||
}, [previousMessages, threadId, currCheckpointIdx, isRunning, scrollContainerRef, firstPendingToolRequestIdx])
|
||||
|
||||
const streamingChatIdx = previousMessagesHTML.length
|
||||
const currStreamingMessageHTML = reasoningSoFar || displayContentSoFar || isRunning ?
|
||||
|
|
@ -3151,10 +3194,10 @@ const ThreadMessagesView = ({ threadId, isActive, scrollContainerRef }: {
|
|||
_scrollToBottom={null}
|
||||
/> : null
|
||||
|
||||
const generatingTool = toolIsGenerating ?
|
||||
toolCallSoFar.name === 'edit_file' || toolCallSoFar.name === 'rewrite_file' ? <EditToolSoFar
|
||||
const generatingTool = toolIsGenerating && currentInFlightTool ?
|
||||
currentInFlightTool.name === 'edit_file' || currentInFlightTool.name === 'rewrite_file' ? <EditToolSoFar
|
||||
key={'curr-streaming-tool'}
|
||||
toolCallSoFar={toolCallSoFar}
|
||||
toolCallSoFar={currentInFlightTool}
|
||||
/>
|
||||
: null
|
||||
: null
|
||||
|
|
@ -3230,10 +3273,13 @@ export const SidebarChat = () => {
|
|||
const currThreadStreamState = useChatThreadsStreamState(chatThreadsState.currentThreadId)
|
||||
const isRunning = currThreadStreamState?.isRunning
|
||||
const latestError = currThreadStreamState?.error
|
||||
const { displayContentSoFar, toolCallSoFar, reasoningSoFar } = currThreadStreamState?.llmInfo ?? {}
|
||||
const { displayContentSoFar, toolCallsSoFar, reasoningSoFar } = currThreadStreamState?.llmInfo ?? {}
|
||||
// See ThreadMessagesView comment: the last tool in the array is the one still
|
||||
// being streamed; earlier batch siblings may already have complete argument JSON.
|
||||
const currentInFlightTool = toolCallsSoFar && toolCallsSoFar.length > 0 ? toolCallsSoFar[toolCallsSoFar.length - 1] : undefined
|
||||
|
||||
// this is just if it's currently being generated, NOT if it's currently running
|
||||
const toolIsGenerating = toolCallSoFar && !toolCallSoFar.isDone // show loading for slow tools (right now just edit)
|
||||
const toolIsGenerating = currentInFlightTool && !currentInFlightTool.isDone // show loading for slow tools (right now just edit)
|
||||
|
||||
// ----- SIDEBAR CHAT state (local) -----
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,14 @@ export type ToolMessage<T extends ToolName> = {
|
|||
// byte-identical tool_calls back, preserving the provider's prefix cache.
|
||||
rawParamsStr?: string;
|
||||
mcpServerName: string | undefined; // the server name at the time of the call
|
||||
// Position of this tool within its assistant-turn batch. When a model emits multiple
|
||||
// parallel tool calls in one response, each tool message stores its 0-based index
|
||||
// (`batchIndex`) and the total count (`batchSize`). The UI uses these to render a
|
||||
// "(1/2)"-style prefix so the user can see tool grouping at a glance. Both are
|
||||
// optional — legacy single-tool responses and persisted history from before this
|
||||
// field existed simply omit them (UI treats that as a solo call, no prefix shown).
|
||||
batchIndex?: number;
|
||||
batchSize?: number;
|
||||
} & (
|
||||
// in order of events:
|
||||
| { type: 'invalid_params', result: null, name: T, }
|
||||
|
|
|
|||
|
|
@ -507,7 +507,12 @@ You will be given instructions from the user, and may also receive a list of fil
|
|||
if (mode === 'agent' || mode === 'gather') {
|
||||
details.push(`Only call tools if they help you accomplish the user's goal. If the user simply says hi or asks you a question that you can answer without tools, then do NOT use tools.`)
|
||||
details.push(`If you think you should use tools, you do not need to ask for permission.`)
|
||||
details.push('Only use ONE tool call at a time.')
|
||||
// Parallel tool calls are OK (and encouraged) when the operations are independent
|
||||
// — e.g. reading several files, searching several patterns. A single assistant
|
||||
// turn that batches N reads costs one round-trip instead of N, and prefix caching
|
||||
// stays warm across the whole batch. Keep sequential tools for dependent steps
|
||||
// where later arguments require earlier results.
|
||||
details.push(`You can call multiple tools in a single turn when the operations are independent (e.g. reading several files, searching several patterns). Prefer batching reads/searches together rather than issuing them one-at-a-time across turns. Use separate turns when a later tool's arguments depend on an earlier tool's result.`)
|
||||
details.push(`NEVER say something like "I'm going to use \`tool_name\`". Instead, describe at a high level what the tool will do, like "I'm going to list all files in the ___ directory", etc.`)
|
||||
details.push(`Many tools only work if the user has a workspace open.`)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -112,7 +112,12 @@ export type LLMUsage = {
|
|||
cachedInputTokens?: number;
|
||||
}
|
||||
|
||||
export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; usage?: LLMUsage }) => void
|
||||
// `toolCalls` is an ordered list. Providers that support parallel/batched tool calling
|
||||
// (OpenAI, Anthropic, Gemini) may emit multiple tools in a single assistant turn. A
|
||||
// single-tool response is represented as a length-1 array; no tools as an empty array
|
||||
// (or `undefined` for brevity). The ordering is preserved from the provider — Void
|
||||
// executes them serially in that order.
|
||||
export type OnText = (p: { fullText: string; fullReasoning: string; toolCalls?: RawToolCallObj[]; usage?: LLMUsage }) => void
|
||||
|
||||
// `finishReason` is the provider's own reason for ending the stream. OpenAI-compatible
|
||||
// servers return one of `stop` / `tool_calls` / `function_call` / `length` / `content_filter`
|
||||
|
|
@ -122,7 +127,9 @@ export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: R
|
|||
// clips against `max_tokens`, but also `content_filter` or unknown gateway-specific values).
|
||||
// Populated only by OAI-compatible providers right now — Anthropic / Gemini paths leave this
|
||||
// undefined, which renders as "no warning" (the same as before this was added).
|
||||
export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage; finishReason?: string }) => void // id is tool_use_id
|
||||
//
|
||||
// `toolCalls` — see `OnText` above. Empty/undefined on pure-text responses.
|
||||
export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCalls?: RawToolCallObj[]; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage; finishReason?: string }) => void
|
||||
export type OnError = (p: { message: string; fullError: Error | null }) => void
|
||||
export type OnAbort = () => void
|
||||
export type AbortRef = { current: (() => void) | null }
|
||||
|
|
|
|||
|
|
@ -334,10 +334,15 @@ export const extractXMLToolsWrapper = (
|
|||
)
|
||||
}
|
||||
|
||||
// Grammar-based tool extraction only surfaces one tool at a time (XML tags are parsed
|
||||
// sequentially out of the text stream), so `toolCalls` is always undefined or length 1 on this
|
||||
// path. Models that use this wrapper (local models, pseudo-tool-use via text) don't
|
||||
// produce parallel tool calls — that capability is exclusive to providers with native
|
||||
// tool-calling (OpenAI-compatible, Anthropic, Gemini).
|
||||
onText({
|
||||
...params,
|
||||
fullText,
|
||||
toolCall: latestToolCall,
|
||||
toolCalls: latestToolCall ? [latestToolCall] : undefined,
|
||||
});
|
||||
};
|
||||
|
||||
|
|
@ -349,12 +354,7 @@ export const extractXMLToolsWrapper = (
|
|||
fullText = fullText.trimEnd()
|
||||
const toolCall = latestToolCall
|
||||
|
||||
// console.log('final message!!!', trueFullText)
|
||||
// console.log('----- returning ----\n', fullText)
|
||||
// console.log('----- tools ----\n', JSON.stringify(firstToolCallRef.current, null, 2))
|
||||
// console.log('----- toolCall ----\n', JSON.stringify(toolCall, null, 2))
|
||||
|
||||
onFinalMessage({ ...params, fullText, toolCall: toolCall })
|
||||
onFinalMessage({ ...params, fullText, toolCalls: toolCall ? [toolCall] : undefined })
|
||||
}
|
||||
return { newOnText, newOnFinalMessage };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import { GoogleAuth } from 'google-auth-library'
|
|||
/* eslint-enable */
|
||||
|
||||
import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, type LLMUsage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
|
||||
import type { ToolName } from '../../common/toolsServiceTypes.js';
|
||||
import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
|
||||
import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js';
|
||||
import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js';
|
||||
|
|
@ -339,9 +340,19 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
|
|||
let fullReasoningSoFar = ''
|
||||
let fullTextSoFar = ''
|
||||
|
||||
let toolName = ''
|
||||
let toolId = ''
|
||||
let toolParamsStr = ''
|
||||
// Tool-call buffers keyed by `tool_calls[].index` from the delta. OpenAI's streaming spec
|
||||
// allows multiple tool calls in one assistant turn, each identified by its own numeric index,
|
||||
// with chunks interleaved arbitrarily (index=0 chunk, index=1 chunk, index=0 chunk again...).
|
||||
// We previously dropped everything past index 0, which silently corrupted parallel tool-call
|
||||
// responses from GPT-4+, MiniMax, and other providers that batch. Using a Map keyed by index
|
||||
// handles out-of-order chunks correctly. On final, we sort by index to preserve the
|
||||
// provider's intended execution order.
|
||||
const toolBuffers = new Map<number, { name: string; argsStr: string; id: string }>()
|
||||
const getOrCreateToolBuffer = (index: number) => {
|
||||
let buf = toolBuffers.get(index)
|
||||
if (!buf) { buf = { name: '', argsStr: '', id: '' }; toolBuffers.set(index, buf) }
|
||||
return buf
|
||||
}
|
||||
|
||||
// Usage only arrives in the final chunk (and only if the server honored
|
||||
// stream_options.include_usage). `chunk.usage` is typed as `| null` there.
|
||||
|
|
@ -374,14 +385,17 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
|
|||
const chunkFinishReason = chunk.choices[0]?.finish_reason
|
||||
if (chunkFinishReason) lastFinishReason = chunkFinishReason
|
||||
|
||||
// tool call
|
||||
// tool calls — aggregate by index. A single chunk may include deltas for multiple
|
||||
// indices (rare but valid), and a single index's pieces may arrive across many
|
||||
// chunks (the common case). `id` is typically present only on the first chunk
|
||||
// for a given index; `arguments` streams incrementally.
|
||||
for (const tool of chunk.choices[0]?.delta?.tool_calls ?? []) {
|
||||
const index = tool.index
|
||||
if (index !== 0) continue
|
||||
|
||||
toolName += tool.function?.name ?? ''
|
||||
toolParamsStr += tool.function?.arguments ?? '';
|
||||
toolId += tool.id ?? ''
|
||||
if (index === undefined) continue
|
||||
const buf = getOrCreateToolBuffer(index)
|
||||
buf.name += tool.function?.name ?? ''
|
||||
buf.argsStr += tool.function?.arguments ?? ''
|
||||
buf.id += tool.id ?? ''
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -413,23 +427,44 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
|
|||
}
|
||||
}
|
||||
|
||||
// Build the in-progress toolCalls snapshot for UI streaming. We only emit entries
|
||||
// for buffers that have at least a name (argument-only deltas for an as-yet-
|
||||
// unnamed tool are still accumulating). Indices are sorted so the UI's rendered
|
||||
// order matches the provider's intended execution order.
|
||||
const inProgressToolCalls: RawToolCallObj[] = Array.from(toolBuffers.entries())
|
||||
.filter(([_i, buf]) => !!buf.name)
|
||||
.sort(([a], [b]) => a - b)
|
||||
.map(([_i, buf]) => ({ name: buf.name as ToolName, rawParams: {}, isDone: false, doneParams: [], id: buf.id }))
|
||||
|
||||
// call onText
|
||||
onText({
|
||||
fullText: fullTextSoFar,
|
||||
fullReasoning: fullReasoningSoFar,
|
||||
toolCall: !toolName ? undefined : { name: toolName, rawParams: {}, isDone: false, doneParams: [], id: toolId },
|
||||
toolCalls: inProgressToolCalls.length > 0 ? inProgressToolCalls : undefined,
|
||||
usage: latestUsage,
|
||||
})
|
||||
|
||||
}
|
||||
// on final
|
||||
if (!fullTextSoFar && !fullReasoningSoFar && !toolName) {
|
||||
// on final: parse each completed tool buffer. `rawToolCallObjOfParamsStr` returns
|
||||
// null on malformed JSON or non-object inputs — we skip those rather than crashing
|
||||
// the whole turn. NOTE(review): nothing is logged at this call site — confirm the helper logs.
|
||||
const finalToolCalls: RawToolCallObj[] = Array.from(toolBuffers.entries())
|
||||
.sort(([a], [b]) => a - b)
|
||||
.map(([_i, buf]) => rawToolCallObjOfParamsStr(buf.name, buf.argsStr, buf.id))
|
||||
.filter((t): t is RawToolCallObj => t !== null)
|
||||
|
||||
if (!fullTextSoFar && !fullReasoningSoFar && finalToolCalls.length === 0) {
|
||||
onError({ message: 'Void: Response from model was empty.', fullError: null })
|
||||
}
|
||||
else {
|
||||
const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId)
|
||||
const toolCallObj = toolCall ? { toolCall } : {}
|
||||
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, finishReason: lastFinishReason, ...toolCallObj });
|
||||
onFinalMessage({
|
||||
fullText: fullTextSoFar,
|
||||
fullReasoning: fullReasoningSoFar,
|
||||
anthropicReasoning: null,
|
||||
usage: latestUsage,
|
||||
finishReason: lastFinishReason,
|
||||
toolCalls: finalToolCalls.length > 0 ? finalToolCalls : undefined,
|
||||
});
|
||||
}
|
||||
})
|
||||
// when error/fail - this catches errors of both .create() and .then(for await)
|
||||
|
|
@ -557,15 +592,27 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
|
|||
let fullText = ''
|
||||
let fullReasoning = ''
|
||||
|
||||
let fullToolName = ''
|
||||
let fullToolParams = ''
|
||||
|
||||
// Tool-call buffers keyed by Anthropic's content-block `index`. Anthropic streams each
|
||||
// tool as its own `content_block_start` (with name+id) followed by `content_block_delta`
|
||||
// events carrying `input_json_delta` chunks — both tagged with the same numeric `index`.
|
||||
// We previously only kept the first tool (`tools[0]` at finalMessage), silently dropping
|
||||
// any parallel tool_use blocks. Map<index, ...> preserves ordering and the per-tool id.
|
||||
const anthropicToolBuffers = new Map<number, { name: string; argsStr: string; id: string }>()
|
||||
const getOrCreateAnthropicTool = (index: number) => {
|
||||
let buf = anthropicToolBuffers.get(index)
|
||||
if (!buf) { buf = { name: '', argsStr: '', id: '' }; anthropicToolBuffers.set(index, buf) }
|
||||
return buf
|
||||
}
|
||||
|
||||
const runOnText = () => {
|
||||
const inProgressToolCalls: RawToolCallObj[] = Array.from(anthropicToolBuffers.entries())
|
||||
.filter(([_i, buf]) => !!buf.name)
|
||||
.sort(([a], [b]) => a - b)
|
||||
.map(([_i, buf]) => ({ name: buf.name as ToolName, rawParams: {}, isDone: false, doneParams: [], id: buf.id || 'dummy' }))
|
||||
onText({
|
||||
fullText,
|
||||
fullReasoning,
|
||||
toolCall: !fullToolName ? undefined : { name: fullToolName, rawParams: {}, isDone: false, doneParams: [], id: 'dummy' },
|
||||
toolCalls: inProgressToolCalls.length > 0 ? inProgressToolCalls : undefined,
|
||||
})
|
||||
}
|
||||
// there are no events for tool_use, it comes in at the end
|
||||
|
|
@ -589,7 +636,11 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
|
|||
runOnText()
|
||||
}
|
||||
else if (e.content_block.type === 'tool_use') {
|
||||
fullToolName += e.content_block.name ?? '' // anthropic gives us the tool name in the start block
|
||||
// Anthropic gives the tool name+id in the start block and the JSON input in
|
||||
// subsequent input_json_delta events keyed to the same `e.index`.
|
||||
const buf = getOrCreateAnthropicTool(e.index)
|
||||
buf.name += e.content_block.name ?? ''
|
||||
buf.id += e.content_block.id ?? ''
|
||||
runOnText()
|
||||
}
|
||||
}
|
||||
|
|
@ -605,7 +656,10 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
|
|||
runOnText()
|
||||
}
|
||||
else if (e.delta.type === 'input_json_delta') { // tool use
|
||||
fullToolParams += e.delta.partial_json ?? '' // anthropic gives us the partial delta (string) here - https://docs.anthropic.com/en/api/messages-streaming
|
||||
// partial_json is a string delta scoped to the current content block (e.index).
|
||||
// See https://docs.anthropic.com/en/api/messages-streaming
|
||||
const buf = getOrCreateAnthropicTool(e.index)
|
||||
buf.argsStr += e.delta.partial_json ?? ''
|
||||
runOnText()
|
||||
}
|
||||
}
|
||||
|
|
@ -614,13 +668,19 @@ const sendAnthropicChat = async ({ messages, providerName, onText, onFinalMessag
|
|||
// on done - (or when error/fail) - this is called AFTER last streamEvent
|
||||
stream.on('finalMessage', (response) => {
|
||||
const anthropicReasoning = response.content.filter(c => c.type === 'thinking' || c.type === 'redacted_thinking')
|
||||
// Iterate ALL tool_use blocks in document order (response.content preserves ordering).
|
||||
// Previous behavior only used `tools[0]`, which silently dropped parallel tool calls.
|
||||
const tools = response.content.filter(c => c.type === 'tool_use')
|
||||
// console.log('TOOLS!!!!!!', JSON.stringify(tools, null, 2))
|
||||
// console.log('TOOLS!!!!!!', JSON.stringify(response, null, 2))
|
||||
const toolCall = tools[0] && rawToolCallObjOfAnthropicParams(tools[0])
|
||||
const toolCallObj = toolCall ? { toolCall } : {}
|
||||
const finalToolCalls: RawToolCallObj[] = tools
|
||||
.map(t => rawToolCallObjOfAnthropicParams(t))
|
||||
.filter((t): t is RawToolCallObj => t !== null)
|
||||
|
||||
onFinalMessage({ fullText, fullReasoning, anthropicReasoning, ...toolCallObj })
|
||||
onFinalMessage({
|
||||
fullText,
|
||||
fullReasoning,
|
||||
anthropicReasoning,
|
||||
toolCalls: finalToolCalls.length > 0 ? finalToolCalls : undefined,
|
||||
})
|
||||
})
|
||||
// on error
|
||||
stream.on('error', (error) => {
|
||||
|
|
@ -825,9 +885,14 @@ const sendGeminiChat = async ({
|
|||
let fullReasoningSoFar = ''
|
||||
let fullTextSoFar = ''
|
||||
|
||||
let toolName = ''
|
||||
let toolParamsStr = ''
|
||||
let toolId = ''
|
||||
// Tool-call buffer — Gemini emits each functionCall as a fully-formed object (not a
|
||||
// streamed partial like OpenAI/Anthropic), so we just accumulate them. Each chunk's
|
||||
// `chunk.functionCalls` may contain zero or more calls. We track by id (falling back to name + JSON args)
|
||||
// to dedupe in case a later chunk repeats an earlier call (the SDK occasionally does
|
||||
// this in the final summary chunk). Ordering is preserved by first-appearance.
|
||||
type GeminiToolBuf = { name: string; argsStr: string; id: string }
|
||||
const geminiToolCalls: GeminiToolBuf[] = []
|
||||
const geminiToolSeen = new Set<string>()
|
||||
|
||||
// Gemini reports token usage via chunk.usageMetadata. It typically appears in the last
|
||||
// chunk(s), but we keep the latest seen so we always forward the freshest values.
|
||||
|
|
@ -861,13 +926,20 @@ const sendGeminiChat = async ({
|
|||
}
|
||||
}
|
||||
|
||||
// tool call
|
||||
// tool calls — iterate ALL functionCalls in the chunk. Previously we only kept
|
||||
// `functionCalls[0]`, silently dropping any parallel tool emission (e.g. a model
|
||||
// asking to read three files at once). Dedupe across chunks by (id || name+args).
|
||||
const functionCalls = chunk.functionCalls
|
||||
if (functionCalls && functionCalls.length > 0) {
|
||||
const functionCall = functionCalls[0] // Get the first function call
|
||||
toolName = functionCall.name ?? ''
|
||||
toolParamsStr = JSON.stringify(functionCall.args ?? {})
|
||||
toolId = functionCall.id ?? ''
|
||||
for (const fc of functionCalls) {
|
||||
const name = fc.name ?? ''
|
||||
const argsStr = JSON.stringify(fc.args ?? {})
|
||||
const id = fc.id ?? ''
|
||||
const key = id || `${name}::${argsStr}`
|
||||
if (geminiToolSeen.has(key)) continue
|
||||
geminiToolSeen.add(key)
|
||||
geminiToolCalls.push({ name, argsStr, id })
|
||||
}
|
||||
}
|
||||
|
||||
// usage (Gemini exposes promptTokenCount / candidatesTokenCount / totalTokenCount /
|
||||
|
|
@ -888,23 +960,43 @@ const sendGeminiChat = async ({
|
|||
}
|
||||
}
|
||||
|
||||
// Build the in-progress tool-call snapshot for UI streaming. Gemini tool calls
|
||||
// are already complete when they appear in a chunk, but we still surface them
|
||||
// via onText so the UI can render them as they arrive rather than only at end.
|
||||
const inProgressToolCalls: RawToolCallObj[] = geminiToolCalls.map(buf => ({
|
||||
name: buf.name as ToolName,
|
||||
rawParams: {},
|
||||
isDone: false,
|
||||
doneParams: [],
|
||||
id: buf.id,
|
||||
}))
|
||||
|
||||
// call onText
|
||||
onText({
|
||||
fullText: fullTextSoFar,
|
||||
fullReasoning: fullReasoningSoFar,
|
||||
toolCall: !toolName ? undefined : { name: toolName, rawParams: {}, isDone: false, doneParams: [], id: toolId },
|
||||
toolCalls: inProgressToolCalls.length > 0 ? inProgressToolCalls : undefined,
|
||||
usage: latestUsage,
|
||||
})
|
||||
}
|
||||
|
||||
// on final
|
||||
if (!fullTextSoFar && !fullReasoningSoFar && !toolName) {
|
||||
// on final — parse each accumulated tool buffer into a full RawToolCallObj.
|
||||
// Empty ids are filled with a UUID so downstream code (which keys tool-result
|
||||
// messages by id) doesn't collide across tools. Malformed JSON args are skipped.
|
||||
const finalToolCalls: RawToolCallObj[] = geminiToolCalls
|
||||
.map(buf => rawToolCallObjOfParamsStr(buf.name, buf.argsStr, buf.id || generateUuid()))
|
||||
.filter((t): t is RawToolCallObj => t !== null)
|
||||
|
||||
if (!fullTextSoFar && !fullReasoningSoFar && finalToolCalls.length === 0) {
|
||||
onError({ message: 'Void: Response from model was empty.', fullError: null })
|
||||
} else {
|
||||
if (!toolId) toolId = generateUuid() // ids are empty, but other providers might expect an id
|
||||
const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId)
|
||||
const toolCallObj = toolCall ? { toolCall } : {}
|
||||
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, ...toolCallObj });
|
||||
onFinalMessage({
|
||||
fullText: fullTextSoFar,
|
||||
fullReasoning: fullReasoningSoFar,
|
||||
anthropicReasoning: null,
|
||||
usage: latestUsage,
|
||||
toolCalls: finalToolCalls.length > 0 ? finalToolCalls : undefined,
|
||||
});
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
|
|
|
|||
|
|
@ -66,9 +66,17 @@ export const sendLLMMessage = async ({
|
|||
}
|
||||
|
||||
const onFinalMessage: OnFinalMessage = (params) => {
|
||||
const { fullText, fullReasoning, toolCall } = params
|
||||
const { fullText, fullReasoning, toolCalls } = params
|
||||
if (_didAbort) return
|
||||
captureLLMEvent(`${loggingName} - Received Full Message`, { messageLength: fullText.length, reasoningLength: fullReasoning?.length, duration: new Date().getMilliseconds() - submit_time.getMilliseconds(), toolCallName: toolCall?.name })
|
||||
captureLLMEvent(`${loggingName} - Received Full Message`, {
|
||||
messageLength: fullText.length,
|
||||
reasoningLength: fullReasoning?.length,
|
||||
duration: new Date().getMilliseconds() - submit_time.getMilliseconds(),
|
||||
// Parallel tool calling: capture the number of tools and a comma-joined summary
|
||||
// so metrics can see how often models emit batches (vs. 0 or 1 tool per turn).
|
||||
toolCallCount: toolCalls?.length ?? 0,
|
||||
toolCallNames: toolCalls?.map(t => t.name).join(','),
|
||||
})
|
||||
onFinalMessage_(params)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue