From 85a539d49de766bbc5241b6211ffe0121a30a5ac Mon Sep 17 00:00:00 2001 From: davi0015 Date: Wed, 22 Apr 2026 21:07:34 +0800 Subject: [PATCH] handle unexpected token termination for openai compatible response (#11) --- .../contrib/void/browser/chatThreadService.ts | 8 +++---- .../react/src/sidebar-tsx/SidebarChat.tsx | 23 +++++++++++++++++++ .../void/common/chatThreadServiceTypes.ts | 7 ++++++ .../void/common/sendLLMMessageTypes.ts | 11 ++++++++- .../llmMessage/sendLLMMessage.impl.ts | 19 ++++++++++++++- 5 files changed, 62 insertions(+), 6 deletions(-) diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts index 75982bd6..b2b8703d 100644 --- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts +++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts @@ -1014,7 +1014,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { nAttempts += 1 type ResTypes = - | { type: 'llmDone', toolCall?: RawToolCallObj, info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null } } + | { type: 'llmDone', toolCall?: RawToolCallObj, info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null, finishReason?: string } } | { type: 'llmError', error?: { message: string; fullError: Error | null; } } | { type: 'llmAborted' } @@ -1034,12 +1034,12 @@ class ChatThreadService extends Disposable implements IChatThreadService { if (usage) this._setLatestUsage(threadId, usage) this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) }) }, - onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage }) => { + onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage, finishReason }) => { if (usage) this._setLatestUsage(threadId, usage) // Lock in this request's usage so the next loop iteration's // running total is added to (not replacing) what we already counted. this._lockInCurrentRequestUsage(threadId) - resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls + resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning, finishReason } }) // resolve with tool calls }, onError: async (error) => { resMessageIsDonePromise({ type: 'llmError', error: error }) @@ -1101,7 +1101,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { // llm res success const { toolCall, info } = llmRes - this._addMessageToThread(threadId, { role: 'assistant', displayContent: info.fullText, reasoning: info.fullReasoning, anthropicReasoning: info.anthropicReasoning }) + this._addMessageToThread(threadId, { role: 'assistant', displayContent: info.fullText, reasoning: info.fullReasoning, anthropicReasoning: info.anthropicReasoning, finishReason: info.finishReason }) this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative for clarity diff --git a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx index 2b767625..f59074fa 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx @@ -1504,6 +1504,23 @@ const AssistantMessageComponent = ({ chatMessage, isCheckpointGhost, isCommitted const isEmpty = !chatMessage.displayContent && !chatMessage.reasoning if (isEmpty) return null + // Show a truncation warning when the provider reported a non-clean stream end. + // Only rendered on committed messages (so we don't flash a scary banner mid-stream — + // the `finish_reason` is set on the final chunk, but until we've taken the round- + // trip through `onFinalMessage` + `_addMessageToThread`, we don't trust it). + // Empty/undefined reason → no warning (Anthropic/Gemini paths, or any OAI-compatible + // server that doesn't report finish_reason). + const finishReason = chatMessage.finishReason + const showTruncationWarning = isCommitted + && !!finishReason + && finishReason !== 'stop' + && finishReason !== 'tool_calls' + && finishReason !== 'function_call' + const truncationWarningText = + finishReason === 'length' ? 'Response truncated — model hit its output-token limit (finish_reason: length).' : + finishReason === 'content_filter' ? 'Response blocked — provider content filter (finish_reason: content_filter).' : + `Response ended unexpectedly (finish_reason: ${finishReason}).` + return <> {/* reasoning token */} {hasReasoning && @@ -1534,6 +1551,12 @@ const AssistantMessageComponent = ({ chatMessage, isCheckpointGhost, isCommitted } + + {showTruncationWarning && +
+ +
+ } } diff --git a/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts b/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts index 61b628dd..3a027208 100644 --- a/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts +++ b/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts @@ -67,6 +67,13 @@ export type ChatMessage = reasoning: string; // reasoning from the LLM, used for step-by-step thinking anthropicReasoning: AnthropicReasoning[] | null; // anthropic reasoning + + // Provider-reported reason the stream ended. Populated only for OAI-compatible + // providers today; others leave this undefined. Used by the UI to warn when a + // response was silently truncated (typically `length` on MiniMax/OpenRouter + // when reasoning tokens exhaust the output budget). Optional to stay backward + // compatible with chat history persisted before this field existed. + finishReason?: string; } | ToolMessage | DecorativeCanceledTool diff --git a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts index 00446785..7a692845 100644 --- a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts +++ b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts @@ -113,7 +113,16 @@ export type LLMUsage = { } export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; usage?: LLMUsage }) => void -export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage }) => void // id is tool_use_id + +// `finishReason` is the provider's own reason for ending the stream. OpenAI-compatible +// servers return one of `stop` / `tool_calls` / `function_call` / `length` / `content_filter` +// in `choices[0].finish_reason`. Clean completions (`stop`/`tool_calls`/`function_call`) are +// treated as normal; the field only exists so the UI can warn the user when a stream ends +// for a reason that silently truncates the response (primarily `length` when a provider +// clips against `max_tokens`, but also `content_filter` or unknown gateway-specific values). +// Populated only by OAI-compatible providers right now — Anthropic / Gemini paths leave this +// undefined, which renders as "no warning" (the same as before this was added). +export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage; finishReason?: string }) => void // id is tool_use_id export type OnError = (p: { message: string; fullError: Error | null }) => void export type OnAbort = () => void export type AbortRef = { current: (() => void) | null } diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts index 68762f33..445cc234 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts @@ -347,6 +347,15 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE // stream_options.include_usage). `chunk.usage` is typed as `| null` there. let latestUsage: LLMUsage | undefined = undefined + // The provider's own termination reason. We keep the *last* non-empty value seen + // across the stream — every content-carrying chunk has `finish_reason: null` until + // the final one, which carries e.g. `'stop'`, `'tool_calls'`, `'length'`, + // `'content_filter'`, or a provider-specific value. Without this, a `length` + // truncation (common on MiniMax via OpenRouter when reasoning tokens eat the output + // budget) looks identical to a normal completion to the UI — spinner stops, + // message cuts off mid-word, no warning shown. + let lastFinishReason: string | undefined = undefined + openai.chat.completions .create(options) .then(async response => { @@ -357,6 +366,14 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE const newText = chunk.choices[0]?.delta?.content ?? '' fullTextSoFar += newText + // finish_reason: first choice only. Most chunks have `null`; keep what + // we've got if this one is null/empty, overwrite if it's set. Some gateways + // (OpenRouter) occasionally emit a finish_reason in a chunk that still + // has content, so we intentionally don't `break` — keep consuming until + // the stream actually ends. + const chunkFinishReason = chunk.choices[0]?.finish_reason + if (chunkFinishReason) lastFinishReason = chunkFinishReason + // tool call for (const tool of chunk.choices[0]?.delta?.tool_calls ?? []) { const index = tool.index @@ -412,7 +429,7 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE else { const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId) const toolCallObj = toolCall ? { toolCall } : {} - onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, ...toolCallObj }); + onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, finishReason: lastFinishReason, ...toolCallObj }); } }) // when error/fail - this catches errors of both .create() and .then(for await)