From 85a539d49de766bbc5241b6211ffe0121a30a5ac Mon Sep 17 00:00:00 2001
From: davi0015 <david_halim90@yahoo.com>
Date: Wed, 22 Apr 2026 21:07:34 +0800
Subject: [PATCH] handle unexpected token termination for openai compatible
 response (#11)

---
 .../contrib/void/browser/chatThreadService.ts |  8 +++----
 .../react/src/sidebar-tsx/SidebarChat.tsx     | 23 +++++++++++++++++++
 .../void/common/chatThreadServiceTypes.ts     |  7 ++++++
 .../void/common/sendLLMMessageTypes.ts        | 11 ++++++++-
 .../llmMessage/sendLLMMessage.impl.ts         | 19 ++++++++++++++-
 5 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts
index 75982bd6..b2b8703d 100644
--- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts
+++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts
@@ -1014,7 +1014,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 				nAttempts += 1
 
 				type ResTypes =
-					| { type: 'llmDone', toolCall?: RawToolCallObj, info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null } }
+					| { type: 'llmDone', toolCall?: RawToolCallObj, info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null, finishReason?: string } }
 					| { type: 'llmError', error?: { message: string; fullError: Error | null; } }
 					| { type: 'llmAborted' }
 
@@ -1034,12 +1034,12 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 						if (usage) this._setLatestUsage(threadId, usage)
 						this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) })
 					},
-					onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage }) => {
+					onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage, finishReason }) => {
 						if (usage) this._setLatestUsage(threadId, usage)
 						// Lock in this request's usage so the next loop iteration's
 						// running total is added to (not replacing) what we already counted.
 						this._lockInCurrentRequestUsage(threadId)
-						resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls
+						resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning, finishReason } }) // resolve with tool calls
 					},
 					onError: async (error) => {
 						resMessageIsDonePromise({ type: 'llmError', error: error })
@@ -1101,7 +1101,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 				// llm res success
 				const { toolCall, info } = llmRes
 
-				this._addMessageToThread(threadId, { role: 'assistant', displayContent: info.fullText, reasoning: info.fullReasoning, anthropicReasoning: info.anthropicReasoning })
+				this._addMessageToThread(threadId, { role: 'assistant', displayContent: info.fullText, reasoning: info.fullReasoning, anthropicReasoning: info.anthropicReasoning, finishReason: info.finishReason })
 
 				this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative for clarity
 
diff --git a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
index 2b767625..f59074fa 100644
--- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
@@ -1504,6 +1504,23 @@ const AssistantMessageComponent = ({ chatMessage, isCheckpointGhost, isCommitted
 	const isEmpty = !chatMessage.displayContent && !chatMessage.reasoning
 	if (isEmpty) return null
 
+	// Warn when the provider reported a non-clean stream end (truncated, filtered, or an
+	// unknown reason). Only rendered on committed messages so we don't flash a scary banner
+	// mid-stream: `finish_reason` arrives on the final chunk, but we don't trust it until it
+	// has round-tripped through `onFinalMessage` + `_addMessageToThread`. Empty/undefined
+	// reason → no warning (Anthropic/Gemini paths, or any OAI-compatible server that doesn't
+	// report finish_reason). NOTE(review): the `isEmpty` early return above skips this too.
+	const finishReason = chatMessage.finishReason
+	const showTruncationWarning = isCommitted
+		&& !!finishReason
+		&& finishReason !== 'stop'
+		&& finishReason !== 'tool_calls'
+		&& finishReason !== 'function_call'
+	const truncationWarningText =
+		finishReason === 'length' ? 'Response truncated — model hit its output-token limit (finish_reason: length).' :
+			finishReason === 'content_filter' ? 'Response blocked — provider content filter (finish_reason: content_filter).' :
+				`Response ended unexpectedly (finish_reason: ${finishReason}).`
+
 	return <>
 		{/* reasoning token */}
 		{hasReasoning &&
@@ -1534,6 +1551,12 @@ const AssistantMessageComponent = ({ chatMessage, isCheckpointGhost, isCommitted
 				</ProseWrapper>
 			</div>
 		}
+
+		{showTruncationWarning &&
+			<div className={`${isCheckpointGhost ? 'opacity-50' : ''} mt-1`}>
+				<WarningBox text={truncationWarningText} />
+			</div>
+		}
 	</>
 
 }
diff --git a/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts b/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts
index 61b628dd..3a027208 100644
--- a/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts
+++ b/src/vs/workbench/contrib/void/common/chatThreadServiceTypes.ts
@@ -67,6 +67,13 @@ export type ChatMessage =
 		reasoning: string; // reasoning from the LLM, used for step-by-step thinking
 
 		anthropicReasoning: AnthropicReasoning[] | null; // anthropic reasoning
+
+		// Provider-reported reason the stream ended. Populated only for OAI-compatible
+		// providers today; others leave this undefined. Used by the UI to warn when a
+		// response was silently truncated (typically `length` on MiniMax/OpenRouter
+		// when reasoning tokens exhaust the output budget). Optional to stay backward
+		// compatible with chat history persisted before this field existed.
+		finishReason?: string;
 	}
 	| ToolMessage<ToolName>
 	| DecorativeCanceledTool
diff --git a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts
index 00446785..7a692845 100644
--- a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts
+++ b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts
@@ -113,7 +113,16 @@ export type LLMUsage = {
 }
 
 export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; usage?: LLMUsage }) => void
-export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage }) => void // id is tool_use_id
+
+// `finishReason` is the provider's own reason for ending the stream. OpenAI-compatible
+// servers normally return one of `stop` / `tool_calls` / `function_call` / `length` / `content_filter`
+// in `choices[0].finish_reason`. Clean completions (`stop`/`tool_calls`/`function_call`) are
+// treated as normal; the field only exists so the UI can warn the user when a stream ends
+// for a reason that silently truncates the response (primarily `length` when a provider
+// clips against `max_tokens`, but also `content_filter` or unknown gateway-specific values).
+// Populated only by OAI-compatible providers right now — Anthropic / Gemini paths leave this
+// undefined, which renders as "no warning" (the same as before this was added).
+export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage; finishReason?: string }) => void // id is tool_use_id
 export type OnError = (p: { message: string; fullError: Error | null }) => void
 export type OnAbort = () => void
 export type AbortRef = { current: (() => void) | null }
diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
index 68762f33..445cc234 100644
--- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
+++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
@@ -347,6 +347,15 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
 	// stream_options.include_usage). `chunk.usage` is typed as `| null` there.
 	let latestUsage: LLMUsage | undefined = undefined
 
+	// The provider's own termination reason. We keep the *last* non-empty value seen
+	// across the stream — chunks normally carry `finish_reason: null` until the
+	// terminating one, which carries e.g. `'stop'`, `'tool_calls'`, `'length'`,
+	// `'content_filter'`, or a provider-specific value. Without this, a `length`
+	// truncation (common on MiniMax via OpenRouter when reasoning tokens eat the output
+	// budget) looks identical to a normal completion to the UI — spinner stops,
+	// message cuts off mid-word, no warning shown.
+	let lastFinishReason: string | undefined = undefined
+
 	openai.chat.completions
 		.create(options)
 		.then(async response => {
@@ -357,6 +366,14 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
 				const newText = chunk.choices[0]?.delta?.content ?? ''
 				fullTextSoFar += newText
 
+				// finish_reason: first choice only. Most chunks have `null`; keep what
+				// we've got if this one is null/empty, overwrite if it's set. Some gateways
+				// (OpenRouter) occasionally emit a finish_reason in a chunk that still
+				// has content, so we intentionally don't `break` — keep consuming until
+				// the stream actually ends.
+				const chunkFinishReason = chunk.choices[0]?.finish_reason
+				if (chunkFinishReason) lastFinishReason = chunkFinishReason
+
 				// tool call
 				for (const tool of chunk.choices[0]?.delta?.tool_calls ?? []) {
 					const index = tool.index
@@ -412,7 +429,7 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
 			else {
 				const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId)
 				const toolCallObj = toolCall ? { toolCall } : {}
-				onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, ...toolCallObj });
+				onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, finishReason: lastFinishReason, ...toolCallObj });
 			}
 		})
 		// when error/fail - this catches errors of both .create() and .then(for await)