mirror of
https://github.com/voideditor/void
synced 2026-05-23 17:38:23 +00:00
handle unexpected token termination for openai compatible response (#11)
This commit is contained in:
parent
5c0ca803ea
commit
85a539d49d
5 changed files with 62 additions and 6 deletions
|
|
@ -1014,7 +1014,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
nAttempts += 1
|
||||
|
||||
type ResTypes =
|
||||
| { type: 'llmDone', toolCall?: RawToolCallObj, info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null } }
|
||||
| { type: 'llmDone', toolCall?: RawToolCallObj, info: { fullText: string, fullReasoning: string, anthropicReasoning: AnthropicReasoning[] | null, finishReason?: string } }
|
||||
| { type: 'llmError', error?: { message: string; fullError: Error | null; } }
|
||||
| { type: 'llmAborted' }
|
||||
|
||||
|
|
@ -1034,12 +1034,12 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
if (usage) this._setLatestUsage(threadId, usage)
|
||||
this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) })
|
||||
},
|
||||
onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage }) => {
|
||||
onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage, finishReason }) => {
|
||||
if (usage) this._setLatestUsage(threadId, usage)
|
||||
// Lock in this request's usage so the next loop iteration's
|
||||
// running total is added to (not replacing) what we already counted.
|
||||
this._lockInCurrentRequestUsage(threadId)
|
||||
resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls
|
||||
resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning, finishReason } }) // resolve with tool calls
|
||||
},
|
||||
onError: async (error) => {
|
||||
resMessageIsDonePromise({ type: 'llmError', error: error })
|
||||
|
|
@ -1101,7 +1101,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
// llm res success
|
||||
const { toolCall, info } = llmRes
|
||||
|
||||
this._addMessageToThread(threadId, { role: 'assistant', displayContent: info.fullText, reasoning: info.fullReasoning, anthropicReasoning: info.anthropicReasoning })
|
||||
this._addMessageToThread(threadId, { role: 'assistant', displayContent: info.fullText, reasoning: info.fullReasoning, anthropicReasoning: info.anthropicReasoning, finishReason: info.finishReason })
|
||||
|
||||
this._setStreamState(threadId, { isRunning: 'idle', interrupt: 'not_needed' }) // just decorative for clarity
|
||||
|
||||
|
|
|
|||
|
|
@ -1504,6 +1504,23 @@ const AssistantMessageComponent = ({ chatMessage, isCheckpointGhost, isCommitted
|
|||
const isEmpty = !chatMessage.displayContent && !chatMessage.reasoning
|
||||
if (isEmpty) return null
|
||||
|
||||
// Show a truncation warning when the provider reported a non-clean stream end.
|
||||
// Only rendered on committed messages (so we don't flash a scary banner mid-stream —
|
||||
// the `finish_reason` is set on the final chunk, but until we've taken the round-
|
||||
// trip through `onFinalMessage` + `_addMessageToThread`, we don't trust it).
|
||||
// Empty/undefined reason → no warning (Anthropic/Gemini paths, or any OAI-compatible
|
||||
// server that doesn't report finish_reason).
|
||||
const finishReason = chatMessage.finishReason
|
||||
const showTruncationWarning = isCommitted
|
||||
&& !!finishReason
|
||||
&& finishReason !== 'stop'
|
||||
&& finishReason !== 'tool_calls'
|
||||
&& finishReason !== 'function_call'
|
||||
const truncationWarningText =
|
||||
finishReason === 'length' ? 'Response truncated — model hit its output-token limit (finish_reason: length).' :
|
||||
finishReason === 'content_filter' ? 'Response blocked — provider content filter (finish_reason: content_filter).' :
|
||||
`Response ended unexpectedly (finish_reason: ${finishReason}).`
|
||||
|
||||
return <>
|
||||
{/* reasoning token */}
|
||||
{hasReasoning &&
|
||||
|
|
@ -1534,6 +1551,12 @@ const AssistantMessageComponent = ({ chatMessage, isCheckpointGhost, isCommitted
|
|||
</ProseWrapper>
|
||||
</div>
|
||||
}
|
||||
|
||||
{showTruncationWarning &&
|
||||
<div className={`${isCheckpointGhost ? 'opacity-50' : ''} mt-1`}>
|
||||
<WarningBox text={truncationWarningText} />
|
||||
</div>
|
||||
}
|
||||
</>
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,6 +67,13 @@ export type ChatMessage =
|
|||
reasoning: string; // reasoning from the LLM, used for step-by-step thinking
|
||||
|
||||
anthropicReasoning: AnthropicReasoning[] | null; // anthropic reasoning
|
||||
|
||||
// Provider-reported reason the stream ended. Populated only for OAI-compatible
|
||||
// providers today; others leave this undefined. Used by the UI to warn when a
|
||||
// response was silently truncated (typically `length` on MiniMax/OpenRouter
|
||||
// when reasoning tokens exhaust the output budget). Optional to stay backward
|
||||
// compatible with chat history persisted before this field existed.
|
||||
finishReason?: string;
|
||||
}
|
||||
| ToolMessage<ToolName>
|
||||
| DecorativeCanceledTool
|
||||
|
|
|
|||
|
|
@ -113,7 +113,16 @@ export type LLMUsage = {
|
|||
}
|
||||
|
||||
/**
 * Callback signature taking one object argument with `fullText` and
 * `fullReasoning` strings, plus an optional `toolCall` and optional `usage`.
 * The return value is ignored.
 *
 * NOTE(review): presumably invoked repeatedly while a response streams, with
 * the text accumulated so far — confirm against the provider implementations.
 */
export type OnText = (p: {
	fullText: string;
	fullReasoning: string;
	toolCall?: RawToolCallObj;
	usage?: LLMUsage;
}) => void
|
||||
export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage }) => void // id is tool_use_id
|
||||
|
||||
// `finishReason` is the provider's own reason for ending the stream. OpenAI-compatible
|
||||
// servers return one of `stop` / `tool_calls` / `function_call` / `length` / `content_filter`
|
||||
// in `choices[0].finish_reason`. Clean completions (`stop`/`tool_calls`/`function_call`) are
|
||||
// treated as normal; the field only exists so the UI can warn the user when a stream ends
|
||||
// for a reason that silently truncates the response (primarily `length` when a provider
|
||||
// clips against `max_tokens`, but also `content_filter` or unknown gateway-specific values).
|
||||
// Populated only by OAI-compatible providers right now — Anthropic / Gemini paths leave this
|
||||
// undefined, which renders as "no warning" (the same as before this was added).
|
||||
export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage; finishReason?: string }) => void // id is tool_use_id
|
||||
/**
 * Callback signature for reporting a failure. The single object argument
 * carries a human-readable `message` and the underlying `Error` in
 * `fullError`, or `null` when no error object is supplied.
 * The return value is ignored.
 */
export type OnError = (p: {
	message: string;
	fullError: Error | null;
}) => void
|
||||
// Zero-argument callback type whose return value is ignored.
// NOTE(review): presumably invoked when an in-flight request is aborted — confirm against callers.
export type OnAbort = () => void
|
||||
/**
 * Holder object with a single assignable `current` slot that contains either
 * a zero-argument function or `null`.
 *
 * NOTE(review): presumably `current` is set to the active request's abort
 * function and reset to `null` when there is nothing to abort — confirm
 * against the code that writes it.
 */
export type AbortRef = {
	current: (() => void) | null;
}
|
||||
|
|
|
|||
|
|
@ -347,6 +347,15 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
|
|||
// stream_options.include_usage). `chunk.usage` is typed as `| null` there.
|
||||
let latestUsage: LLMUsage | undefined = undefined
|
||||
|
||||
// The provider's own termination reason. We keep the *last* non-empty value seen
|
||||
// across the stream — content-carrying chunks normally have `finish_reason: null` until
|
||||
// the final one, which carries e.g. `'stop'`, `'tool_calls'`, `'length'`,
|
||||
// `'content_filter'`, or a provider-specific value. Without this, a `length`
|
||||
// truncation (common on MiniMax via OpenRouter when reasoning tokens eat the output
|
||||
// budget) looks identical to a normal completion to the UI — spinner stops,
|
||||
// message cuts off mid-word, no warning shown.
|
||||
let lastFinishReason: string | undefined = undefined
|
||||
|
||||
openai.chat.completions
|
||||
.create(options)
|
||||
.then(async response => {
|
||||
|
|
@ -357,6 +366,14 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
|
|||
const newText = chunk.choices[0]?.delta?.content ?? ''
|
||||
fullTextSoFar += newText
|
||||
|
||||
// finish_reason: first choice only. Most chunks have `null`; keep what
|
||||
// we've got if this one is null/empty, overwrite if it's set. Some gateways
|
||||
// (OpenRouter) occasionally emit a finish_reason in a chunk that still
|
||||
// has content, so we intentionally don't `break` — keep consuming until
|
||||
// the stream actually ends.
|
||||
const chunkFinishReason = chunk.choices[0]?.finish_reason
|
||||
if (chunkFinishReason) lastFinishReason = chunkFinishReason
|
||||
|
||||
// tool call
|
||||
for (const tool of chunk.choices[0]?.delta?.tool_calls ?? []) {
|
||||
const index = tool.index
|
||||
|
|
@ -412,7 +429,7 @@ const _sendOpenAICompatibleChat = async ({ messages, onText, onFinalMessage, onE
|
|||
else {
|
||||
const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId)
|
||||
const toolCallObj = toolCall ? { toolCall } : {}
|
||||
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, ...toolCallObj });
|
||||
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, finishReason: lastFinishReason, ...toolCallObj });
|
||||
}
|
||||
})
|
||||
// when error/fail - this catches errors of both .create() and .then(for await)
|
||||
|
|
|
|||
Loading…
Reference in a new issue