From 0de63a088d7922c82e5204d30c94b566309de1e5 Mon Sep 17 00:00:00 2001 From: Andrew Pareles Date: Wed, 26 Feb 2025 01:59:59 -0800 Subject: [PATCH] reasoning UI! --- .../contrib/void/browser/chatThreadService.ts | 19 ++-- .../contrib/void/browser/editCodeService.ts | 9 +- .../browser/helpers/extractCodeFromResult.ts | 99 +++++++++-------- .../react/src/markdown/ChatMarkdownRender.tsx | 14 +-- .../react/src/sidebar-tsx/SidebarChat.tsx | 103 ++++++++++-------- .../void/browser/searchReplaceCacheService.ts | 26 ++--- .../contrib/void/common/llmMessageTypes.ts | 4 +- .../void/electron-main/llmMessage/MODELS.ts | 30 ++--- 8 files changed, 163 insertions(+), 141 deletions(-) diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts index cc64fc92..98f98f8e 100644 --- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts +++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts @@ -60,11 +60,7 @@ export type ToolMessage = { // WARNING: changing this format is a big deal!!!!!! need to migrate old format to new format on users' computers so people don't get errors. export type ChatMessage = - | { - role: 'system'; - content: string; - displayContent?: undefined; - } | { + { role: 'user'; content: string | null; // content displayed to the LLM on future calls - allowed to be '', will be replaced with (empty) displayContent: string | null; // content displayed to user - allowed to be '', will be ignored @@ -76,7 +72,6 @@ export type ChatMessage = } | { role: 'assistant'; content: string | null; // content received from LLM - allowed to be '', will be replaced with (empty) - displayContent: string | null; // content displayed to user (this is the same as content for now) - allowed to be '', will be ignored reasoning: string | null; // reasoning from the LLM, used for step-by-step thinking } | ToolMessage @@ -332,9 +327,9 @@ class ChatThreadService extends Disposable implements IChatThreadService { // ---------- streaming ---------- - private _finishStreamingTextMessage = (threadId: string, content: string, error?: { message: string, fullError: Error | null }, reasoning?: string) => { + private _finishStreamingTextMessage = (threadId: string, options: { content: string, reasoning?: string }, error?: { message: string, fullError: Error | null }) => { // add assistant's message to chat history, and clear selection - this._addMessageToThread(threadId, { role: 'assistant', content, displayContent: content || null, reasoning: reasoning || null }) + this._addMessageToThread(threadId, { role: 'assistant', content: options.content, reasoning: options.reasoning || null }) this._setStreamState(threadId, { messageSoFar: undefined, reasoningSoFar: undefined, streamingToken: undefined, error }) } @@ -439,10 +434,10 @@ class ChatThreadService extends Disposable implements IChatThreadService { onFinalMessage: async ({ fullText, toolCalls, fullReasoning }) => { if ((toolCalls?.length ?? 0) === 0) { - this._finishStreamingTextMessage(threadId, fullText, undefined, fullReasoning) + this._finishStreamingTextMessage(threadId, { content: fullText, reasoning: fullReasoning }) } else { - this._addMessageToThread(threadId, { role: 'assistant', content: fullText, displayContent: fullText, reasoning: fullReasoning || null }) + this._addMessageToThread(threadId, { role: 'assistant', content: fullText, reasoning: fullReasoning || null }) this._setStreamState(threadId, { messageSoFar: undefined, reasoningSoFar: undefined }) // clear streaming message for (const tool of toolCalls ?? []) { const toolName = tool.name as ToolName @@ -479,7 +474,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { onError: (error) => { const messageSoFar = this.streamState[threadId]?.messageSoFar ?? '' const reasoningSoFar = this.streamState[threadId]?.reasoningSoFar ?? '' - this._finishStreamingTextMessage(threadId, messageSoFar, error, reasoningSoFar) + this._finishStreamingTextMessage(threadId, { content: messageSoFar, reasoning: reasoningSoFar }, error) res_() }, }) @@ -499,7 +494,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { if (llmCancelToken !== undefined) this._llmMessageService.abort(llmCancelToken) const messageSoFar = this.streamState[threadId]?.messageSoFar ?? '' const reasoningSoFar = this.streamState[threadId]?.reasoningSoFar ?? '' - this._finishStreamingTextMessage(threadId, messageSoFar, undefined, reasoningSoFar) + this._finishStreamingTextMessage(threadId, { content: messageSoFar, reasoning: reasoningSoFar }) } dismissStreamError(threadId: string): void { diff --git a/src/vs/workbench/contrib/void/browser/editCodeService.ts b/src/vs/workbench/contrib/void/browser/editCodeService.ts index 0f92d4ef..5ef2e555 100644 --- a/src/vs/workbench/contrib/void/browser/editCodeService.ts +++ b/src/vs/workbench/contrib/void/browser/editCodeService.ts @@ -1400,7 +1400,7 @@ class EditCodeService extends Disposable implements IEditCodeService { const latestStreamInfoMutable: StreamLocationMutable = { line: diffZone.startLine, addedSplitYet: false, col: 1, originalCodeStartLine: 1 } // state used in onText: - let fullText = '' + let fullTextSoFar = '' // so far (INCLUDING ignored suffix) let prevIgnoredSuffix = '' streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({ @@ -1408,12 +1408,13 @@ class EditCodeService extends Disposable implements IEditCodeService { useProviderFor: opts.from === 'ClickApply' ? 'Apply' : 'Ctrl+K', logging: { loggingName: `startApplying - ${from}` }, messages, - onText: ({ newText: newText_ }) => { + onText: ({ fullText: fullText_ }) => { + const newText_ = fullText_.substring(fullTextSoFar.length, Infinity) const newText = prevIgnoredSuffix + newText_ // add the previously ignored suffix because it's no longer the suffix! - fullText += prevIgnoredSuffix + newText // full text, including ```, etc + fullTextSoFar += newText // full text, including ```, etc - const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullText, newText.length) + const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullTextSoFar, newText.length) const { endLineInLlmTextSoFar } = this._writeStreamedDiffZoneLLMText(uri, originalCode, croppedText, deltaCroppedText, latestStreamInfoMutable) diffZone._streamState.line = (diffZone.startLine - 1) + endLineInLlmTextSoFar // change coordinate systems from originalCode to full file diff --git a/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts b/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts index 564a565d..21f6e6f5 100644 --- a/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts +++ b/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts @@ -173,7 +173,7 @@ export type ExtractedSearchReplaceBlock = { const endsWithAnyPrefixOf = (str: string, anyPrefix: string) => { // for each prefix - for (let i = anyPrefix.length; i >= 0; i--) { + for (let i = anyPrefix.length; i >= 1; i--) { // i >= 1 because must not be empty string const prefix = anyPrefix.slice(0, i) if (str.endsWith(prefix)) return prefix } @@ -252,93 +252,104 @@ export const extractSearchReplaceBlocks = (str: string) => { // could simplify this - this assumes we can never add a tag without committing it to the user's screen, but that's not true export const extractReasoningOnTextWrapper = (onText: OnText, thinkTags: [string, string]): OnText => { - - - let latestAddIdx = 0 // exclusive + let latestAddIdx = 0 // exclusive index in fullText_ let foundTag1 = false let foundTag2 = false - let fullText = '' - let fullReasoning = '' + let fullTextSoFar = '' + let fullReasoningSoFar = '' - const newOnText: OnText = ({ newText: newText_, fullText: fullText_ }) => { + let onText_ = onText + onText = (params) => { + onText_(params) + } - // abcdefghi - // | + const newOnText: OnText = ({ fullText: fullText_ }) => { // until found the first think tag, keep adding to fullText if (!foundTag1) { const endsWithTag1 = endsWithAnyPrefixOf(fullText_, thinkTags[0]) if (endsWithTag1) { + console.log('endswith1', { fullTextSoFar, fullReasoningSoFar, fullText_ }) // wait until we get the full tag or know more return } // if found the first tag - const tag1Index = fullText_.lastIndexOf(thinkTags[0]) + const tag1Index = fullText_.indexOf(thinkTags[0]) if (tag1Index !== -1) { + console.log('tag1Index !==1', { tag1Index, fullTextSoFar, fullReasoningSoFar, thinkTags, fullText_ }) foundTag1 = true - const newText = fullText.substring(latestAddIdx, tag1Index) - const newReasoning = fullText.substring(tag1Index + thinkTags[0].length, Infinity) - - fullText += newText - fullReasoning += newReasoning - latestAddIdx += newText.length + newReasoning.length - onText({ newText, fullText, newReasoning: newReasoning, fullReasoning }) + // Add text before the tag to fullTextSoFar + fullTextSoFar += fullText_.substring(0, tag1Index) + // Update latestAddIdx to after the first tag + latestAddIdx = tag1Index + thinkTags[0].length + onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar }) return } + console.log('adding to text A', { fullTextSoFar, fullReasoningSoFar }) // add the text to fullText - const newText = fullText.substring(latestAddIdx, Infinity) - fullText += newText - latestAddIdx += newText.length - onText({ newText, fullText, newReasoning: '', fullReasoning }) + fullTextSoFar = fullText_ + latestAddIdx = fullText_.length + onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar }) return } + // at this point, we found // until found the second think tag, keep adding to fullReasoning if (!foundTag2) { const endsWithTag2 = endsWithAnyPrefixOf(fullText_, thinkTags[1]) if (endsWithTag2) { + console.log('endsWith2', { fullTextSoFar, fullReasoningSoFar }) // wait until we get the full tag or know more return } - // if found the second tag - const tag2Index = fullText_.lastIndexOf(thinkTags[1]) - if (tag2Index !== -1) { - foundTag2 = true - const newReasoning = fullText.substring(latestAddIdx, tag2Index) - const newText = fullText.substring(tag2Index + thinkTags[1].length, Infinity) - fullText += newText - fullReasoning += newReasoning - latestAddIdx += newText.length + newReasoning.length - onText({ newText, fullText, newReasoning: newReasoning, fullReasoning }) + // if found the second tag + const tag2Index = fullText_.indexOf(thinkTags[1], latestAddIdx) + if (tag2Index !== -1) { + console.log('tag2Index !== -1', { fullTextSoFar, fullReasoningSoFar }) + foundTag2 = true + // Add everything between first and second tag to reasoning + fullReasoningSoFar += fullText_.substring(latestAddIdx, tag2Index) + // Update latestAddIdx to after the second tag + latestAddIdx = tag2Index + thinkTags[1].length + onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar }) return } - // add the text to fullReasoning - const newReasoning = fullText.substring(latestAddIdx, Infinity) - fullReasoning += newReasoning - latestAddIdx += newReasoning.length - onText({ newText: '', fullText, newReasoning, fullReasoning }) + // add the text to fullReasoning (content after first tag but before second tag) + console.log('adding to text B', { fullTextSoFar, fullReasoningSoFar }) + + // If we have more text than we've processed, add it to reasoning + if (fullText_.length > latestAddIdx) { + fullReasoningSoFar += fullText_.substring(latestAddIdx) + latestAddIdx = fullText_.length + } + + onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar }) return } - // at this point, we found - fullText += newText_ - const newText = fullText.substring(latestAddIdx, Infinity) - latestAddIdx += newText.length - onText({ newText, fullText, newReasoning: '', fullReasoning }) + // at this point, we found - content after the second tag is normal text + console.log('adding to text C', { fullTextSoFar, fullReasoningSoFar }) + + // Add any new text after the closing tag to fullTextSoFar + if (fullText_.length > latestAddIdx) { + fullTextSoFar += fullText_.substring(latestAddIdx) + latestAddIdx = fullText_.length + } + + onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar }) } - return newOnText } export const extractReasoningOnFinalMessage = (fullText_: string, thinkTags: [string, string]): { fullText: string, fullReasoning: string } => { - const tag1Idx = fullText_.lastIndexOf(thinkTags[0]) - const tag2Idx = fullText_.lastIndexOf(thinkTags[1]) + const tag1Idx = fullText_.indexOf(thinkTags[0]) + const tag2Idx = fullText_.indexOf(thinkTags[1]) if (tag1Idx === -1 || tag2Idx === -1) return { fullText: fullText_, fullReasoning: '' } const fullText = fullText_.substring(0, tag1Idx) + fullText_.substring(tag2Idx + thinkTags[1].length, Infinity) const fullReasoning = fullText.substring(tag1Idx + thinkTags[0].length, tag2Idx) diff --git a/src/vs/workbench/contrib/void/browser/react/src/markdown/ChatMarkdownRender.tsx b/src/vs/workbench/contrib/void/browser/react/src/markdown/ChatMarkdownRender.tsx index d2f24569..1aa55e61 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/markdown/ChatMarkdownRender.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/markdown/ChatMarkdownRender.tsx @@ -33,7 +33,7 @@ export const CodeSpan = ({ children, className }: { children: React.ReactNode, c } -const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }: { token: Token | string, nested?: boolean, noSpace?: boolean, chatMessageLocation?: ChatMessageLocation, tokenIdx: string }): JSX.Element => { +const RenderToken = ({ token, nested, noSpace, chatMessageLocationForApply, tokenIdx }: { token: Token | string, nested?: boolean, noSpace?: boolean, chatMessageLocationForApply?: ChatMessageLocation, tokenIdx: string }): JSX.Element => { // deal with built-in tokens first (assume marked token) @@ -45,9 +45,9 @@ const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }: if (t.type === "code") { - const applyBoxId = chatMessageLocation ? getApplyBoxId({ - threadId: chatMessageLocation.threadId, - messageIdx: chatMessageLocation.messageIdx, + const applyBoxId = chatMessageLocationForApply ? getApplyBoxId({ + threadId: chatMessageLocationForApply.threadId, + messageIdx: chatMessageLocationForApply.messageIdx, tokenIdx: tokenIdx, }) : null @@ -131,7 +131,7 @@ const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }: )} - + ))} @@ -243,12 +243,12 @@ const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }: ) } -export const ChatMarkdownRender = ({ string, nested = false, noSpace, chatMessageLocation }: { string: string, nested?: boolean, noSpace?: boolean, chatMessageLocation?: ChatMessageLocation }) => { +export const ChatMarkdownRender = ({ string, nested = false, noSpace, chatMessageLocationForApply }: { string: string, nested?: boolean, noSpace?: boolean, chatMessageLocationForApply?: ChatMessageLocation }) => { const tokens = marked.lexer(string); // https://marked.js.org/using_pro#renderer return ( <> {tokens.map((token, index) => ( - + ))} ) diff --git a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx index 05ec8419..dd35e8e7 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx @@ -139,6 +139,9 @@ export const IconLoading = ({ className = '' }: { className?: string }) => { } +const getChatBubbleId = (threadId: string, messageIdx: number) => `${threadId}-${messageIdx}`; + + interface VoidChatAreaProps { // Required children: React.ReactNode; // This will be the input component @@ -696,9 +699,12 @@ const toolResultToComponent: ToolResultToComponent = { type ChatBubbleMode = 'display' | 'edit' -const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatMessage, messageIdx?: number, isLoading?: boolean, }) => { +const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatMessage, messageIdx: number, isLoading?: boolean, }) => { const role = chatMessage.role + // Only show reasoning dropdown when there's actual content + const hasReasoning = chatMessage.role === 'assistant' && chatMessage.reasoning + const [isReasoningOpen, setIsReasoningOpen] = useState(false) const accessor = useAccessor() @@ -839,46 +845,45 @@ const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatM } else if (role === 'assistant') { const thread = chatThreadsService.getCurrentThread() - const hasReasoning = !!chatMessage.reasoning const chatMessageLocation: ChatMessageLocation = { threadId: thread.id, - messageIdx: messageIdx!, + messageIdx: messageIdx, } - chatbubbleContents = ( - <> - {/* Always show the content */} - - {/* Show reasoning in a dropdown if it exists */} - {hasReasoning && ( -
-
-
setIsReasoningOpen(!isReasoningOpen)} - > - -
- Reasoning - Model's step-by-step thinking -
-
-
-
- -
-
+ const reasoningDropdown = hasReasoning ? ( +
+
+
setIsReasoningOpen(!isReasoningOpen)} + > + +
+ Reasoning + Model's step-by-step thinking
- )} - - ) +
+
+ +
+
+
+
+ ) : null + + chatbubbleContents = (<> + {/* Reasoning dropdown (conditional) */} + {reasoningDropdown} + {/* Main content */} + + ) } else if (role === 'tool') { @@ -1029,13 +1034,27 @@ export const SidebarChat = () => { }, [isHistoryOpen, currentThread.id]) - const prevMessagesHTML = useMemo(() => { + const pastMessagesHTML = useMemo(() => { return previousMessages.map((message, i) => - + ) }, [previousMessages]) + const streamingChatIdx = pastMessagesHTML.length + const currStreamingMessageHTML = !!(reasoningSoFar || messageSoFar) ? + : null + + const allMessagesHTML = [...pastMessagesHTML, currStreamingMessageHTML] + + const threadSelector =
@@ -1053,20 +1072,12 @@ export const SidebarChat = () => { overflow-x-hidden overflow-y-auto py-4 - ${prevMessagesHTML.length === 0 && !messageSoFar ? 'hidden' : ''} + ${pastMessagesHTML.length === 0 && !messageSoFar ? 'hidden' : ''} `} style={{ maxHeight: sidebarDimensions.height - historyDimensions.height - chatAreaDimensions.height - 36 }} // the height of the previousMessages is determined by all other heights > {/* previous messages */} - {prevMessagesHTML} - - {/* message stream */} - {messageSoFar && } + {allMessagesHTML} {/* error message */} @@ -1101,7 +1112,7 @@ export const SidebarChat = () => { isStreaming={isStreaming} isDisabled={isDisabled} showSelections={true} - showProspectiveSelections={prevMessagesHTML.length === 0} + showProspectiveSelections={pastMessagesHTML.length === 0} selections={selections} setSelections={setSelections} onClickAnywhere={() => { textAreaRef.current?.focus() }} diff --git a/src/vs/workbench/contrib/void/browser/searchReplaceCacheService.ts b/src/vs/workbench/contrib/void/browser/searchReplaceCacheService.ts index e7a9448e..e50a6d12 100644 --- a/src/vs/workbench/contrib/void/browser/searchReplaceCacheService.ts +++ b/src/vs/workbench/contrib/void/browser/searchReplaceCacheService.ts @@ -7,8 +7,8 @@ import { Emitter, Event } from '../../../../base/common/event.js'; import { Disposable } from '../../../../base/common/lifecycle.js'; import { InstantiationType, registerSingleton } from '../../../../platform/instantiation/common/extensions.js'; import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js'; -import { ILLMMessageService } from '../common/llmMessageService.js'; -import { ServiceSendLLMMessageParams } from '../common/llmMessageTypes.js'; +// import { ILLMMessageService } from '../common/llmMessageService.js'; +// import { ServiceSendLLMMessageParams } from '../common/llmMessageTypes.js'; @@ -24,22 +24,22 @@ class SearchReplaceService extends Disposable implements ISearchReplaceService { readonly onDidChangeState: Event = this._onDidChangeState.event; constructor( - @ILLMMessageService private readonly llmMessageService: ILLMMessageService, + // @ILLMMessageService private readonly llmMessageService: ILLMMessageService, ) { super() } - send(params: Omit & { onText: (p: { newText: string, fullText: string }) => { retry: boolean } }) { - this.llmMessageService.sendLLMMessage({ - ...params as ServiceSendLLMMessageParams, - onText: (p) => { - const { retry } = params.onText(p) - if (retry) { + // send(params: ServiceSendLLMMessageParams & { onText: (p: { newText: string, fullText: string }) => { retry: boolean } }) { + // this.llmMessageService.sendLLMMessage({ + // ...params as ServiceSendLLMMessageParams, + // onText: (p) => { + // const { retry } = params.onText(p) + // if (retry) { - } - } - }) - } + // } + // } + // }) + // } } diff --git a/src/vs/workbench/contrib/void/common/llmMessageTypes.ts b/src/vs/workbench/contrib/void/common/llmMessageTypes.ts index 93ef12b3..e8800562 100644 --- a/src/vs/workbench/contrib/void/common/llmMessageTypes.ts +++ b/src/vs/workbench/contrib/void/common/llmMessageTypes.ts @@ -45,14 +45,14 @@ export type ToolCallType = { } -export type OnText = (p: { newText: string, fullText: string; newReasoning: string; fullReasoning: string }) => void +export type OnText = (p: { fullText: string; fullReasoning: string }) => void export type OnFinalMessage = (p: { fullText: string, toolCalls?: ToolCallType[], fullReasoning?: string }) => void // id is tool_use_id export type OnError = (p: { message: string, fullError: Error | null }) => void export type AbortRef = { current: (() => void) | null } export const toLLMChatMessage = (c: ChatMessage): LLMChatMessage => { - if (c.role === 'system' || c.role === 'user') { + if (c.role === 'user') { return { role: c.role, content: c.content || '(empty message)' } } else if (c.role === 'assistant') diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts index c19aca3d..f9fd0cf5 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts @@ -3,16 +3,16 @@ * Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information. *--------------------------------------------------------------------------------------*/ -import OpenAI, { ClientOptions } from 'openai'; import Anthropic from '@anthropic-ai/sdk'; import { Ollama } from 'ollama'; +import OpenAI, { ClientOptions } from 'openai'; import { Model as OpenAIModel } from 'openai/resources/models.js'; -import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, LLMFIMMessage, ModelListParams } from '../../common/llmMessageTypes.js'; +import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper } from '../../browser/helpers/extractCodeFromResult.js'; +import { LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText } from '../../common/llmMessageTypes.js'; import { InternalToolInfo, isAToolName } from '../../common/toolsService.js'; import { defaultProviderSettings, displayInfoOfProviderName, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js'; import { prepareFIMMessage, prepareMessages } from './preprocessLLMMessages.js'; -import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper } from '../../browser/helpers/extractCodeFromResult.js'; @@ -677,7 +677,7 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage supportsReasoningOutput, supportsSystemMessage, supportsTools, - maxOutputTokens, + // maxOutputTokens, right now we are ignoring this } = getModelCapabilities(providerName, modelName_) const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, }) @@ -686,9 +686,8 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage const includeInPayload = supportsReasoningOutput ? modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.input?.includeInPayload || {} : {} const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {} - const maxTokensObj = maxOutputTokens ? { max_tokens: maxOutputTokens } : {} const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload }) - const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, ...maxTokensObj } + const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, } const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.output ?? {} @@ -727,15 +726,20 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage fullReasoningSoFar += newReasoning } - onText({ newText, fullText: fullTextSoFar, newReasoning, fullReasoning: fullReasoningSoFar }) + onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar }) } // on final const toolCalls = toolCallsFrom_OpenAICompat(toolCallOfIndex) - if (manuallyParseReasoning) { - const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, supportsReasoningOutput.openSourceThinkTags) - onFinalMessage({ fullText, fullReasoning, toolCalls }); - } else { - onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, toolCalls }); + if (!fullTextSoFar && !fullReasoningSoFar && toolCalls.length === 0) { + onError({ message: 'Void: Response from model was empty.', fullError: null }) + } + else { + if (manuallyParseReasoning) { + const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, supportsReasoningOutput.openSourceThinkTags) + onFinalMessage({ fullText, fullReasoning, toolCalls }); + } else { + onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, toolCalls }); + } } }) // when error/fail - this catches errors of both .create() and .then(for await) @@ -823,7 +827,7 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM }) // when receive text stream.on('text', (newText, fullText) => { - onText({ newText, fullText, newReasoning: '', fullReasoning: '' }) + onText({ fullText, fullReasoning: '' }) }) // when we get the final message on this stream (or when error/fail) stream.on('finalMessage', (response) => {