reasoning UI!

This commit is contained in:
Andrew Pareles 2025-02-26 01:59:59 -08:00
parent 0de1c20551
commit 0de63a088d
8 changed files with 163 additions and 141 deletions

View file

@ -60,11 +60,7 @@ export type ToolMessage<T extends ToolName> = {
// WARNING: changing this format is a big deal!!!!!! need to migrate old format to new format on users' computers so people don't get errors.
export type ChatMessage =
| {
role: 'system';
content: string;
displayContent?: undefined;
} | {
{
role: 'user';
content: string | null; // content displayed to the LLM on future calls - allowed to be '', will be replaced with (empty)
displayContent: string | null; // content displayed to user - allowed to be '', will be ignored
@ -76,7 +72,6 @@ export type ChatMessage =
} | {
role: 'assistant';
content: string | null; // content received from LLM - allowed to be '', will be replaced with (empty)
displayContent: string | null; // content displayed to user (this is the same as content for now) - allowed to be '', will be ignored
reasoning: string | null; // reasoning from the LLM, used for step-by-step thinking
}
| ToolMessage<ToolName>
@ -332,9 +327,9 @@ class ChatThreadService extends Disposable implements IChatThreadService {
// ---------- streaming ----------
// Finishes a streamed assistant reply for `threadId`:
//  1. appends an assistant message (content plus reasoning) to the thread, then
//  2. resets the thread's stream state (messageSoFar, reasoningSoFar, streamingToken) and stores `error` on it.
// Because of `|| null`, an empty reasoning string is stored as null.
private _finishStreamingTextMessage = (threadId: string, content: string, error?: { message: string, fullError: Error | null }, reasoning?: string) => {
private _finishStreamingTextMessage = (threadId: string, options: { content: string, reasoning?: string }, error?: { message: string, fullError: Error | null }) => {
// add assistant's message to chat history, and clear selection
this._addMessageToThread(threadId, { role: 'assistant', content, displayContent: content || null, reasoning: reasoning || null })
this._addMessageToThread(threadId, { role: 'assistant', content: options.content, reasoning: options.reasoning || null })
// `error` is undefined on the success/abort paths, which leaves no error on the stream state
this._setStreamState(threadId, { messageSoFar: undefined, reasoningSoFar: undefined, streamingToken: undefined, error })
}
@ -439,10 +434,10 @@ class ChatThreadService extends Disposable implements IChatThreadService {
onFinalMessage: async ({ fullText, toolCalls, fullReasoning }) => {
if ((toolCalls?.length ?? 0) === 0) {
this._finishStreamingTextMessage(threadId, fullText, undefined, fullReasoning)
this._finishStreamingTextMessage(threadId, { content: fullText, reasoning: fullReasoning })
}
else {
this._addMessageToThread(threadId, { role: 'assistant', content: fullText, displayContent: fullText, reasoning: fullReasoning || null })
this._addMessageToThread(threadId, { role: 'assistant', content: fullText, reasoning: fullReasoning || null })
this._setStreamState(threadId, { messageSoFar: undefined, reasoningSoFar: undefined }) // clear streaming message
for (const tool of toolCalls ?? []) {
const toolName = tool.name as ToolName
@ -479,7 +474,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
onError: (error) => {
const messageSoFar = this.streamState[threadId]?.messageSoFar ?? ''
const reasoningSoFar = this.streamState[threadId]?.reasoningSoFar ?? ''
this._finishStreamingTextMessage(threadId, messageSoFar, error, reasoningSoFar)
this._finishStreamingTextMessage(threadId, { content: messageSoFar, reasoning: reasoningSoFar }, error)
res_()
},
})
@ -499,7 +494,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
if (llmCancelToken !== undefined) this._llmMessageService.abort(llmCancelToken)
const messageSoFar = this.streamState[threadId]?.messageSoFar ?? ''
const reasoningSoFar = this.streamState[threadId]?.reasoningSoFar ?? ''
this._finishStreamingTextMessage(threadId, messageSoFar, undefined, reasoningSoFar)
this._finishStreamingTextMessage(threadId, { content: messageSoFar, reasoning: reasoningSoFar })
}
dismissStreamError(threadId: string): void {

View file

@ -1400,7 +1400,7 @@ class EditCodeService extends Disposable implements IEditCodeService {
const latestStreamInfoMutable: StreamLocationMutable = { line: diffZone.startLine, addedSplitYet: false, col: 1, originalCodeStartLine: 1 }
// state used in onText:
let fullText = ''
let fullTextSoFar = '' // so far (INCLUDING ignored suffix)
let prevIgnoredSuffix = ''
streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({
@ -1408,12 +1408,13 @@ class EditCodeService extends Disposable implements IEditCodeService {
useProviderFor: opts.from === 'ClickApply' ? 'Apply' : 'Ctrl+K',
logging: { loggingName: `startApplying - ${from}` },
messages,
onText: ({ newText: newText_ }) => {
onText: ({ fullText: fullText_ }) => {
const newText_ = fullText_.substring(fullTextSoFar.length, Infinity)
const newText = prevIgnoredSuffix + newText_ // add the previously ignored suffix because it's no longer the suffix!
fullText += prevIgnoredSuffix + newText // full text, including ```, etc
fullTextSoFar += newText // full text, including ```, etc
const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullText, newText.length)
const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullTextSoFar, newText.length)
const { endLineInLlmTextSoFar } = this._writeStreamedDiffZoneLLMText(uri, originalCode, croppedText, deltaCroppedText, latestStreamInfoMutable)
diffZone._streamState.line = (diffZone.startLine - 1) + endLineInLlmTextSoFar // change coordinate systems from originalCode to full file

View file

@ -173,7 +173,7 @@ export type ExtractedSearchReplaceBlock = {
const endsWithAnyPrefixOf = (str: string, anyPrefix: string) => {
// for each prefix
for (let i = anyPrefix.length; i >= 0; i--) {
for (let i = anyPrefix.length; i >= 1; i--) { // i >= 1 because must not be empty string
const prefix = anyPrefix.slice(0, i)
if (str.endsWith(prefix)) return prefix
}
@ -252,93 +252,104 @@ export const extractSearchReplaceBlocks = (str: string) => {
// could simplify this - this assumes we can never add a tag without committing it to the user's screen, but that's not true
/**
 * Wraps a streaming `OnText` callback so that whatever the model emits between the two think tags
 * is reported as `fullReasoning` instead of `fullText`.
 *
 * The returned callback expects the cumulative raw model output on every call. It forwards to
 * `onText` the cumulative text with the tags and the reasoning stripped out, together with the
 * cumulative reasoning. Nothing is forwarded while the raw output ends with something that could
 * still turn out to be the tag currently being searched for.
 *
 * @param onText callback that receives the separated text and reasoning
 * @param thinkTags `[openingTag, closingTag]` delimiting the reasoning section
 * @returns an `OnText` that separates reasoning from text before calling `onText`
 */
export const extractReasoningOnTextWrapper = (onText: OnText, thinkTags: [string, string]): OnText => {
	const [openTag, closeTag] = thinkTags

	let latestAddIdx = 0 // exclusive index in fullText_: everything before it has already been consumed
	let foundTag1 = false
	let foundTag2 = false

	let fullTextSoFar = ''
	let fullReasoningSoFar = ''

	const emit = () => onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })

	// abcdef<t|hin|k>ghi  - a tag may arrive split across several chunks
	const newOnText: OnText = ({ fullText: fullText_ }) => {

		// phase 1: until the opening tag shows up, everything is regular text
		if (!foundTag1) {
			// the tail could be the beginning of the opening tag - wait until we get the full tag or know more
			if (endsWithAnyPrefixOf(fullText_, openTag)) return

			const tag1Index = fullText_.indexOf(openTag)
			if (tag1Index === -1) {
				fullTextSoFar = fullText_
				latestAddIdx = fullText_.length
				emit()
				return
			}

			foundTag1 = true
			// fullText_ is cumulative, so the text before the tag REPLACES fullTextSoFar
			// (appending it would duplicate text that was already forwarded on earlier calls)
			fullTextSoFar = fullText_.substring(0, tag1Index)
			// continue consuming right after the opening tag
			latestAddIdx = tag1Index + openTag.length
			emit()
			return
		}

		// phase 2: opening tag found - until the closing tag shows up, everything is reasoning
		if (!foundTag2) {
			// the tail could be the beginning of the closing tag - wait until we get the full tag or know more
			if (endsWithAnyPrefixOf(fullText_, closeTag)) return

			const tag2Index = fullText_.indexOf(closeTag, latestAddIdx)
			if (tag2Index !== -1) {
				foundTag2 = true
				// everything not yet consumed up to the closing tag is reasoning
				fullReasoningSoFar += fullText_.substring(latestAddIdx, tag2Index)
				// continue consuming right after the closing tag
				latestAddIdx = tag2Index + closeTag.length
				emit()
				return
			}

			// no closing tag yet: anything not consumed so far is reasoning
			if (fullText_.length > latestAddIdx) {
				fullReasoningSoFar += fullText_.substring(latestAddIdx)
				latestAddIdx = fullText_.length
			}
			emit()
			return
		}

		// phase 3: closing tag found - everything after it is regular text again
		if (fullText_.length > latestAddIdx) {
			fullTextSoFar += fullText_.substring(latestAddIdx)
			latestAddIdx = fullText_.length
		}
		emit()
	}

	return newOnText
}
export const extractReasoningOnFinalMessage = (fullText_: string, thinkTags: [string, string]): { fullText: string, fullReasoning: string } => {
const tag1Idx = fullText_.lastIndexOf(thinkTags[0])
const tag2Idx = fullText_.lastIndexOf(thinkTags[1])
const tag1Idx = fullText_.indexOf(thinkTags[0])
const tag2Idx = fullText_.indexOf(thinkTags[1])
if (tag1Idx === -1 || tag2Idx === -1) return { fullText: fullText_, fullReasoning: '' }
const fullText = fullText_.substring(0, tag1Idx) + fullText_.substring(tag2Idx + thinkTags[1].length, Infinity)
const fullReasoning = fullText.substring(tag1Idx + thinkTags[0].length, tag2Idx)

View file

@ -33,7 +33,7 @@ export const CodeSpan = ({ children, className }: { children: React.ReactNode, c
</code>
}
const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }: { token: Token | string, nested?: boolean, noSpace?: boolean, chatMessageLocation?: ChatMessageLocation, tokenIdx: string }): JSX.Element => {
const RenderToken = ({ token, nested, noSpace, chatMessageLocationForApply, tokenIdx }: { token: Token | string, nested?: boolean, noSpace?: boolean, chatMessageLocationForApply?: ChatMessageLocation, tokenIdx: string }): JSX.Element => {
// deal with built-in tokens first (assume marked token)
@ -45,9 +45,9 @@ const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }:
if (t.type === "code") {
const applyBoxId = chatMessageLocation ? getApplyBoxId({
threadId: chatMessageLocation.threadId,
messageIdx: chatMessageLocation.messageIdx,
const applyBoxId = chatMessageLocationForApply ? getApplyBoxId({
threadId: chatMessageLocationForApply.threadId,
messageIdx: chatMessageLocationForApply.messageIdx,
tokenIdx: tokenIdx,
}) : null
@ -131,7 +131,7 @@ const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }:
<input type="checkbox" checked={item.checked} readOnly className="mr-2 form-checkbox" />
)}
<span className="ml-1">
<ChatMarkdownRender chatMessageLocation={chatMessageLocation} string={item.text} nested={true} />
<ChatMarkdownRender chatMessageLocationForApply={chatMessageLocationForApply} string={item.text} nested={true} />
</span>
</li>
))}
@ -243,12 +243,12 @@ const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }:
)
}
export const ChatMarkdownRender = ({ string, nested = false, noSpace, chatMessageLocation }: { string: string, nested?: boolean, noSpace?: boolean, chatMessageLocation?: ChatMessageLocation }) => {
export const ChatMarkdownRender = ({ string, nested = false, noSpace, chatMessageLocationForApply }: { string: string, nested?: boolean, noSpace?: boolean, chatMessageLocationForApply?: ChatMessageLocation }) => {
const tokens = marked.lexer(string); // https://marked.js.org/using_pro#renderer
return (
<>
{tokens.map((token, index) => (
<RenderToken key={index} token={token} nested={nested} noSpace={noSpace} chatMessageLocation={chatMessageLocation} tokenIdx={index + ''} />
<RenderToken key={index} token={token} nested={nested} noSpace={noSpace} chatMessageLocationForApply={chatMessageLocationForApply} tokenIdx={index + ''} />
))}
</>
)

View file

@ -139,6 +139,9 @@ export const IconLoading = ({ className = '' }: { className?: string }) => {
}
/** Builds the identifier used as a chat bubble's React `key`: the thread id and the message index joined by a dash. */
const getChatBubbleId = (threadId: string, messageIdx: number) => {
	const parts = [threadId, String(messageIdx)];
	return parts.join('-');
};
interface VoidChatAreaProps {
// Required
children: React.ReactNode; // This will be the input component
@ -696,9 +699,12 @@ const toolResultToComponent: ToolResultToComponent = {
type ChatBubbleMode = 'display' | 'edit'
const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatMessage, messageIdx?: number, isLoading?: boolean, }) => {
const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatMessage, messageIdx: number, isLoading?: boolean, }) => {
const role = chatMessage.role
// Only show reasoning dropdown when there's actual content
const hasReasoning = chatMessage.role === 'assistant' && chatMessage.reasoning
const [isReasoningOpen, setIsReasoningOpen] = useState(false)
const accessor = useAccessor()
@ -839,46 +845,45 @@ const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatM
}
else if (role === 'assistant') {
const thread = chatThreadsService.getCurrentThread()
const hasReasoning = !!chatMessage.reasoning
const chatMessageLocation: ChatMessageLocation = {
threadId: thread.id,
messageIdx: messageIdx!,
messageIdx: messageIdx,
}
chatbubbleContents = (
<>
{/* Always show the content */}
<ChatMarkdownRender string={chatMessage.displayContent ?? ''} chatMessageLocation={chatMessageLocation} />
{/* Show reasoning in a dropdown if it exists */}
{hasReasoning && (
<div className="mx-4 select-none mt-2">
<div className="border border-void-border-3 rounded px-1 py-0.5 bg-void-bg-tool">
<div
className="flex items-center min-h-[24px] cursor-pointer hover:brightness-125 transition-all duration-150"
onClick={() => setIsReasoningOpen(!isReasoningOpen)}
>
<ChevronRight
className={`text-void-fg-3 mr-0.5 h-5 w-5 flex-shrink-0 transition-transform duration-100 ease-[cubic-bezier(0.4,0,0.2,1)] ${isReasoningOpen ? 'rotate-90' : ''}`}
/>
<div className="flex items-center flex-wrap gap-x-2 gap-y-0.5">
<span className="text-void-fg-3">Reasoning</span>
<span className="text-void-fg-4 text-xs italic">Model's step-by-step thinking</span>
</div>
</div>
<div
className={`mt-1 overflow-hidden transition-all duration-200 ease-in-out ${isReasoningOpen ? 'max-h-[500px] opacity-100' : 'max-h-0 opacity-0'}`}
>
<div className="text-void-fg-2 p-2 bg-void-bg-1 rounded">
<ChatMarkdownRender string={chatMessage.reasoning ?? ''} chatMessageLocation={chatMessageLocation} />
</div>
</div>
const reasoningDropdown = hasReasoning ? (
<div className="mx-4 select-none mt-2">
<div className="border border-void-border-3 rounded px-1 py-0.5 bg-void-bg-tool">
<div
className="flex items-center min-h-[24px] cursor-pointer hover:brightness-125 transition-all duration-150"
onClick={() => setIsReasoningOpen(!isReasoningOpen)}
>
<ChevronRight
className={`text-void-fg-3 mr-0.5 h-5 w-5 flex-shrink-0 transition-transform duration-100 ease-[cubic-bezier(0.4,0,0.2,1)] ${isReasoningOpen ? 'rotate-90' : ''}`}
/>
<div className="flex items-center flex-wrap gap-x-2 gap-y-0.5">
<span className="text-void-fg-3">Reasoning</span>
<span className="text-void-fg-4 text-xs italic">Model's step-by-step thinking</span>
</div>
</div>
)}
</>
)
<div
className={`mt-1 overflow-hidden transition-all duration-200 ease-in-out ${isReasoningOpen ? 'max-h-[500px] opacity-100' : 'max-h-0 opacity-0'}`}
>
<div className="text-void-fg-2 p-2 bg-void-bg-1 rounded">
<ChatMarkdownRender string={chatMessage.reasoning ?? ''} chatMessageLocationForApply={chatMessageLocation} />
</div>
</div>
</div>
</div>
) : null
chatbubbleContents = (<>
{/* Reasoning dropdown (conditional) */}
{reasoningDropdown}
{/* Main content */}
<ChatMarkdownRender string={chatMessage.content ?? ''} chatMessageLocationForApply={chatMessageLocation} />
</>)
}
else if (role === 'tool') {
@ -1029,13 +1034,27 @@ export const SidebarChat = () => {
}, [isHistoryOpen, currentThread.id])
const prevMessagesHTML = useMemo(() => {
const pastMessagesHTML = useMemo(() => {
return previousMessages.map((message, i) =>
<ChatBubble key={i} chatMessage={message} messageIdx={i} />
<ChatBubble key={getChatBubbleId(currentThread.id, i)} chatMessage={message} messageIdx={i} />
)
}, [previousMessages])
const streamingChatIdx = pastMessagesHTML.length
const currStreamingMessageHTML = !!(reasoningSoFar || messageSoFar) ?
<ChatBubble key={getChatBubbleId(currentThread.id, streamingChatIdx)}
messageIdx={streamingChatIdx} chatMessage={{
role: 'assistant',
content: messageSoFar ?? null,
reasoning: reasoningSoFar ?? null,
}}
isLoading={isStreaming}
/> : null
const allMessagesHTML = [...pastMessagesHTML, currStreamingMessageHTML]
const threadSelector = <div ref={historyRef}
className={`w-full h-auto ${isHistoryOpen ? '' : 'hidden'} ring-2 ring-widget-shadow ring-inset z-10`}
>
@ -1053,20 +1072,12 @@ export const SidebarChat = () => {
overflow-x-hidden
overflow-y-auto
py-4
${prevMessagesHTML.length === 0 && !messageSoFar ? 'hidden' : ''}
${pastMessagesHTML.length === 0 && !messageSoFar ? 'hidden' : ''}
`}
style={{ maxHeight: sidebarDimensions.height - historyDimensions.height - chatAreaDimensions.height - 36 }} // the height of the previousMessages is determined by all other heights
>
{/* previous messages */}
{prevMessagesHTML}
{/* message stream */}
{messageSoFar && <ChatBubble chatMessage={{
role: 'assistant',
content: messageSoFar,
displayContent: messageSoFar || null,
reasoning: reasoningSoFar || null,
}} isLoading={isStreaming} />}
{allMessagesHTML}
{/* error message */}
@ -1101,7 +1112,7 @@ export const SidebarChat = () => {
isStreaming={isStreaming}
isDisabled={isDisabled}
showSelections={true}
showProspectiveSelections={prevMessagesHTML.length === 0}
showProspectiveSelections={pastMessagesHTML.length === 0}
selections={selections}
setSelections={setSelections}
onClickAnywhere={() => { textAreaRef.current?.focus() }}

View file

@ -7,8 +7,8 @@ import { Emitter, Event } from '../../../../base/common/event.js';
import { Disposable } from '../../../../base/common/lifecycle.js';
import { InstantiationType, registerSingleton } from '../../../../platform/instantiation/common/extensions.js';
import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
import { ILLMMessageService } from '../common/llmMessageService.js';
import { ServiceSendLLMMessageParams } from '../common/llmMessageTypes.js';
// import { ILLMMessageService } from '../common/llmMessageService.js';
// import { ServiceSendLLMMessageParams } from '../common/llmMessageTypes.js';
@ -24,22 +24,22 @@ class SearchReplaceService extends Disposable implements ISearchReplaceService {
readonly onDidChangeState: Event<void> = this._onDidChangeState.event;
constructor(
@ILLMMessageService private readonly llmMessageService: ILLMMessageService,
// @ILLMMessageService private readonly llmMessageService: ILLMMessageService,
) {
super()
}
send(params: Omit<ServiceSendLLMMessageParams, 'onText'> & { onText: (p: { newText: string, fullText: string }) => { retry: boolean } }) {
this.llmMessageService.sendLLMMessage({
...params as ServiceSendLLMMessageParams,
onText: (p) => {
const { retry } = params.onText(p)
if (retry) {
// send(params: ServiceSendLLMMessageParams & { onText: (p: { newText: string, fullText: string }) => { retry: boolean } }) {
// this.llmMessageService.sendLLMMessage({
// ...params as ServiceSendLLMMessageParams,
// onText: (p) => {
// const { retry } = params.onText(p)
// if (retry) {
}
}
})
}
// }
// }
// })
// }
}

View file

@ -45,14 +45,14 @@ export type ToolCallType = {
}
export type OnText = (p: { newText: string, fullText: string; newReasoning: string; fullReasoning: string }) => void
export type OnText = (p: { fullText: string; fullReasoning: string }) => void
// Callback for the completed response: the final text, any tool calls the model made,
// and the full reasoning when there is any (both are optional).
export type OnFinalMessage = (p: { fullText: string, toolCalls?: ToolCallType[], fullReasoning?: string }) => void // id is tool_use_id
// Callback for a failed request: a displayable message plus the underlying Error, or null when there is none.
export type OnError = (p: { message: string, fullError: Error | null }) => void
// Mutable holder for an abort function; `current` is null when no abort function is set.
export type AbortRef = { current: (() => void) | null }
export const toLLMChatMessage = (c: ChatMessage): LLMChatMessage => {
if (c.role === 'system' || c.role === 'user') {
if (c.role === 'user') {
return { role: c.role, content: c.content || '(empty message)' }
}
else if (c.role === 'assistant')

View file

@ -3,16 +3,16 @@
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
*--------------------------------------------------------------------------------------*/
import OpenAI, { ClientOptions } from 'openai';
import Anthropic from '@anthropic-ai/sdk';
import { Ollama } from 'ollama';
import OpenAI, { ClientOptions } from 'openai';
import { Model as OpenAIModel } from 'openai/resources/models.js';
import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, LLMFIMMessage, ModelListParams } from '../../common/llmMessageTypes.js';
import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper } from '../../browser/helpers/extractCodeFromResult.js';
import { LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText } from '../../common/llmMessageTypes.js';
import { InternalToolInfo, isAToolName } from '../../common/toolsService.js';
import { defaultProviderSettings, displayInfoOfProviderName, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
import { prepareFIMMessage, prepareMessages } from './preprocessLLMMessages.js';
import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper } from '../../browser/helpers/extractCodeFromResult.js';
@ -677,7 +677,7 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
supportsReasoningOutput,
supportsSystemMessage,
supportsTools,
maxOutputTokens,
// maxOutputTokens, right now we are ignoring this
} = getModelCapabilities(providerName, modelName_)
const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, })
@ -686,9 +686,8 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
const includeInPayload = supportsReasoningOutput ? modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.input?.includeInPayload || {} : {}
const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {}
const maxTokensObj = maxOutputTokens ? { max_tokens: maxOutputTokens } : {}
const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload })
const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, ...maxTokensObj }
const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, }
const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.output ?? {}
@ -727,15 +726,20 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
fullReasoningSoFar += newReasoning
}
onText({ newText, fullText: fullTextSoFar, newReasoning, fullReasoning: fullReasoningSoFar })
onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
}
// on final
const toolCalls = toolCallsFrom_OpenAICompat(toolCallOfIndex)
if (manuallyParseReasoning) {
const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, supportsReasoningOutput.openSourceThinkTags)
onFinalMessage({ fullText, fullReasoning, toolCalls });
} else {
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, toolCalls });
if (!fullTextSoFar && !fullReasoningSoFar && toolCalls.length === 0) {
onError({ message: 'Void: Response from model was empty.', fullError: null })
}
else {
if (manuallyParseReasoning) {
const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, supportsReasoningOutput.openSourceThinkTags)
onFinalMessage({ fullText, fullReasoning, toolCalls });
} else {
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, toolCalls });
}
}
})
// when error/fail - this catches errors of both .create() and .then(for await)
@ -823,7 +827,7 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
})
// when receive text
stream.on('text', (newText, fullText) => {
onText({ newText, fullText, newReasoning: '', fullReasoning: '' })
onText({ fullText, fullReasoning: '' })
})
// when we get the final message on this stream (or when error/fail)
stream.on('finalMessage', (response) => {