diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts index 3a5bc403..cc64fc92 100644 --- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts +++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts @@ -77,6 +77,7 @@ export type ChatMessage = role: 'assistant'; content: string | null; // content received from LLM - allowed to be '', will be replaced with (empty) displayContent: string | null; // content displayed to user (this is the same as content for now) - allowed to be '', will be ignored + reasoning: string | null; // reasoning from the LLM, used for step-by-step thinking } | ToolMessage @@ -116,6 +117,7 @@ export type ThreadStreamState = { [threadId: string]: undefined | { error?: { message: string, fullError: Error | null, }; messageSoFar?: string; + reasoningSoFar?: string; streamingToken?: string; } } @@ -330,10 +332,10 @@ class ChatThreadService extends Disposable implements IChatThreadService { // ---------- streaming ---------- - private _finishStreamingTextMessage = (threadId: string, content: string, error?: { message: string, fullError: Error | null }) => { + private _finishStreamingTextMessage = (threadId: string, content: string, error?: { message: string, fullError: Error | null }, reasoning?: string) => { // add assistant's message to chat history, and clear selection - this._addMessageToThread(threadId, { role: 'assistant', content, displayContent: content || null }) - this._setStreamState(threadId, { messageSoFar: undefined, streamingToken: undefined, error }) + this._addMessageToThread(threadId, { role: 'assistant', content, displayContent: content || null, reasoning: reasoning || null }) + this._setStreamState(threadId, { messageSoFar: undefined, reasoningSoFar: undefined, streamingToken: undefined, error }) } @@ -431,17 +433,17 @@ class ChatThreadService extends Disposable implements IChatThreadService { tools: tools, - onText: ({ fullText }) => 
{ - this._setStreamState(threadId, { messageSoFar: fullText }) + onText: ({ fullText, fullReasoning }) => { + this._setStreamState(threadId, { messageSoFar: fullText, reasoningSoFar: fullReasoning }) }, - onFinalMessage: async ({ fullText, toolCalls }) => { + onFinalMessage: async ({ fullText, toolCalls, fullReasoning }) => { if ((toolCalls?.length ?? 0) === 0) { - this._finishStreamingTextMessage(threadId, fullText) + this._finishStreamingTextMessage(threadId, fullText, undefined, fullReasoning) } else { - this._addMessageToThread(threadId, { role: 'assistant', content: fullText, displayContent: fullText }) - this._setStreamState(threadId, { messageSoFar: undefined }) // clear streaming message + this._addMessageToThread(threadId, { role: 'assistant', content: fullText, displayContent: fullText, reasoning: fullReasoning || null }) + this._setStreamState(threadId, { messageSoFar: undefined, reasoningSoFar: undefined }) // clear streaming message for (const tool of toolCalls ?? []) { const toolName = tool.name as ToolName @@ -475,7 +477,9 @@ class ChatThreadService extends Disposable implements IChatThreadService { res_() }, onError: (error) => { - this._finishStreamingTextMessage(threadId, this.streamState[threadId]?.messageSoFar ?? '', error) + const messageSoFar = this.streamState[threadId]?.messageSoFar ?? '' + const reasoningSoFar = this.streamState[threadId]?.reasoningSoFar ?? '' + this._finishStreamingTextMessage(threadId, messageSoFar, error, reasoningSoFar) res_() }, }) @@ -493,7 +497,9 @@ class ChatThreadService extends Disposable implements IChatThreadService { cancelStreaming(threadId: string) { const llmCancelToken = this.streamState[threadId]?.streamingToken if (llmCancelToken !== undefined) this._llmMessageService.abort(llmCancelToken) - this._finishStreamingTextMessage(threadId, this.streamState[threadId]?.messageSoFar ?? '') + const messageSoFar = this.streamState[threadId]?.messageSoFar ?? 
'' + const reasoningSoFar = this.streamState[threadId]?.reasoningSoFar ?? '' + this._finishStreamingTextMessage(threadId, messageSoFar, undefined, reasoningSoFar) } dismissStreamError(threadId: string): void { @@ -506,7 +512,8 @@ class ChatThreadService extends Disposable implements IChatThreadService { getCurrentThread(): ChatThreads[string] { const state = this.state - return state.allThreads[state.currentThreadId] + const thread = state.allThreads[state.currentThreadId] + return thread } getFocusedMessageIdx() { @@ -644,7 +651,8 @@ class ChatThreadService extends Disposable implements IChatThreadService { } getCurrentThreadStagingSelections = () => { - return this.getCurrentThread().state.stagingSelections + const currentThread = this.getCurrentThread() + return currentThread.state.stagingSelections } setCurrentThreadStagingSelections = (stagingSelections: StagingSelectionItem[]) => { diff --git a/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts b/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts index 00eb2ef1..564a565d 100644 --- a/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts +++ b/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts @@ -251,10 +251,8 @@ export const extractSearchReplaceBlocks = (str: string) => { // could simplify this - this assumes we can never add a tag without committing it to the user's screen, but that's not true -export const extractReasoningFromText = ( - onText_: OnText, - thinkTags: [string, string], -): OnText => { +export const extractReasoningOnTextWrapper = (onText: OnText, thinkTags: [string, string]): OnText => { + let latestAddIdx = 0 // exclusive let foundTag1 = false @@ -263,7 +261,8 @@ export const extractReasoningFromText = ( let fullText = '' let fullReasoning = '' - const onText: OnText = ({ newText: newText_, fullText: fullText_ }) => { + const newOnText: OnText = ({ newText: newText_, fullText: fullText_ }) => { + // abcdefghi // | 
// until found the first think tag, keep adding to fullText @@ -283,7 +282,7 @@ export const extractReasoningFromText = ( fullText += newText fullReasoning += newReasoning latestAddIdx += newText.length + newReasoning.length - onText_({ newText, fullText, newReasoning: newReasoning, fullReasoning }) + onText({ newText, fullText, newReasoning: newReasoning, fullReasoning }) return } @@ -291,7 +290,7 @@ export const extractReasoningFromText = ( const newText = fullText.substring(latestAddIdx, Infinity) fullText += newText latestAddIdx += newText.length - onText_({ newText, fullText, newReasoning: '', fullReasoning }) + onText({ newText, fullText, newReasoning: '', fullReasoning }) return } // at this point, we found @@ -313,7 +312,7 @@ export const extractReasoningFromText = ( fullText += newText fullReasoning += newReasoning latestAddIdx += newText.length + newReasoning.length - onText_({ newText, fullText, newReasoning: newReasoning, fullReasoning }) + onText({ newText, fullText, newReasoning: newReasoning, fullReasoning }) return } @@ -321,7 +320,7 @@ export const extractReasoningFromText = ( const newReasoning = fullText.substring(latestAddIdx, Infinity) fullReasoning += newReasoning latestAddIdx += newReasoning.length - onText_({ newText: '', fullText, newReasoning, fullReasoning }) + onText({ newText: '', fullText, newReasoning, fullReasoning }) return } // at this point, we found @@ -329,8 +328,19 @@ export const extractReasoningFromText = ( fullText += newText_ const newText = fullText.substring(latestAddIdx, Infinity) latestAddIdx += newText.length - onText_({ newText, fullText, newReasoning: '', fullReasoning }) + onText({ newText, fullText, newReasoning: '', fullReasoning }) } - return onText + + return newOnText +} + + +export const extractReasoningOnFinalMessage = (fullText_: string, thinkTags: [string, string]): { fullText: string, fullReasoning: string } => { + const tag1Idx = fullText_.lastIndexOf(thinkTags[0]) + const tag2Idx = 
fullText_.lastIndexOf(thinkTags[1]) + if (tag1Idx === -1 || tag2Idx === -1) return { fullText: fullText_, fullReasoning: '' } + const fullText = fullText_.substring(0, tag1Idx) + fullText_.substring(tag2Idx + thinkTags[1].length, Infinity) + const fullReasoning = fullText_.substring(tag1Idx + thinkTags[0].length, tag2Idx) + return { fullText, fullReasoning } } diff --git a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx index d97647b0..05ec8419 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx @@ -699,6 +699,7 @@ type ChatBubbleMode = 'display' | 'edit' const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatMessage, messageIdx?: number, isLoading?: boolean, }) => { const role = chatMessage.role + const [isReasoningOpen, setIsReasoningOpen] = useState(false) const accessor = useAccessor() const chatThreadsService = accessor.get('IChatThreadService') @@ -733,7 +734,6 @@ const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatM const shouldInitialize = _justEnabledEdit.current || _mustInitialize.current if (canInitialize && shouldInitialize) { setStagingSelections(chatMessage.selections || []) - if (textAreaFnsRef.current) textAreaFnsRef.current.setValue(chatMessage.displayContent || '') @@ -839,13 +839,46 @@ const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatM } else if (role === 'assistant') { const thread = chatThreadsService.getCurrentThread() + const hasReasoning = !!chatMessage.reasoning const chatMessageLocation: ChatMessageLocation = { threadId: thread.id, messageIdx: messageIdx!, } - chatbubbleContents = + chatbubbleContents = ( + <> + {/* Always show the content */} + + + {/* Show reasoning in a dropdown if it exists */} + {hasReasoning && ( +
+
+
setIsReasoningOpen(!isReasoningOpen)} + > + +
+ Reasoning + Model's step-by-step thinking +
+
+
+
+ +
+
+
+
+ )} + + ) } else if (role === 'tool') { @@ -939,7 +972,7 @@ export const SidebarChat = () => { const currentThread = chatThreadsService.getCurrentThread() const previousMessages = currentThread?.messages ?? [] - const selections = chatThreadsService.getCurrentThread().state.stagingSelections + const selections = currentThread.state.stagingSelections const setSelections = (s: StagingSelectionItem[]) => { chatThreadsService.setCurrentThreadStagingSelections(s) } // stream state @@ -947,6 +980,7 @@ export const SidebarChat = () => { const isStreaming = !!currThreadStreamState?.streamingToken const latestError = currThreadStreamState?.error const messageSoFar = currThreadStreamState?.messageSoFar + const reasoningSoFar = currThreadStreamState?.reasoningSoFar // ----- SIDEBAR CHAT state (local) ----- @@ -1027,7 +1061,12 @@ export const SidebarChat = () => { {prevMessagesHTML} {/* message stream */} - + {messageSoFar && } {/* error message */} diff --git a/src/vs/workbench/contrib/void/common/llmMessageTypes.ts b/src/vs/workbench/contrib/void/common/llmMessageTypes.ts index abe88970..93ef12b3 100644 --- a/src/vs/workbench/contrib/void/common/llmMessageTypes.ts +++ b/src/vs/workbench/contrib/void/common/llmMessageTypes.ts @@ -46,7 +46,7 @@ export type ToolCallType = { export type OnText = (p: { newText: string, fullText: string; newReasoning: string; fullReasoning: string }) => void -export type OnFinalMessage = (p: { fullText: string, toolCalls?: ToolCallType[] }) => void // id is tool_use_id +export type OnFinalMessage = (p: { fullText: string, toolCalls?: ToolCallType[], fullReasoning?: string }) => void // id is tool_use_id export type OnError = (p: { message: string, fullError: Error | null }) => void export type AbortRef = { current: (() => void) | null } diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts index a4ad5487..c19aca3d 100644 --- 
a/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts +++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts @@ -12,7 +12,7 @@ import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, L import { InternalToolInfo, isAToolName } from '../../common/toolsService.js'; import { defaultProviderSettings, displayInfoOfProviderName, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js'; import { prepareFIMMessage, prepareMessages } from './preprocessLLMMessages.js'; -import { extractReasoningFromText } from '../../browser/helpers/extractCodeFromResult.js'; +import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper } from '../../browser/helpers/extractCodeFromResult.js'; @@ -32,7 +32,7 @@ type ModelOptions = { supportsReasoningOutput: false | { // you are allowed to not include openSourceThinkTags if it's not open source (no such cases as of writing) // if it's open source, put the think tags here so we parse them out in e.g. 
ollama - openSourceThinkTags?: [string, string] + readonly openSourceThinkTags?: [string, string] }; } @@ -641,9 +641,9 @@ const _sendOpenAICompatibleFIM = ({ messages: messages_, onFinalMessage, onError const { modelName, supportsFIM } = getModelCapabilities(providerName, modelName_) if (!supportsFIM) { if (modelName === modelName_) - onFinalMessage({ fullText: `Model ${modelName} does not support FIM.` }) + onError({ message: `Model ${modelName} does not support FIM.`, fullError: null }) else - onFinalMessage({ fullText: `Model ${modelName_} (${modelName}) does not support FIM.` }) + onError({ message: `Model ${modelName_} (${modelName}) does not support FIM.`, fullError: null }) return } @@ -691,11 +691,15 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, ...maxTokensObj } const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.output ?? {} - if (needsManualReasoningParse && supportsReasoningOutput && supportsReasoningOutput.openSourceThinkTags) - onText = extractReasoningFromText(onText, supportsReasoningOutput.openSourceThinkTags) - let fullReasoning = '' - let fullText = '' + const manuallyParseReasoning = needsManualReasoningParse && supportsReasoningOutput && supportsReasoningOutput.openSourceThinkTags + if (manuallyParseReasoning) { + onText = extractReasoningOnTextWrapper(onText, supportsReasoningOutput.openSourceThinkTags) + } + + + let fullReasoningSoFar = '' + let fullTextSoFar = '' const toolCallOfIndex: ToolCallOfIndex = {} openai.chat.completions .create(options) @@ -713,19 +717,26 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage } // message const newText = chunk.choices[0]?.delta?.content ?? 
'' - fullText += newText + fullTextSoFar += newText // reasoning let newReasoning = '' if (nameOfReasoningFieldInDelta) { // @ts-ignore newReasoning = (chunk.choices[0]?.delta?.[nameOfReasoningFieldInDelta] || '') + '' - fullReasoning += newReasoning + fullReasoningSoFar += newReasoning } - onText({ newText, fullText, newReasoning, fullReasoning }) + onText({ newText, fullText: fullTextSoFar, newReasoning, fullReasoning: fullReasoningSoFar }) + } + // on final + const toolCalls = toolCallsFrom_OpenAICompat(toolCallOfIndex) + if (manuallyParseReasoning) { + const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, supportsReasoningOutput.openSourceThinkTags) + onFinalMessage({ fullText, fullReasoning, toolCalls }); + } else { + onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, toolCalls }); } - onFinalMessage({ fullText, toolCalls: toolCallsFrom_OpenAICompat(toolCallOfIndex) }); }) // when error/fail - this catches errors of both .create() and .then(for await) .catch(error => { @@ -787,7 +798,7 @@ const toolCallsFromAnthropicContent = (content: Anthropic.Messages.ContentBlock[ }).filter(t => !!t) } -const sendAnthropicChat = ({ messages: messages_, onText, providerName, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => { +const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => { const { // supportsReasoning: modelSupportsReasoning, modelName, diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts index 90deffe2..11ae2b76 100644 --- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts +++ 
b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.ts @@ -63,22 +63,23 @@ export const sendLLMMessage = ({ _fullTextSoFar = fullText } - const onFinalMessage: OnFinalMessage = ({ fullText, toolCalls }) => { + const onFinalMessage: OnFinalMessage = (params) => { + const { fullText, fullReasoning } = params if (_didAbort) return - captureLLMEvent(`${loggingName} - Received Full Message`, { messageLength: fullText.length, duration: new Date().getMilliseconds() - submit_time.getMilliseconds() }) - onFinalMessage_({ fullText, toolCalls }) + captureLLMEvent(`${loggingName} - Received Full Message`, { messageLength: fullText.length, reasoningLength: fullReasoning?.length, duration: new Date().getTime() - submit_time.getTime() }) + onFinalMessage_(params) } - const onError: OnError = ({ message: error, fullError }) => { + const onError: OnError = ({ message: errorMessage, fullError }) => { if (_didAbort) return - console.error('sendLLMMessage onError:', error) + console.error('sendLLMMessage onError:', errorMessage) // handle failed to fetch errors, which give 0 information by design - if (error === 'TypeError: fetch failed') - error = `Failed to fetch from ${displayInfoOfProviderName(providerName).title}. This likely means you specified the wrong endpoint in Void's Settings, or your local model provider like Ollama is powered off.` + if (errorMessage === 'TypeError: fetch failed') + errorMessage = `Failed to fetch from ${displayInfoOfProviderName(providerName).title}. This likely means you specified the wrong endpoint in Void's Settings, or your local model provider like Ollama is powered off.` - captureLLMEvent(`${loggingName} - Error`, { error }) - onError_({ message: errorMessage, fullError }) + captureLLMEvent(`${loggingName} - Error`, { error: errorMessage }) + onError_({ message: errorMessage, fullError }) } const onAbort = () => {