From 0de63a088d7922c82e5204d30c94b566309de1e5 Mon Sep 17 00:00:00 2001
From: Andrew Pareles <andrewpareles@gmail.com>
Date: Wed, 26 Feb 2025 01:59:59 -0800
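Subject: [PATCH] reasoning UI: show streamed model reasoning in the sidebar

- OnText now carries only the cumulative fullText/fullReasoning (newText and
  newReasoning are removed); editCodeService derives its delta from fullText.
- ChatMessage: the 'system' variant and the assistant displayContent field are
  removed. NOTE(review): the type carries a WARNING that format changes need a
  migration of stored threads - confirm whether one is required here.
- extractReasoningOnTextWrapper is rewritten to split fullText around the think
  tags by index; it still contains console.log debugging output.
- SidebarChat renders the reasoning dropdown above the message content and
  shows the streaming bubble as soon as either reasoning or text has arrived.
- _sendOpenAICompatibleChat no longer sends max_tokens and reports an error
  when the response has no text, no reasoning and no tool calls.
- SearchReplaceService.send and its LLM imports are commented out.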
---
 .../contrib/void/browser/chatThreadService.ts |  19 ++--
 .../contrib/void/browser/editCodeService.ts   |   9 +-
 .../browser/helpers/extractCodeFromResult.ts  |  99 +++++++++--------
 .../react/src/markdown/ChatMarkdownRender.tsx |  14 +--
 .../react/src/sidebar-tsx/SidebarChat.tsx     | 103 ++++++++++--------
 .../void/browser/searchReplaceCacheService.ts |  26 ++---
 .../contrib/void/common/llmMessageTypes.ts    |   4 +-
 .../void/electron-main/llmMessage/MODELS.ts   |  30 ++---
 8 files changed, 163 insertions(+), 141 deletions(-)
diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts
index cc64fc92..98f98f8e 100644
--- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts
+++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts
@@ -60,11 +60,7 @@ export type ToolMessage<T extends ToolName> = {
 
 // WARNING: changing this format is a big deal!!!!!! need to migrate old format to new format on users' computers so people don't get errors.
 export type ChatMessage =
-	| {
-		role: 'system';
-		content: string;
-		displayContent?: undefined;
-	} | {
+	{
 		role: 'user';
 		content: string | null; // content displayed to the LLM on future calls - allowed to be '', will be replaced with (empty)
 		displayContent: string | null; // content displayed to user  - allowed to be '', will be ignored
@@ -76,7 +72,6 @@ export type ChatMessage =
 	} | {
 		role: 'assistant';
 		content: string | null; // content received from LLM  - allowed to be '', will be replaced with (empty)
-		displayContent: string | null; // content displayed to user (this is the same as content for now) - allowed to be '', will be ignored
 		reasoning: string | null; // reasoning from the LLM, used for step-by-step thinking
 	}
 	| ToolMessage<ToolName>
@@ -332,9 +327,9 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 
 	// ---------- streaming ----------
 
-	private _finishStreamingTextMessage = (threadId: string, content: string, error?: { message: string, fullError: Error | null }, reasoning?: string) => {
+	private _finishStreamingTextMessage = (threadId: string, options: { content: string, reasoning?: string }, error?: { message: string, fullError: Error | null }) => {
 		// add assistant's message to chat history, and clear selection
-		this._addMessageToThread(threadId, { role: 'assistant', content, displayContent: content || null, reasoning: reasoning || null })
+		this._addMessageToThread(threadId, { role: 'assistant', content: options.content, reasoning: options.reasoning || null })
 		this._setStreamState(threadId, { messageSoFar: undefined, reasoningSoFar: undefined, streamingToken: undefined, error })
 	}
 
@@ -439,10 +434,10 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 					onFinalMessage: async ({ fullText, toolCalls, fullReasoning }) => {
 
 						if ((toolCalls?.length ?? 0) === 0) {
-							this._finishStreamingTextMessage(threadId, fullText, undefined, fullReasoning)
+							this._finishStreamingTextMessage(threadId, { content: fullText, reasoning: fullReasoning })
 						}
 						else {
-							this._addMessageToThread(threadId, { role: 'assistant', content: fullText, displayContent: fullText, reasoning: fullReasoning || null })
+							this._addMessageToThread(threadId, { role: 'assistant', content: fullText, reasoning: fullReasoning || null })
 							this._setStreamState(threadId, { messageSoFar: undefined, reasoningSoFar: undefined }) // clear streaming message
 							for (const tool of toolCalls ?? []) {
 								const toolName = tool.name as ToolName
@@ -479,7 +474,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 					onError: (error) => {
 						const messageSoFar = this.streamState[threadId]?.messageSoFar ?? ''
 						const reasoningSoFar = this.streamState[threadId]?.reasoningSoFar ?? ''
-						this._finishStreamingTextMessage(threadId, messageSoFar, error, reasoningSoFar)
+						this._finishStreamingTextMessage(threadId, { content: messageSoFar, reasoning: reasoningSoFar }, error)
 						res_()
 					},
 				})
@@ -499,7 +494,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 		if (llmCancelToken !== undefined) this._llmMessageService.abort(llmCancelToken)
 		const messageSoFar = this.streamState[threadId]?.messageSoFar ?? ''
 		const reasoningSoFar = this.streamState[threadId]?.reasoningSoFar ?? ''
-		this._finishStreamingTextMessage(threadId, messageSoFar, undefined, reasoningSoFar)
+		this._finishStreamingTextMessage(threadId, { content: messageSoFar, reasoning: reasoningSoFar })
 	}
 
 	dismissStreamError(threadId: string): void {
diff --git a/src/vs/workbench/contrib/void/browser/editCodeService.ts b/src/vs/workbench/contrib/void/browser/editCodeService.ts
index 0f92d4ef..5ef2e555 100644
--- a/src/vs/workbench/contrib/void/browser/editCodeService.ts
+++ b/src/vs/workbench/contrib/void/browser/editCodeService.ts
@@ -1400,7 +1400,7 @@ class EditCodeService extends Disposable implements IEditCodeService {
 		const latestStreamInfoMutable: StreamLocationMutable = { line: diffZone.startLine, addedSplitYet: false, col: 1, originalCodeStartLine: 1 }
 
 		// state used in onText:
-		let fullText = ''
+		let fullTextSoFar = '' // so far (INCLUDING ignored suffix)
 		let prevIgnoredSuffix = ''
 
 		streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({
@@ -1408,12 +1408,13 @@ class EditCodeService extends Disposable implements IEditCodeService {
 			useProviderFor: opts.from === 'ClickApply' ? 'Apply' : 'Ctrl+K',
 			logging: { loggingName: `startApplying - ${from}` },
 			messages,
-			onText: ({ newText: newText_ }) => {
+			onText: ({ fullText: fullText_ }) => {
+				const newText_ = fullText_.substring(fullTextSoFar.length, Infinity)
 
 				const newText = prevIgnoredSuffix + newText_ // add the previously ignored suffix because it's no longer the suffix!
-				fullText += prevIgnoredSuffix + newText // full text, including ```, etc
+				fullTextSoFar += newText // full text, including ```, etc
 
-				const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullText, newText.length)
+				const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullTextSoFar, newText.length)
 				const { endLineInLlmTextSoFar } = this._writeStreamedDiffZoneLLMText(uri, originalCode, croppedText, deltaCroppedText, latestStreamInfoMutable)
 				diffZone._streamState.line = (diffZone.startLine - 1) + endLineInLlmTextSoFar // change coordinate systems from originalCode to full file
 
diff --git a/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts b/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts
index 564a565d..21f6e6f5 100644
--- a/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts
+++ b/src/vs/workbench/contrib/void/browser/helpers/extractCodeFromResult.ts
@@ -173,7 +173,7 @@ export type ExtractedSearchReplaceBlock = {
 
 const endsWithAnyPrefixOf = (str: string, anyPrefix: string) => {
 	// for each prefix
-	for (let i = anyPrefix.length; i >= 0; i--) {
+	for (let i = anyPrefix.length; i >= 1; i--) { // stop at i = 1: the empty prefix would match every string, since endsWith('') is always true
 		const prefix = anyPrefix.slice(0, i)
 		if (str.endsWith(prefix)) return prefix
 	}
@@ -252,93 +252,104 @@ export const extractSearchReplaceBlocks = (str: string) => {
 
 // could simplify this - this assumes we can never add a tag without committing it to the user's screen, but that's not true
 export const extractReasoningOnTextWrapper = (onText: OnText, thinkTags: [string, string]): OnText => {
-
-
-	let latestAddIdx = 0 // exclusive
+	let latestAddIdx = 0 // index into fullText_ of the first character not yet consumed (exclusive end of the processed part)
 	let foundTag1 = false
 	let foundTag2 = false
 
-	let fullText = ''
-	let fullReasoning = ''
+	let fullTextSoFar = ''
+	let fullReasoningSoFar = ''
 
-	const newOnText: OnText = ({ newText: newText_, fullText: fullText_ }) => {
+	let onText_ = onText
+	onText = (params) => {
+		onText_(params)
+	}
 
-		//     abcdef<t|hin|k>ghi
-		//           |
+	const newOnText: OnText = ({ fullText: fullText_ }) => {
 		// until found the first think tag, keep adding to fullText
 		if (!foundTag1) {
 			const endsWithTag1 = endsWithAnyPrefixOf(fullText_, thinkTags[0])
 			if (endsWithTag1) {
+				console.log('endswith1', { fullTextSoFar, fullReasoningSoFar, fullText_ })
 				// wait until we get the full tag or know more
 				return
 			}
 			// if found the first tag
-			const tag1Index = fullText_.lastIndexOf(thinkTags[0])
+			const tag1Index = fullText_.indexOf(thinkTags[0])
 			if (tag1Index !== -1) {
+				console.log('tag1Index !==1', { tag1Index, fullTextSoFar, fullReasoningSoFar, thinkTags, fullText_ })
 				foundTag1 = true
-				const newText = fullText.substring(latestAddIdx, tag1Index)
-				const newReasoning = fullText.substring(tag1Index + thinkTags[0].length, Infinity)
-
-				fullText += newText
-				fullReasoning += newReasoning
-				latestAddIdx += newText.length + newReasoning.length
-				onText({ newText, fullText, newReasoning: newReasoning, fullReasoning })
+				// Add text before the tag to fullTextSoFar
+				fullTextSoFar += fullText_.substring(0, tag1Index)
+				// Update latestAddIdx to after the first tag
+				latestAddIdx = tag1Index + thinkTags[0].length
+				onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
 				return
 			}
 
+			console.log('adding to text A', { fullTextSoFar, fullReasoningSoFar })
 			// add the text to fullText
-			const newText = fullText.substring(latestAddIdx, Infinity)
-			fullText += newText
-			latestAddIdx += newText.length
-			onText({ newText, fullText, newReasoning: '', fullReasoning })
+			fullTextSoFar = fullText_
+			latestAddIdx = fullText_.length
+			onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
 			return
 		}
+
 		// at this point, we found <tag1>
 
 		// until found the second think tag, keep adding to fullReasoning
 		if (!foundTag2) {
 			const endsWithTag2 = endsWithAnyPrefixOf(fullText_, thinkTags[1])
 			if (endsWithTag2) {
+				console.log('endsWith2', { fullTextSoFar, fullReasoningSoFar })
 				// wait until we get the full tag or know more
 				return
 			}
-			// if found the second tag
-			const tag2Index = fullText_.lastIndexOf(thinkTags[1])
-			if (tag2Index !== -1) {
-				foundTag2 = true
-				const newReasoning = fullText.substring(latestAddIdx, tag2Index)
-				const newText = fullText.substring(tag2Index + thinkTags[1].length, Infinity)
 
-				fullText += newText
-				fullReasoning += newReasoning
-				latestAddIdx += newText.length + newReasoning.length
-				onText({ newText, fullText, newReasoning: newReasoning, fullReasoning })
+			// if found the second tag
+			const tag2Index = fullText_.indexOf(thinkTags[1], latestAddIdx)
+			if (tag2Index !== -1) {
+				console.log('tag2Index !== -1', { fullTextSoFar, fullReasoningSoFar })
+				foundTag2 = true
+				// Add everything between first and second tag to reasoning
+				fullReasoningSoFar += fullText_.substring(latestAddIdx, tag2Index)
+				// Update latestAddIdx to after the second tag
+				latestAddIdx = tag2Index + thinkTags[1].length
+				onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
 				return
 			}
 
-			// add the text to fullReasoning
-			const newReasoning = fullText.substring(latestAddIdx, Infinity)
-			fullReasoning += newReasoning
-			latestAddIdx += newReasoning.length
-			onText({ newText: '', fullText, newReasoning, fullReasoning })
+			// add the text to fullReasoning (content after first tag but before second tag)
+			console.log('adding to text B', { fullTextSoFar, fullReasoningSoFar })
+
+			// If we have more text than we've processed, add it to reasoning
+			if (fullText_.length > latestAddIdx) {
+				fullReasoningSoFar += fullText_.substring(latestAddIdx)
+				latestAddIdx = fullText_.length
+			}
+
+			onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
 			return
 		}
-		// at this point, we found <tag2>
 
-		fullText += newText_
-		const newText = fullText.substring(latestAddIdx, Infinity)
-		latestAddIdx += newText.length
-		onText({ newText, fullText, newReasoning: '', fullReasoning })
+		// at this point, we found <tag2> - content after the second tag is normal text
+		console.log('adding to text C', { fullTextSoFar, fullReasoningSoFar })
+
+		// Add any new text after the closing tag to fullTextSoFar
+		if (fullText_.length > latestAddIdx) {
+			fullTextSoFar += fullText_.substring(latestAddIdx)
+			latestAddIdx = fullText_.length
+		}
+
+		onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
 	}
 
-
 	return newOnText
 }
 
 
 export const extractReasoningOnFinalMessage = (fullText_: string, thinkTags: [string, string]): { fullText: string, fullReasoning: string } => {
-	const tag1Idx = fullText_.lastIndexOf(thinkTags[0])
-	const tag2Idx = fullText_.lastIndexOf(thinkTags[1])
+	const tag1Idx = fullText_.indexOf(thinkTags[0])
+	const tag2Idx = fullText_.indexOf(thinkTags[1])
 	if (tag1Idx === -1 || tag2Idx === -1) return { fullText: fullText_, fullReasoning: '' }
 	const fullText = fullText_.substring(0, tag1Idx) + fullText_.substring(tag2Idx + thinkTags[1].length, Infinity)
 	const fullReasoning = fullText.substring(tag1Idx + thinkTags[0].length, tag2Idx)
diff --git a/src/vs/workbench/contrib/void/browser/react/src/markdown/ChatMarkdownRender.tsx b/src/vs/workbench/contrib/void/browser/react/src/markdown/ChatMarkdownRender.tsx
index d2f24569..1aa55e61 100644
--- a/src/vs/workbench/contrib/void/browser/react/src/markdown/ChatMarkdownRender.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/markdown/ChatMarkdownRender.tsx
@@ -33,7 +33,7 @@ export const CodeSpan = ({ children, className }: { children: React.ReactNode, c
 	</code>
 }
 
-const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }: { token: Token | string, nested?: boolean, noSpace?: boolean, chatMessageLocation?: ChatMessageLocation, tokenIdx: string }): JSX.Element => {
+const RenderToken = ({ token, nested, noSpace, chatMessageLocationForApply, tokenIdx }: { token: Token | string, nested?: boolean, noSpace?: boolean, chatMessageLocationForApply?: ChatMessageLocation, tokenIdx: string }): JSX.Element => {
 
 
 	// deal with built-in tokens first (assume marked token)
@@ -45,9 +45,9 @@ const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }:
 
 	if (t.type === "code") {
 
-		const applyBoxId = chatMessageLocation ? getApplyBoxId({
-			threadId: chatMessageLocation.threadId,
-			messageIdx: chatMessageLocation.messageIdx,
+		const applyBoxId = chatMessageLocationForApply ? getApplyBoxId({
+			threadId: chatMessageLocationForApply.threadId,
+			messageIdx: chatMessageLocationForApply.messageIdx,
 			tokenIdx: tokenIdx,
 		}) : null
 
@@ -131,7 +131,7 @@ const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }:
 							<input type="checkbox" checked={item.checked} readOnly className="mr-2 form-checkbox" />
 						)}
 						<span className="ml-1">
-							<ChatMarkdownRender chatMessageLocation={chatMessageLocation} string={item.text} nested={true} />
+							<ChatMarkdownRender chatMessageLocationForApply={chatMessageLocationForApply} string={item.text} nested={true} />
 						</span>
 					</li>
 				))}
@@ -243,12 +243,12 @@ const RenderToken = ({ token, nested, noSpace, chatMessageLocation, tokenIdx }:
 	)
 }
 
-export const ChatMarkdownRender = ({ string, nested = false, noSpace, chatMessageLocation }: { string: string, nested?: boolean, noSpace?: boolean, chatMessageLocation?: ChatMessageLocation }) => {
+export const ChatMarkdownRender = ({ string, nested = false, noSpace, chatMessageLocationForApply }: { string: string, nested?: boolean, noSpace?: boolean, chatMessageLocationForApply?: ChatMessageLocation }) => {
 	const tokens = marked.lexer(string); // https://marked.js.org/using_pro#renderer
 	return (
 		<>
 			{tokens.map((token, index) => (
-				<RenderToken key={index} token={token} nested={nested} noSpace={noSpace} chatMessageLocation={chatMessageLocation} tokenIdx={index + ''} />
+				<RenderToken key={index} token={token} nested={nested} noSpace={noSpace} chatMessageLocationForApply={chatMessageLocationForApply} tokenIdx={index + ''} />
 			))}
 		</>
 	)
diff --git a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
index 05ec8419..dd35e8e7 100644
--- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
@@ -139,6 +139,9 @@ export const IconLoading = ({ className = '' }: { className?: string }) => {
 }
 
 
+const getChatBubbleId = (threadId: string, messageIdx: number) => `${threadId}-${messageIdx}`;
+
+
 interface VoidChatAreaProps {
 	// Required
 	children: React.ReactNode; // This will be the input component
@@ -696,9 +699,12 @@ const toolResultToComponent: ToolResultToComponent = {
 
 
 type ChatBubbleMode = 'display' | 'edit'
-const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatMessage, messageIdx?: number, isLoading?: boolean, }) => {
+const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatMessage, messageIdx: number, isLoading?: boolean, }) => {
 
 	const role = chatMessage.role
+	// Only show reasoning dropdown when there's actual content
+	const hasReasoning = chatMessage.role === 'assistant' && chatMessage.reasoning
+
 	const [isReasoningOpen, setIsReasoningOpen] = useState(false)
 
 	const accessor = useAccessor()
@@ -839,46 +845,45 @@ const ChatBubble = ({ chatMessage, isLoading, messageIdx }: { chatMessage: ChatM
 	}
 	else if (role === 'assistant') {
 		const thread = chatThreadsService.getCurrentThread()
-		const hasReasoning = !!chatMessage.reasoning
 
 		const chatMessageLocation: ChatMessageLocation = {
 			threadId: thread.id,
-			messageIdx: messageIdx!,
+			messageIdx: messageIdx,
 		}
 
-		chatbubbleContents = (
-			<>
-				{/* Always show the content */}
-				<ChatMarkdownRender string={chatMessage.displayContent ?? ''} chatMessageLocation={chatMessageLocation} />
 
-				{/* Show reasoning in a dropdown if it exists */}
-				{hasReasoning && (
-					<div className="mx-4 select-none mt-2">
-						<div className="border border-void-border-3 rounded px-1 py-0.5 bg-void-bg-tool">
-							<div
-								className="flex items-center min-h-[24px] cursor-pointer hover:brightness-125 transition-all duration-150"
-								onClick={() => setIsReasoningOpen(!isReasoningOpen)}
-							>
-								<ChevronRight
-									className={`text-void-fg-3 mr-0.5 h-5 w-5 flex-shrink-0 transition-transform duration-100 ease-[cubic-bezier(0.4,0,0.2,1)] ${isReasoningOpen ? 'rotate-90' : ''}`}
-								/>
-								<div className="flex items-center flex-wrap gap-x-2 gap-y-0.5">
-									<span className="text-void-fg-3">Reasoning</span>
-									<span className="text-void-fg-4 text-xs italic">Model's step-by-step thinking</span>
-								</div>
-							</div>
-							<div
-								className={`mt-1 overflow-hidden transition-all duration-200 ease-in-out ${isReasoningOpen ? 'max-h-[500px] opacity-100' : 'max-h-0 opacity-0'}`}
-							>
-								<div className="text-void-fg-2 p-2 bg-void-bg-1 rounded">
-									<ChatMarkdownRender string={chatMessage.reasoning ?? ''} chatMessageLocation={chatMessageLocation} />
-								</div>
-							</div>
+		const reasoningDropdown = hasReasoning ? (
+			<div className="mx-4 select-none mt-2">
+				<div className="border border-void-border-3 rounded px-1 py-0.5 bg-void-bg-tool">
+					<div
+						className="flex items-center min-h-[24px] cursor-pointer hover:brightness-125 transition-all duration-150"
+						onClick={() => setIsReasoningOpen(!isReasoningOpen)}
+					>
+						<ChevronRight
+							className={`text-void-fg-3 mr-0.5 h-5 w-5 flex-shrink-0 transition-transform duration-100 ease-[cubic-bezier(0.4,0,0.2,1)] ${isReasoningOpen ? 'rotate-90' : ''}`}
+						/>
+						<div className="flex items-center flex-wrap gap-x-2 gap-y-0.5">
+							<span className="text-void-fg-3">Reasoning</span>
+							<span className="text-void-fg-4 text-xs italic">Model's step-by-step thinking</span>
 						</div>
 					</div>
-				)}
-			</>
-		)
+					<div
+						className={`mt-1 overflow-hidden transition-all duration-200 ease-in-out ${isReasoningOpen ? 'max-h-[500px] opacity-100' : 'max-h-0 opacity-0'}`}
+					>
+						<div className="text-void-fg-2 p-2 bg-void-bg-1 rounded">
+							<ChatMarkdownRender string={chatMessage.reasoning ?? ''} chatMessageLocationForApply={chatMessageLocation} />
+						</div>
+					</div>
+				</div>
+			</div>
+		) : null
+
+		chatbubbleContents = (<>
+			{/* Reasoning dropdown (conditional) */}
+			{reasoningDropdown}
+			{/* Main content */}
+			<ChatMarkdownRender string={chatMessage.content ?? ''} chatMessageLocationForApply={chatMessageLocation} />
+		</>)
 	}
 	else if (role === 'tool') {
 
@@ -1029,13 +1034,27 @@ export const SidebarChat = () => {
 	}, [isHistoryOpen, currentThread.id])
 
 
-	const prevMessagesHTML = useMemo(() => {
+	const pastMessagesHTML = useMemo(() => {
 		return previousMessages.map((message, i) =>
-			<ChatBubble key={i} chatMessage={message} messageIdx={i} />
+			<ChatBubble key={getChatBubbleId(currentThread.id, i)} chatMessage={message} messageIdx={i} />
 		)
 	}, [previousMessages])
 
 
+	const streamingChatIdx = pastMessagesHTML.length
+	const currStreamingMessageHTML = !!(reasoningSoFar || messageSoFar) ?
+		<ChatBubble key={getChatBubbleId(currentThread.id, streamingChatIdx)}
+			messageIdx={streamingChatIdx} chatMessage={{
+				role: 'assistant',
+				content: messageSoFar ?? null,
+				reasoning: reasoningSoFar ?? null,
+			}}
+			isLoading={isStreaming}
+		/> : null
+
+	const allMessagesHTML = [...pastMessagesHTML, currStreamingMessageHTML]
+
+
 	const threadSelector = <div ref={historyRef}
 		className={`w-full h-auto ${isHistoryOpen ? '' : 'hidden'} ring-2 ring-widget-shadow ring-inset z-10`}
 	>
@@ -1053,20 +1072,12 @@ export const SidebarChat = () => {
 		overflow-x-hidden
 		overflow-y-auto
 		py-4
-		${prevMessagesHTML.length === 0 && !messageSoFar ? 'hidden' : ''}
+		${pastMessagesHTML.length === 0 && !messageSoFar ? 'hidden' : ''}
 	`}
 		style={{ maxHeight: sidebarDimensions.height - historyDimensions.height - chatAreaDimensions.height - 36 }} // the height of the previousMessages is determined by all other heights
 	>
 		{/* previous messages */}
-		{prevMessagesHTML}
-
-		{/* message stream */}
-		{messageSoFar && <ChatBubble chatMessage={{
-			role: 'assistant',
-			content: messageSoFar,
-			displayContent: messageSoFar || null,
-			reasoning: reasoningSoFar || null,
-		}} isLoading={isStreaming} />}
+		{allMessagesHTML}
 
 
 		{/* error message */}
@@ -1101,7 +1112,7 @@ export const SidebarChat = () => {
 			isStreaming={isStreaming}
 			isDisabled={isDisabled}
 			showSelections={true}
-			showProspectiveSelections={prevMessagesHTML.length === 0}
+			showProspectiveSelections={pastMessagesHTML.length === 0}
 			selections={selections}
 			setSelections={setSelections}
 			onClickAnywhere={() => { textAreaRef.current?.focus() }}
diff --git a/src/vs/workbench/contrib/void/browser/searchReplaceCacheService.ts b/src/vs/workbench/contrib/void/browser/searchReplaceCacheService.ts
index e7a9448e..e50a6d12 100644
--- a/src/vs/workbench/contrib/void/browser/searchReplaceCacheService.ts
+++ b/src/vs/workbench/contrib/void/browser/searchReplaceCacheService.ts
@@ -7,8 +7,8 @@ import { Emitter, Event } from '../../../../base/common/event.js';
 import { Disposable } from '../../../../base/common/lifecycle.js';
 import { InstantiationType, registerSingleton } from '../../../../platform/instantiation/common/extensions.js';
 import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
-import { ILLMMessageService } from '../common/llmMessageService.js';
-import { ServiceSendLLMMessageParams } from '../common/llmMessageTypes.js';
+// import { ILLMMessageService } from '../common/llmMessageService.js';
+// import { ServiceSendLLMMessageParams } from '../common/llmMessageTypes.js';
 
 
 
@@ -24,22 +24,22 @@ class SearchReplaceService extends Disposable implements ISearchReplaceService {
 	readonly onDidChangeState: Event<void> = this._onDidChangeState.event;
 
 	constructor(
-		@ILLMMessageService private readonly llmMessageService: ILLMMessageService,
+		// @ILLMMessageService private readonly llmMessageService: ILLMMessageService,
 	) {
 		super()
 	}
 
-	send(params: Omit<ServiceSendLLMMessageParams, 'onText'> & { onText: (p: { newText: string, fullText: string }) => { retry: boolean } }) {
-		this.llmMessageService.sendLLMMessage({
-			...params as ServiceSendLLMMessageParams,
-			onText: (p) => {
-				const { retry } = params.onText(p)
-				if (retry) {
+	// send(params: ServiceSendLLMMessageParams & { onText: (p: { newText: string, fullText: string }) => { retry: boolean } }) {
+	// 	this.llmMessageService.sendLLMMessage({
+	// 		...params as ServiceSendLLMMessageParams,
+	// 		onText: (p) => {
+	// 			const { retry } = params.onText(p)
+	// 			if (retry) {
 
-				}
-			}
-		})
-	}
+	// 			}
+	// 		}
+	// 	})
+	// }
 
 }
 
diff --git a/src/vs/workbench/contrib/void/common/llmMessageTypes.ts b/src/vs/workbench/contrib/void/common/llmMessageTypes.ts
index 93ef12b3..e8800562 100644
--- a/src/vs/workbench/contrib/void/common/llmMessageTypes.ts
+++ b/src/vs/workbench/contrib/void/common/llmMessageTypes.ts
@@ -45,14 +45,14 @@ export type ToolCallType = {
 }
 
 
-export type OnText = (p: { newText: string, fullText: string; newReasoning: string; fullReasoning: string }) => void
+export type OnText = (p: { fullText: string; fullReasoning: string }) => void
 export type OnFinalMessage = (p: { fullText: string, toolCalls?: ToolCallType[], fullReasoning?: string }) => void // id is tool_use_id
 export type OnError = (p: { message: string, fullError: Error | null }) => void
 export type AbortRef = { current: (() => void) | null }
 
 
 export const toLLMChatMessage = (c: ChatMessage): LLMChatMessage => {
-	if (c.role === 'system' || c.role === 'user') {
+	if (c.role === 'user') {
 		return { role: c.role, content: c.content || '(empty message)' }
 	}
 	else if (c.role === 'assistant')
diff --git a/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts b/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts
index c19aca3d..f9fd0cf5 100644
--- a/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts
+++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/MODELS.ts
@@ -3,16 +3,16 @@
  *  Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
  *--------------------------------------------------------------------------------------*/
 
-import OpenAI, { ClientOptions } from 'openai';
 import Anthropic from '@anthropic-ai/sdk';
 import { Ollama } from 'ollama';
+import OpenAI, { ClientOptions } from 'openai';
 
 import { Model as OpenAIModel } from 'openai/resources/models.js';
-import { OllamaModelResponse, OnText, OnFinalMessage, OnError, LLMChatMessage, LLMFIMMessage, ModelListParams } from '../../common/llmMessageTypes.js';
+import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper } from '../../browser/helpers/extractCodeFromResult.js';
+import { LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText } from '../../common/llmMessageTypes.js';
 import { InternalToolInfo, isAToolName } from '../../common/toolsService.js';
 import { defaultProviderSettings, displayInfoOfProviderName, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
 import { prepareFIMMessage, prepareMessages } from './preprocessLLMMessages.js';
-import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper } from '../../browser/helpers/extractCodeFromResult.js';
 
 
 
@@ -677,7 +677,7 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
 		supportsReasoningOutput,
 		supportsSystemMessage,
 		supportsTools,
-		maxOutputTokens,
+		// maxOutputTokens is deliberately not read here: max_tokens is not sent in the request for now
 	} = getModelCapabilities(providerName, modelName_)
 
 	const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, })
@@ -686,9 +686,8 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
 	const includeInPayload = supportsReasoningOutput ? modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.input?.includeInPayload || {} : {}
 
 	const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {}
-	const maxTokensObj = maxOutputTokens ? { max_tokens: maxOutputTokens } : {}
 	const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload })
-	const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, ...maxTokensObj }
+	const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, }
 
 	const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = modelSettingsOfProvider[providerName].ifSupportsReasoningOutput?.output ?? {}
 
@@ -727,15 +726,20 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
 					fullReasoningSoFar += newReasoning
 				}
 
-				onText({ newText, fullText: fullTextSoFar, newReasoning, fullReasoning: fullReasoningSoFar })
+				onText({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar })
 			}
 			// on final
 			const toolCalls = toolCallsFrom_OpenAICompat(toolCallOfIndex)
-			if (manuallyParseReasoning) {
-				const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, supportsReasoningOutput.openSourceThinkTags)
-				onFinalMessage({ fullText, fullReasoning, toolCalls });
-			} else {
-				onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, toolCalls });
+			if (!fullTextSoFar && !fullReasoningSoFar && toolCalls.length === 0) {
+				onError({ message: 'Void: Response from model was empty.', fullError: null })
+			}
+			else {
+				if (manuallyParseReasoning) {
+					const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, supportsReasoningOutput.openSourceThinkTags)
+					onFinalMessage({ fullText, fullReasoning, toolCalls });
+				} else {
+					onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, toolCalls });
+				}
 			}
 		})
 		// when error/fail - this catches errors of both .create() and .then(for await)
@@ -823,7 +827,7 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
 	})
 	// when receive text
 	stream.on('text', (newText, fullText) => {
-		onText({ newText, fullText, newReasoning: '', fullReasoning: '' })
+		onText({ fullText, fullReasoning: '' })
 	})
 	// when we get the final message on this stream (or when error/fail)
 	stream.on('finalMessage', (response) => {