Merge 4ffc1405ad into 17e7a5b152

2026-05-22 17:08:25 +00:00 · 2026-04-20 07:40:34 +00:00 · 2026-04-20 07:40:34 +00:00 · 2407ded440
commit 2407ded440
parent 17e7a5b152 4ffc1405ad
7 changed files with 240 additions and 17 deletions
--- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts
+++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts
@ -12,7 +12,7 @@ import { URI } from '../../../../base/common/uri.js';
 import { Emitter, Event } from '../../../../base/common/event.js';
 import { ILLMMessageService } from '../common/sendLLMMessageService.js';
 import { chat_userMessageContent, isABuiltinToolName } from '../common/prompt/prompts.js';
-import { AnthropicReasoning, getErrorMessage, RawToolCallObj, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
+import { AnthropicReasoning, getErrorMessage, type LLMUsage, RawToolCallObj, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
 import { generateUuid } from '../../../../base/common/uuid.js';
 import { FeatureName, ModelSelection, ModelSelectionOptions } from '../common/voidSettingsTypes.js';
 import { IVoidSettingsService } from '../common/voidSettingsService.js';
@ -119,6 +119,11 @@ export type ThreadType = {
 	messages: ChatMessage[];
 	filesWithUserChanges: Set<string>;

+	// Last-seen token usage from the LLM for this thread. Persisted so the
+	// context-usage ring shows a value immediately on reload (instead of only
+	// after the user sends a new message).
+	latestUsage?: LLMUsage;
+
 	// this doesn't need to go in a state object, but feels right
 	state: {
 		currCheckpointIdx: number | null; // the latest checkpoint we're at (null if not at a particular checkpoint, like if the chat is streaming, or chat just finished and we haven't clicked on a checkpt)
@ -232,6 +237,7 @@ export interface IChatThreadService {

 	readonly state: ThreadsState;
 	readonly streamState: ThreadStreamState; // not persistent
+	readonly latestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined }; // hydrated from persisted threads on startup; updated as the model streams

 	onDidChangeCurrentThread: Event<void>;
 	onDidChangeStreamState: Event<{ threadId: string }>
@ -305,6 +311,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 	readonly onDidChangeStreamState: Event<{ threadId: string }> = this._onDidChangeStreamState.event;

 	readonly streamState: ThreadStreamState = {}
+	readonly latestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined } = {}
 	state: ThreadsState // allThreads is persisted, currentThread is not

 	// used in checkpointing
@ -339,6 +346,13 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 			currentThreadId: null as unknown as string, // gets set in startNewThread()
 		}

+		// hydrate in-memory latestUsage map from the persisted threads so the
+		// context-usage ring shows the last-known values right after a reload
+		for (const id in allThreads) {
+			const t = allThreads[id]
+			if (t?.latestUsage) this.latestUsageOfThreadId[id] = t.latestUsage
+		}
+
 		// always be in a thread
 		this.openNewThread()

@ -484,6 +498,19 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 		this._onDidChangeStreamState.fire({ threadId })
 	}

+	// Records the most recent token usage reported by the LLM for `threadId`:
+	//  - updates the in-memory `latestUsageOfThreadId` map,
+	//  - stores it on the thread (when the thread exists) and persists all threads, so the
+	//    ring shows the last-known value after a reload,
+	//  - re-uses the streamState emitter so existing listeners (and the React mirror in
+	//    services.tsx) re-read without extra plumbing.
+	//
+	// This runs for every streamed chunk that carries usage, and providers may repeat the
+	// same numbers chunk after chunk. Identical values are therefore ignored so we do not
+	// re-persist every thread and re-notify listeners once per token.
+	private _setLatestUsage(threadId: string, usage: LLMUsage) {
+		// field-wise comparison: usage objects are re-created per event, so identity is useless
+		const isSameUsage = (prev: LLMUsage | undefined, next: LLMUsage) => !!prev
+			&& prev.inputTokens === next.inputTokens
+			&& prev.outputTokens === next.outputTokens
+			&& prev.totalTokens === next.totalTokens
+			&& prev.reasoningTokens === next.reasoningTokens
+
+		const thread = this.state.allThreads[threadId]
+		const memoryIsCurrent = isSameUsage(this.latestUsageOfThreadId[threadId], usage)
+		const threadIsCurrent = !thread || isSameUsage(thread.latestUsage, usage)
+		if (memoryIsCurrent && threadIsCurrent) return // nothing new to store or announce
+
+		this.latestUsageOfThreadId[threadId] = usage
+		if (thread && !threadIsCurrent) {
+			thread.latestUsage = usage
+			this._storeAllThreads(this.state.allThreads)
+		}
+		this._onDidChangeStreamState.fire({ threadId })
+	}
+

 	// ---------- streaming ----------

@ -811,10 +838,12 @@ class ChatThreadService extends Disposable implements IChatThreadService {
 					overridesOfModel,
 					logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } },
 					separateSystemMessage: separateSystemMessage,
-					onText: ({ fullText, fullReasoning, toolCall }) => {
+					onText: ({ fullText, fullReasoning, toolCall, usage }) => {
+						if (usage) this._setLatestUsage(threadId, usage)
 						this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) })
 					},
-					onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, }) => {
+					onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage }) => {
+						if (usage) this._setLatestUsage(threadId, usage)
 						resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls
 					},
 					onError: async (error) => {
--- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx
@ -6,7 +6,7 @@
 import React, { ButtonHTMLAttributes, FormEvent, FormHTMLAttributes, Fragment, KeyboardEvent, useCallback, useEffect, useMemo, useRef, useState } from 'react';


-import { useAccessor, useChatThreadsState, useChatThreadsStreamState, useSettingsState, useActiveURI, useCommandBarState, useFullChatThreadsStreamState } from '../util/services.js';
+import { useAccessor, useChatThreadsState, useChatThreadsStreamState, useSettingsState, useActiveURI, useCommandBarState, useFullChatThreadsStreamState, useChatThreadLatestUsage } from '../util/services.js';
 import { ScrollType } from '../../../../../../../editor/common/editorCommon.js';

 import { ChatMarkdownRender, ChatMessageLocation, getApplyBoxId } from '../markdown/ChatMarkdownRender.js';
@ -29,7 +29,7 @@ import { CopyButton, EditToolAcceptRejectButtonsHTML, IconShell1, JumpToFileButt
 import { IsRunningType } from '../../../chatThreadService.js';
 import { acceptAllBg, acceptBorder, buttonFontSize, buttonTextColor, rejectAllBg, rejectBg, rejectBorder } from '../../../../common/helpers/colors.js';
 import { builtinToolNames, isABuiltinToolName, MAX_FILE_CHARS_PAGE, MAX_TERMINAL_INACTIVE_TIME } from '../../../../common/prompt/prompts.js';
-import { RawToolCallObj } from '../../../../common/sendLLMMessageTypes.js';
+import { type LLMUsage, RawToolCallObj } from '../../../../common/sendLLMMessageTypes.js';
 import ErrorBoundary from './ErrorBoundary.js';
 import { ToolApprovalTypeSwitch } from '../void-settings-tsx/Settings.js';

@ -289,6 +289,127 @@ const ChatModeDropdown = ({ className }: { className: string }) => {



+// ----- Token usage ring -----
+// Wraps the send/stop button with an SVG donut showing totalTokens / contextWindow.
+// On hover: shows percentage + per-bucket breakdown (input / output / reasoning / total).
+
+// Formats a token count for display: '-' when unknown, the plain number below 1k,
+// 'N.NNk' below 10k, 'N.Nk' below 1M and 'N.NNM' from there on.
+// The unit/precision is re-checked after rounding so values just under a boundary
+// never render as '10.00k' or '1000.0k'.
+const formatTokenCount = (n: number | undefined): string => {
+	// unknown or non-finite counts have no meaningful label
+	if (n == null || !Number.isFinite(n)) return '-'
+	if (n < 1_000) return `${n}`
+	if (n < 1_000_000) {
+		const thousands = n / 1_000
+		const text = thousands.toFixed(thousands < 10 ? 2 : 1)
+		if (text === '10.00') return '10.0k'      // e.g. 9_999 rounds up into the 1-decimal range
+		if (text !== '1000.0') return `${text}k`  // otherwise rounding spilled into millions: fall through
+	}
+	return `${(n / 1_000_000).toFixed(2)}M`
+}
+
+// Maps a context-usage percentage to the ring's stroke color:
+// below 50 -> violet-700 (normal), below 80 -> yellow-700 (warning), anything else -> red-700 (critical).
+const colorForUsagePct = (pct: number) => {
+	const normal = '#6d28d9'
+	const warning = '#a16207'
+	const critical = '#b91c1c'
+	return pct < 50 ? normal : pct < 80 ? warning : critical
+}
+
+interface TokenUsageRingProps {
+	// latest usage for the thread; when undefined the wrapper is still rendered at the
+	// same size with no ring, so the send button does not shift once usage arrives
+	usage: LLMUsage | undefined;
+	contextWindow: number; // model's max input context, in tokens
+	children: React.ReactNode;
+	size?: number; // outer width/height of the wrapper (defaults to 34)
+}
+// Renders `children` centered inside a fixed-size box and, when usage data is available,
+// draws a donut around them showing used tokens / contextWindow plus a plain-text tooltip.
+const TokenUsageRing: React.FC<TokenUsageRingProps> = ({ usage, contextWindow, children, size = 34 }) => {
+	const strokeWidth = 3
+	const center = size / 2
+	const radius = (size - strokeWidth) / 2
+	// the ring needs both a usage report and a positive context window to compare against
+	const ringUsage = usage && contextWindow > 0 ? usage : undefined
+	const showRing = ringUsage !== undefined
+
+	let ring: React.ReactNode = null
+	let tooltipText: string | undefined = undefined
+
+	if (ringUsage) {
+		const { inputTokens, outputTokens, reasoningTokens, totalTokens } = ringUsage
+		// prefer the reported total; otherwise add up whichever buckets were reported
+		const total = totalTokens ?? ((inputTokens ?? 0) + (outputTokens ?? 0) + (reasoningTokens ?? 0))
+		const rawPct = (total / contextWindow) * 100
+		const clampedPct = Math.min(100, Math.max(0, rawPct))
+		const circumference = 2 * Math.PI * radius
+		// a full-length dash shifted by the unused share leaves only the used arc visible
+		const dashOffset = circumference * (1 - clampedPct / 100)
+
+		let displayPct: string
+		if (rawPct < 0.01) displayPct = '<0.01%'
+		else if (rawPct < 1) displayPct = `${rawPct.toFixed(2)}%`
+		else displayPct = `${rawPct.toFixed(1)}%`
+
+		// Use plain text (no HTML) because the renderer enforces Trusted Types and
+		// react-tooltip's html mode would set innerHTML directly, which is blocked.
+		const tooltipLines = [
+			`Context window usage`,
+			`${formatTokenCount(total)} / ${formatTokenCount(contextWindow)} (${displayPct})`,
+			``,
+			`Input: ${formatTokenCount(inputTokens)}`,
+			`Output: ${formatTokenCount(outputTokens)}`,
+		]
+		// the reasoning bucket is only listed when it was reported
+		if (reasoningTokens !== undefined) tooltipLines.push(`Reasoning: ${formatTokenCount(reasoningTokens)}`)
+		tooltipLines.push(`Total: ${formatTokenCount(total)}`)
+		tooltipText = tooltipLines.join('\n')
+
+		ring = (
+			<svg
+				className='absolute inset-0'
+				width={size}
+				height={size}
+				style={{ transform: 'rotate(-90deg)' }}
+			>
+				{/* background track */}
+				<circle
+					cx={center}
+					cy={center}
+					r={radius}
+					stroke='rgba(180,180,180,0.45)'
+					strokeWidth={strokeWidth}
+					fill='none'
+				/>
+				{/* used portion */}
+				<circle
+					cx={center}
+					cy={center}
+					r={radius}
+					stroke={colorForUsagePct(clampedPct)}
+					strokeWidth={strokeWidth}
+					fill='none'
+					strokeDasharray={circumference}
+					strokeDashoffset={dashOffset}
+					strokeLinecap='butt'
+					style={{ transition: 'stroke-dashoffset 250ms ease, stroke 250ms ease' }}
+				/>
+			</svg>
+		)
+	}
+
+	return (
+		<div
+			className='relative flex items-center justify-center flex-shrink-0'
+			style={{ width: size, height: size }}
+			data-tooltip-id={showRing ? 'void-tooltip' : undefined}
+			data-tooltip-content={tooltipText}
+			data-tooltip-place={showRing ? 'left' : undefined}
+		>
+			{ring}
+			<div className='relative z-1 flex items-center justify-center'>{children}</div>
+		</div>
+	)
+}
+
+// Wraps the send/stop button in a TokenUsageRing fed by the thread's latest usage and
+// the context window of the model currently selected for `featureName`.
+const SubmitButtonWithUsageRing: React.FC<{ threadId: string; featureName: FeatureName; children: React.ReactNode }> = ({ threadId, featureName, children }) => {
+	const settingsState = useSettingsState()
+	const usage = useChatThreadLatestUsage(threadId)
+
+	// The wrapper is rendered unconditionally so the send button doesn't jump sideways
+	// when usage first becomes available; with no model selected the context window is 0,
+	// which makes TokenUsageRing keep its reserved size but draw nothing.
+	const selection = settingsState.modelSelectionOfFeature[featureName]
+	let contextWindow = 0
+	if (selection) {
+		const { providerName, modelName } = selection
+		contextWindow = getModelCapabilities(providerName, modelName, settingsState.overridesOfModel).contextWindow
+	}
+
+	return <TokenUsageRing usage={usage} contextWindow={contextWindow}>{children}</TokenUsageRing>
+}
+
+
 interface VoidChatAreaProps {
 	// Required
 	children: React.ReactNode; // This will be the input component
@ -300,6 +421,10 @@ interface VoidChatAreaProps {
 	isDisabled?: boolean;
 	divRef?: React.RefObject<HTMLDivElement | null>;

+	// when provided, the send/stop button is wrapped with a ring showing
+	// totalTokens / model.contextWindow for the latest LLM usage on this thread
+	threadIdForUsageRing?: string;
+
 	// UI customization
 	className?: string;
 	showModelDropdown?: boolean;
@ -336,6 +461,7 @@ export const VoidChatArea: React.FC<VoidChatAreaProps> = ({
 	setSelections,
 	featureName,
 	loadingIcon,
+	threadIdForUsageRing,
 }) => {
 	return (
 		<div
@ -397,14 +523,17 @@ export const VoidChatArea: React.FC<VoidChatAreaProps> = ({

 					{isStreaming && loadingIcon}

-					{isStreaming ? (
-						<ButtonStop onClick={onAbort} />
-					) : (
-						<ButtonSubmit
-							onClick={onSubmit}
-							disabled={isDisabled}
-						/>
-					)}
+					{(() => {
+						const button = isStreaming
+							? <ButtonStop onClick={onAbort} />
+							: <ButtonSubmit onClick={onSubmit} disabled={isDisabled} />
+						if (!threadIdForUsageRing) return button
+						return (
+							<SubmitButtonWithUsageRing threadId={threadIdForUsageRing} featureName={featureName}>
+								{button}
+							</SubmitButtonWithUsageRing>
+						)
+					})()}
 				</div>

 			</div>
@ -1148,6 +1277,7 @@ const UserMessageComponent = ({ chatMessage, messageIdx, isCheckpointGhost, curr
 			showProspectiveSelections={false}
 			selections={stagingSelections}
 			setSelections={setStagingSelections}
+			threadIdForUsageRing={chatThreadsService.state.currentThreadId}
 		>
 			<VoidInputBox2
 				enableAtToMention
@ -3074,6 +3204,7 @@ export const SidebarChat = () => {
 		selections={selections}
 		setSelections={setSelections}
 		onClickAnywhere={() => { textAreaRef.current?.focus() }}
+		threadIdForUsageRing={chatThreadsState.currentThreadId}
 	>
 		<VoidInputBox2
 			enableAtToMention
--- a/src/vs/workbench/contrib/void/browser/react/src/util/services.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/util/services.tsx
@ -39,6 +39,7 @@ import { IPathService } from '../../../../../../../workbench/services/path/commo
 import { IMetricsService } from '../../../../../../../workbench/contrib/void/common/metricsService.js'
 import { URI } from '../../../../../../../base/common/uri.js'
 import { IChatThreadService, ThreadsState, ThreadStreamState } from '../../../chatThreadService.js'
+import { type LLMUsage } from '../../../../common/sendLLMMessageTypes.js'
 import { ITerminalToolService } from '../../../terminalToolService.js'
 import { ILanguageService } from '../../../../../../../editor/common/languages/language.js'
 import { IVoidModelService } from '../../../../common/voidModelService.js'
@ -67,6 +68,8 @@ const chatThreadsStateListeners: Set<(s: ThreadsState) => void> = new Set()
 let chatThreadsStreamState: ThreadStreamState
 const chatThreadsStreamStateListeners: Set<(threadId: string) => void> = new Set()

+let chatThreadsLatestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined } = {}
+
 let settingsState: VoidSettingsState
 const settingsStateListeners: Set<(s: VoidSettingsState) => void> = new Set()

@ -118,9 +121,11 @@ export const _registerServices = (accessor: ServicesAccessor) => {

 	// same service, different state
 	chatThreadsStreamState = chatThreadsStateService.streamState
+	chatThreadsLatestUsageOfThreadId = chatThreadsStateService.latestUsageOfThreadId
 	disposables.push(
 		chatThreadsStateService.onDidChangeStreamState(({ threadId }) => {
 			chatThreadsStreamState = chatThreadsStateService.streamState
+			chatThreadsLatestUsageOfThreadId = chatThreadsStateService.latestUsageOfThreadId
 			chatThreadsStreamStateListeners.forEach(l => l(threadId))
 		})
 	)
@ -304,6 +309,20 @@ export const useChatThreadsStreamState = (threadId: string) => {
 	return s
 }

+// React hook returning the latest known LLM usage for `threadId`.
+// It reads from the module-level mirror of the service's usage map and re-reads whenever
+// a stream-state change is announced for that same thread.
+export const useChatThreadLatestUsage = (threadId: string) => {
+	const [latestUsage, setLatestUsage] = useState<LLMUsage | undefined>(chatThreadsLatestUsageOfThreadId[threadId])
+	useEffect(() => {
+		const syncFromMirror = () => { setLatestUsage(chatThreadsLatestUsageOfThreadId[threadId]) }
+		// re-sync right away: threadId may have changed since the initial state was taken
+		syncFromMirror()
+		const onStreamStateChanged = (changedThreadId: string) => {
+			if (changedThreadId === threadId) syncFromMirror()
+		}
+		chatThreadsStreamStateListeners.add(onStreamStateChanged)
+		return () => { chatThreadsStreamStateListeners.delete(onStreamStateChanged) }
+	}, [setLatestUsage, threadId])
+	return latestUsage
+}
+
 export const useFullChatThreadsStreamState = () => {
 	const [s, ss] = useState(chatThreadsStreamState)
 	useEffect(() => {
--- a/src/vs/workbench/contrib/void/browser/react/src/void-tooltip/VoidTooltip.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/void-tooltip/VoidTooltip.tsx
@ -52,6 +52,7 @@ export const VoidTooltip = () => {
 					z-index: 999999;
 					max-width: 300px;
 					word-wrap: break-word;
+					white-space: pre-line;
 				}

 				#void-tooltip {
--- a/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts
+++ b/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts
@ -58,6 +58,9 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
 		}
 	}

+	// remembers {provider, model} per request so we can include it when logging usage
+	private readonly modelInfoOfRequestId: { [requestId: string]: { providerName: string, modelName: string } } = {}
+
 	constructor(
 		@IMainProcessService private readonly mainProcessService: IMainProcessService, // used as a renderer (only usable on client side)
 		@IVoidSettingsService private readonly voidSettingsService: IVoidSettingsService,
@ -76,6 +79,18 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
 			this.llmMessageHooks.onText[e.requestId]?.(e)
 		}))
 		this._register((this.channel.listen('onFinalMessage_sendLLMMessage') satisfies Event<EventLLMMessageOnFinalMessageParams>)(e => {
+			if (e.usage) {
+				const info = this.modelInfoOfRequestId[e.requestId]
+				console.log('[Void][LLM] usage', {
+					provider: info?.providerName,
+					model: info?.modelName,
+					requestId: e.requestId,
+					inputTokens: e.usage.inputTokens,
+					outputTokens: e.usage.outputTokens,
+					reasoningTokens: e.usage.reasoningTokens,
+					totalTokens: e.usage.totalTokens,
+				})
+			}
 			this.llmMessageHooks.onFinalMessage[e.requestId]?.(e);
 			this._clearChannelHooks(e.requestId)
 		}))
@ -126,6 +141,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
 		this.llmMessageHooks.onFinalMessage[requestId] = onFinalMessage
 		this.llmMessageHooks.onError[requestId] = onError
 		this.llmMessageHooks.onAbort[requestId] = onAbort // used internally only
+		this.modelInfoOfRequestId[requestId] = { providerName: modelSelection.providerName, modelName: modelSelection.modelName }

 		// params will be stripped of all its functions over the IPC channel
 		this.channel.call('sendLLMMessage', {
@ -186,6 +202,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
 		delete this.llmMessageHooks.onText[requestId]
 		delete this.llmMessageHooks.onFinalMessage[requestId]
 		delete this.llmMessageHooks.onError[requestId]
+		delete this.modelInfoOfRequestId[requestId]

 		delete this.listHooks.ollama.success[requestId]
 		delete this.listHooks.ollama.error[requestId]
--- a/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts
+++ b/src/vs/workbench/contrib/void/common/sendLLMMessageTypes.ts
@ -91,8 +91,18 @@ export type RawToolCallObj = {

 export type AnthropicReasoning = ({ type: 'thinking'; thinking: any; signature: string; } | { type: 'redacted_thinking', data: any })

-export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj }) => void
-export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null }) => void // id is tool_use_id
+// Token usage reported by the provider. All fields optional because providers expose
+// different subsets (e.g. Anthropic streams input/output separately; OpenAI only at end with
+// stream_options.include_usage; Gemini gives it via usageMetadata; Ollama on the final chunk).
+// Consumers must tolerate any field being absent.
+export type LLMUsage = {
+	// tokens in the prompt/input (Gemini: usageMetadata.promptTokenCount)
+	inputTokens?: number;
+	// tokens in the generated output (Gemini: usageMetadata.candidatesTokenCount)
+	outputTokens?: number;
+	// provider-reported total (Gemini: usageMetadata.totalTokenCount)
+	totalTokens?: number;
+	// tokens spent on reasoning/thinking (Gemini: usageMetadata.thoughtsTokenCount)
+	reasoningTokens?: number;
+}
+
+export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; usage?: LLMUsage }) => void
+export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage }) => void // id is tool_use_id
 export type OnError = (p: { message: string; fullError: Error | null }) => void
 export type OnAbort = () => void
 export type AbortRef = { current: (() => void) | null }
--- a/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
+++ b/src/vs/workbench/contrib/void/electron-main/llmMessage/sendLLMMessage.impl.ts
@ -14,7 +14,7 @@ import { Tool as GeminiTool, FunctionDeclaration, GoogleGenAI, ThinkingConfig, S
 import { GoogleAuth } from 'google-auth-library'
 /* eslint-enable */

-import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
+import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, type LLMUsage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
 import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
 import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js';
 import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js';
@ -777,6 +777,9 @@ const sendGeminiChat = async ({
 	let toolParamsStr = ''
 	let toolId = ''

+	// Gemini reports token usage via chunk.usageMetadata. It typically appears in the last
+	// chunk(s), but we keep the latest seen so we always forward the freshest values.
+	let latestUsage: LLMUsage | undefined = undefined

 	genAI.models.generateContentStream({
 		model: modelName,
@ -807,11 +810,24 @@ const sendGeminiChat = async ({

 				// (do not handle reasoning yet)

+				// usage (Gemini exposes promptTokenCount / candidatesTokenCount / totalTokenCount /
+				// thoughtsTokenCount via usageMetadata). Only update when the chunk reports it.
+				const usageMetadata = chunk.usageMetadata
+				if (usageMetadata) {
+					latestUsage = {
+						inputTokens: usageMetadata.promptTokenCount,
+						outputTokens: usageMetadata.candidatesTokenCount,
+						totalTokens: usageMetadata.totalTokenCount,
+						reasoningTokens: usageMetadata.thoughtsTokenCount,
+					}
+				}
+
 				// call onText
 				onText({
 					fullText: fullTextSoFar,
 					fullReasoning: fullReasoningSoFar,
 					toolCall: !toolName ? undefined : { name: toolName, rawParams: {}, isDone: false, doneParams: [], id: toolId },
+					usage: latestUsage,
 				})
 			}

@ -822,7 +838,7 @@ const sendGeminiChat = async ({
 				if (!toolId) toolId = generateUuid() // ids are empty, but other providers might expect an id
 				const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId)
 				const toolCallObj = toolCall ? { toolCall } : {}
-				onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, ...toolCallObj });
+				onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, ...toolCallObj });
 			}
 		})
 		.catch(error => {