From 8d3b7400ff057e7fd678120c4be988ca30ddc6cc Mon Sep 17 00:00:00 2001 From: davi0015 Date: Wed, 22 Apr 2026 14:36:08 +0800 Subject: [PATCH] track accumulative token usage in a conversation thread (#9) --- .../contrib/void/browser/chatThreadService.ts | 84 +++++++++++++++++++ .../react/src/sidebar-tsx/SidebarChat.tsx | 47 ++++++++--- .../void/browser/react/src/util/services.tsx | 35 ++++++++ 3 files changed, 156 insertions(+), 10 deletions(-) diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts index 69f2f776..75982bd6 100644 --- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts +++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts @@ -124,6 +124,14 @@ export type ThreadType = { // after the user sends a new message). latestUsage?: LLMUsage; + // Sum of `LLMUsage` across every API request ever made on this thread. + // In an agent loop with N tool calls, the loop fires N sequential requests + // each carrying the full history + accumulated tool results — total billed + // tokens are O(N²) while `latestUsage` only shows the latest request (O(N)). + // This field surfaces the real cumulative cost so the user can see actual + // billing impact, not just the last sample. Persisted alongside latestUsage. + cumulativeUsageThisThread?: LLMUsage; + // Model used to send the most recent user message on this thread. Captured // on send, restored on `switchToThread` (writes to settings' `Chat` model // selection). 
`null` means "no message was sent on this thread yet"; if the @@ -252,6 +260,15 @@ export interface IChatThreadService { readonly state: ThreadsState; readonly streamState: ThreadStreamState; // not persistent readonly latestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined }; // hydrated from persisted threads on startup; updated as the model streams + // Cumulative usage across all requests in the *current* user turn (reset + // when a new user message is sent or a thread is opened/switched-to fresh). + // Only lives in memory — not persisted, since "this turn" doesn't survive + // a reload anyway. + readonly cumulativeUsageThisTurnOfThreadId: { [threadId: string]: LLMUsage | undefined }; + // Cumulative usage across the entire thread history. Hydrated from the + // persisted thread on startup so the user can see lifetime cost across + // reloads. + readonly cumulativeUsageThisThreadOfThreadId: { [threadId: string]: LLMUsage | undefined }; onDidChangeCurrentThread: Event; onDidChangeStreamState: Event<{ threadId: string }> @@ -330,6 +347,8 @@ class ChatThreadService extends Disposable implements IChatThreadService { readonly streamState: ThreadStreamState = {} readonly latestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined } = {} + readonly cumulativeUsageThisTurnOfThreadId: { [threadId: string]: LLMUsage | undefined } = {} + readonly cumulativeUsageThisThreadOfThreadId: { [threadId: string]: LLMUsage | undefined } = {} state: ThreadsState // allThreads is persisted, currentThread is not // used in checkpointing @@ -375,6 +394,7 @@ class ChatThreadService extends Disposable implements IChatThreadService { for (const id in allThreads) { const t = allThreads[id] if (t?.latestUsage) this.latestUsageOfThreadId[id] = t.latestUsage + if (t?.cumulativeUsageThisThread) this.cumulativeUsageThisThreadOfThreadId[id] = t.cumulativeUsageThisThread } // always be in a thread @@ -576,14 +596,70 @@ class ChatThreadService extends Disposable implements 
// Records the latest per-request usage sample for a thread and refreshes both
// cumulative counters (this turn / this thread).
// Also persists on the thread so the ring shows the last-known value after a reload.
//
// Cumulative = (baseline locked in from previously finalized requests) + (this
// request's running total). It is always recomputed from the baseline because
// streaming updates carry the per-request running total, not a delta; adding
// them onto the previous cumulative value would double-count.
private _setLatestUsage(threadId: string, usage: LLMUsage) {
	// Must run before the public this-thread counter is overwritten below:
	// the first sample of a session seeds the baseline from the hydrated total.
	this._seedThisThreadBaseline(threadId)

	this.latestUsageOfThreadId[threadId] = usage
	this._inFlightUsageOfThreadId[threadId] = usage

	this.cumulativeUsageThisTurnOfThreadId[threadId] = this._addUsage(this._cumulativeThisTurnBaselineOfThreadId[threadId], usage)
	this.cumulativeUsageThisThreadOfThreadId[threadId] = this._addUsage(this._cumulativeThisThreadBaselineOfThreadId[threadId], usage)

	const thread = this.state.allThreads[threadId]
	if (thread) {
		thread.latestUsage = usage
		thread.cumulativeUsageThisThread = this.cumulativeUsageThisThreadOfThreadId[threadId]
		this._storeAllThreads(this.state.allThreads)
	}
	this._onDidChangeStreamState.fire({ threadId })
}

// Baseline = cumulative usage from previously-finalized requests in this
// turn/thread. The current request's running total gets added on top in
// `_setLatestUsage`. Moved forward by `_lockInCurrentRequestUsage` once a
// request finishes, so the next request starts counting from where we
// left off.
private readonly _cumulativeThisTurnBaselineOfThreadId: { [threadId: string]: LLMUsage | undefined } = {}
private readonly _cumulativeThisThreadBaselineOfThreadId: { [threadId: string]: LLMUsage | undefined } = {}

// Usage reported by the request that is currently in flight and has not been
// locked in yet. Kept separately from `latestUsageOfThreadId`, which keeps
// showing the last sample after the request is finalized (and is hydrated from
// disk on startup), so it cannot tell "already counted" from "still pending".
private readonly _inFlightUsageOfThreadId: { [threadId: string]: LLMUsage | undefined } = {}

// Seeds the this-thread baseline exactly once per thread per session from the
// lifetime total that was hydrated from the persisted thread. Without this the
// baseline starts empty after a reload and the first new sample would replace
// the persisted lifetime total instead of adding to it.
// Key presence (not value) marks "already seeded", so a brand-new thread whose
// total is still undefined is not re-seeded from its own running total.
private _seedThisThreadBaseline(threadId: string) {
	if (threadId in this._cumulativeThisThreadBaselineOfThreadId) return
	this._cumulativeThisThreadBaselineOfThreadId[threadId] =
		this.cumulativeUsageThisThreadOfThreadId[threadId]
		?? this.state.allThreads[threadId]?.cumulativeUsageThisThread
}

// Sum two LLMUsage values. Always returns a fresh object (or undefined when
// both inputs are undefined). A field stays undefined only when it is undefined
// on both sides; otherwise the missing side counts as 0, so e.g. a request that
// doesn't report `cachedInputTokens` doesn't erase the previously-accumulated
// cached count.
private _addUsage(a: LLMUsage | undefined, b: LLMUsage | undefined): LLMUsage | undefined {
	if (!a) return b ? { ...b } : undefined
	if (!b) return { ...a }
	const add = (x: number | undefined, y: number | undefined): number | undefined => {
		if (x === undefined && y === undefined) return undefined
		return (x ?? 0) + (y ?? 0)
	}
	return {
		inputTokens: add(a.inputTokens, b.inputTokens),
		outputTokens: add(a.outputTokens, b.outputTokens),
		totalTokens: add(a.totalTokens, b.totalTokens),
		reasoningTokens: add(a.reasoningTokens, b.reasoningTokens),
		cachedInputTokens: add(a.cachedInputTokens, b.cachedInputTokens),
	}
}

// Roll the finished request's usage into the cumulative baselines so the next
// request's running total is added on top of the locked-in totals.
// Called once per request (on `onFinalMessage`), whether or not that request
// reported usage. Only usage that arrived since the previous lock-in is folded
// in; a request that reported nothing is a no-op rather than re-adding the
// previous request's sample.
private _lockInCurrentRequestUsage(threadId: string) {
	const finished = this._inFlightUsageOfThreadId[threadId]
	if (!finished) return
	this._inFlightUsageOfThreadId[threadId] = undefined
	this._cumulativeThisTurnBaselineOfThreadId[threadId] = this._addUsage(this._cumulativeThisTurnBaselineOfThreadId[threadId], finished)
	this._cumulativeThisThreadBaselineOfThreadId[threadId] = this._addUsage(this._cumulativeThisThreadBaselineOfThreadId[threadId], finished)
}

// Reset the "this turn" counter and its baseline. Called when a new user
// message starts a fresh turn. Does NOT touch "this thread" — that's
// lifetime accumulation.
private _resetCumulativeThisTurn(threadId: string) {
	this.cumulativeUsageThisTurnOfThreadId[threadId] = undefined
	this._cumulativeThisTurnBaselineOfThreadId[threadId] = undefined
	// Drop any sample left over from a request that never reached lock-in, so
	// it cannot be folded into the new turn by a later lock-in.
	this._inFlightUsageOfThreadId[threadId] = undefined
	this._onDidChangeStreamState.fire({ threadId })
}
+ this._resetCumulativeThisTurn(threadId) + // add dummy before this message to keep checkpoint before user message idea consistent if (thread.messages.length === 0) { this._addUserCheckpoint({ threadId }) diff --git a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx index 43a46ca6..99795344 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/sidebar-tsx/SidebarChat.tsx @@ -6,7 +6,7 @@ import React, { ButtonHTMLAttributes, FormEvent, FormHTMLAttributes, Fragment, KeyboardEvent, useCallback, useEffect, useMemo, useRef, useState } from 'react'; -import { useAccessor, useChatThreadsState, useChatThreadsStreamState, useSettingsState, useActiveURI, useCommandBarState, useFullChatThreadsStreamState, useChatThreadLatestUsage } from '../util/services.js'; +import { useAccessor, useChatThreadsState, useChatThreadsStreamState, useSettingsState, useActiveURI, useCommandBarState, useFullChatThreadsStreamState, useChatThreadLatestUsage, useChatThreadCumulativeUsage } from '../util/services.js'; import { ScrollType } from '../../../../../../../editor/common/editorCommon.js'; import { ChatMarkdownRender, ChatMessageLocation, getApplyBoxId } from '../markdown/ChatMarkdownRender.js'; @@ -311,10 +311,30 @@ interface TokenUsageRingProps { // ring is drawn — this prevents the send button from shifting once usage arrives usage: LLMUsage | undefined; contextWindow: number; // model's max input context, in tokens + cumulativeThisTurn?: LLMUsage | undefined; + cumulativeThisThread?: LLMUsage | undefined; children: React.ReactNode; size?: number; } -const TokenUsageRing: React.FC = ({ usage, contextWindow, children, size = 34 }) => { + +// Format a single LLMUsage block for the tooltip. Returns an array of plain +// text lines (no HTML — react-tooltip's html mode is blocked by Trusted Types). 
// Renders one LLMUsage sample as tooltip lines under the heading `label`.
// A missing sample collapses to a single "<label>: -" line. The reasoning row
// is emitted as `null` when the sample has no reasoning count; the caller is
// expected to filter nulls out before joining.
const formatUsageBlock = (label: string, u: LLMUsage | undefined): (string | null)[] => {
	if (u === undefined || u === null) {
		return [`${label}: -`]
	}

	// Prefer the reported total; otherwise add up whichever parts are present.
	let total = u.totalTokens
	if (total === undefined || total === null) {
		total = (u.inputTokens ?? 0) + (u.outputTokens ?? 0) + (u.reasoningTokens ?? 0)
	}

	let inputLine: string
	if (u.cachedInputTokens !== undefined) {
		inputLine = ` Input: ${formatTokenCount(u.inputTokens)} (${formatTokenCount(u.cachedInputTokens)} cached)`
	} else {
		inputLine = ` Input: ${formatTokenCount(u.inputTokens)}`
	}

	const lines: (string | null)[] = []
	lines.push(`${label}:`)
	lines.push(inputLine)
	lines.push(` Output: ${formatTokenCount(u.outputTokens)}`)
	lines.push(u.reasoningTokens === undefined ? null : ` Reasoning: ${formatTokenCount(u.reasoningTokens)}`)
	lines.push(` Total: ${formatTokenCount(total)}`)
	return lines
}
tooltipContent = [ `Context window usage`, `${formatTokenCount(total)} / ${formatTokenCount(contextWindow)} (${displayPct})`, ``, - inputLine, - `Output: ${formatTokenCount(usage.outputTokens)}`, - usage.reasoningTokens !== undefined ? `Reasoning: ${formatTokenCount(usage.reasoningTokens)}` : null, - `Total: ${formatTokenCount(total)}`, + ...formatUsageBlock('Last request', usage), + ``, + ...formatUsageBlock('Cumulative this turn', cumulativeThisTurn), + ``, + ...formatUsageBlock('Cumulative this thread', cumulativeThisThread), ].filter(s => s !== null).join('\n') svgEl = ( @@ -400,6 +426,7 @@ const TokenUsageRing: React.FC = ({ usage, contextWindow, c const SubmitButtonWithUsageRing: React.FC<{ threadId: string; featureName: FeatureName; children: React.ReactNode }> = ({ threadId, featureName, children }) => { const settingsState = useSettingsState() const usage = useChatThreadLatestUsage(threadId) + const cumulative = useChatThreadCumulativeUsage(threadId) const modelSelection = settingsState.modelSelectionOfFeature[featureName] // Always render the wrapper so the send button doesn't jump sideways when @@ -410,7 +437,7 @@ const SubmitButtonWithUsageRing: React.FC<{ threadId: string; featureName: Featu : 0 return ( - + {children} ) diff --git a/src/vs/workbench/contrib/void/browser/react/src/util/services.tsx b/src/vs/workbench/contrib/void/browser/react/src/util/services.tsx index 411c60c1..803222e5 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/util/services.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/util/services.tsx @@ -69,6 +69,8 @@ let chatThreadsStreamState: ThreadStreamState const chatThreadsStreamStateListeners: Set<(threadId: string) => void> = new Set() let chatThreadsLatestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined } = {} +let chatThreadsCumulativeUsageThisTurnOfThreadId: { [threadId: string]: LLMUsage | undefined } = {} +let chatThreadsCumulativeUsageThisThreadOfThreadId: { [threadId: string]: LLMUsage | 
undefined } = {} let settingsState: VoidSettingsState const settingsStateListeners: Set<(s: VoidSettingsState) => void> = new Set() @@ -122,10 +124,14 @@ export const _registerServices = (accessor: ServicesAccessor) => { // same service, different state chatThreadsStreamState = chatThreadsStateService.streamState chatThreadsLatestUsageOfThreadId = chatThreadsStateService.latestUsageOfThreadId + chatThreadsCumulativeUsageThisTurnOfThreadId = chatThreadsStateService.cumulativeUsageThisTurnOfThreadId + chatThreadsCumulativeUsageThisThreadOfThreadId = chatThreadsStateService.cumulativeUsageThisThreadOfThreadId disposables.push( chatThreadsStateService.onDidChangeStreamState(({ threadId }) => { chatThreadsStreamState = chatThreadsStateService.streamState chatThreadsLatestUsageOfThreadId = chatThreadsStateService.latestUsageOfThreadId + chatThreadsCumulativeUsageThisTurnOfThreadId = chatThreadsStateService.cumulativeUsageThisTurnOfThreadId + chatThreadsCumulativeUsageThisThreadOfThreadId = chatThreadsStateService.cumulativeUsageThisThreadOfThreadId chatThreadsStreamStateListeners.forEach(l => l(threadId)) }) ) @@ -323,6 +329,35 @@ export const useChatThreadLatestUsage = (threadId: string) => { return u } +// Cumulative token usage across all LLM requests fired in the current user turn +// (this-turn) and across the entire thread history (this-thread). In an agent +// loop with N tool calls each request resends the full history, so total billed +// tokens grow ~O(N²) — these counters expose that real cost vs. `latestUsage`'s +// per-request snapshot. 
// Subscribes a component to the cumulative token usage of `threadId`.
//
// Returns `{ thisTurn, thisThread }`: the usage summed over the requests of the
// current user turn and over the whole thread, as mirrored from the chat thread
// service into this module's `chatThreadsCumulativeUsage*OfThreadId` maps.
// Either side is `undefined` when nothing has been recorded for it.
//
// The shared stream-state listener set fires for every stream-state change on a
// thread, not only for usage changes. The state object is therefore replaced
// only when one of the two references actually changed; returning the previous
// state lets React skip the re-render.
export const useChatThreadCumulativeUsage = (threadId: string) => {
	type CumulativeUsage = { thisTurn: LLMUsage | undefined, thisThread: LLMUsage | undefined }

	// Lazy initializer: read the snapshot once on mount instead of rebuilding
	// an unused object on every render.
	const [u, su] = useState<CumulativeUsage>(() => ({
		thisTurn: chatThreadsCumulativeUsageThisTurnOfThreadId[threadId],
		thisThread: chatThreadsCumulativeUsageThisThreadOfThreadId[threadId],
	}))

	useEffect(() => {
		const sync = () => {
			su(prev => {
				const thisTurn = chatThreadsCumulativeUsageThisTurnOfThreadId[threadId]
				const thisThread = chatThreadsCumulativeUsageThisThreadOfThreadId[threadId]
				const unchanged = prev.thisTurn === thisTurn && prev.thisThread === thisThread
				return unchanged ? prev : { thisTurn, thisThread }
			})
		}

		// Re-read immediately: `threadId` may have changed since the state was initialized.
		sync()

		const listener = (threadId_: string) => {
			if (threadId_ !== threadId) return
			sync()
		}
		chatThreadsStreamStateListeners.add(listener)
		return () => { chatThreadsStreamStateListeners.delete(listener) }
	}, [su, threadId])

	return u
}