This commit is contained in:
davi0015 2026-04-20 07:40:34 +00:00 committed by GitHub
commit 2407ded440
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 240 additions and 17 deletions

View file

@ -12,7 +12,7 @@ import { URI } from '../../../../base/common/uri.js';
import { Emitter, Event } from '../../../../base/common/event.js';
import { ILLMMessageService } from '../common/sendLLMMessageService.js';
import { chat_userMessageContent, isABuiltinToolName } from '../common/prompt/prompts.js';
import { AnthropicReasoning, getErrorMessage, RawToolCallObj, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
import { AnthropicReasoning, getErrorMessage, type LLMUsage, RawToolCallObj, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
import { generateUuid } from '../../../../base/common/uuid.js';
import { FeatureName, ModelSelection, ModelSelectionOptions } from '../common/voidSettingsTypes.js';
import { IVoidSettingsService } from '../common/voidSettingsService.js';
@ -119,6 +119,11 @@ export type ThreadType = {
messages: ChatMessage[];
filesWithUserChanges: Set<string>;
// Last-seen token usage from the LLM for this thread. Persisted so the
// context-usage ring shows a value immediately on reload (instead of only
// after the user sends a new message).
latestUsage?: LLMUsage;
// this doesn't need to go in a state object, but feels right
state: {
currCheckpointIdx: number | null; // the latest checkpoint we're at (null if not at a particular checkpoint, like if the chat is streaming, or chat just finished and we haven't clicked on a checkpt)
@ -232,6 +237,7 @@ export interface IChatThreadService {
readonly state: ThreadsState;
readonly streamState: ThreadStreamState; // not persistent
readonly latestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined }; // hydrated from persisted threads on startup; updated as the model streams
onDidChangeCurrentThread: Event<void>;
onDidChangeStreamState: Event<{ threadId: string }>
@ -305,6 +311,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
readonly onDidChangeStreamState: Event<{ threadId: string }> = this._onDidChangeStreamState.event;
readonly streamState: ThreadStreamState = {}
readonly latestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined } = {}
state: ThreadsState // allThreads is persisted, currentThread is not
// used in checkpointing
@ -339,6 +346,13 @@ class ChatThreadService extends Disposable implements IChatThreadService {
currentThreadId: null as unknown as string, // gets set in startNewThread()
}
// hydrate in-memory latestUsage map from the persisted threads so the
// context-usage ring shows the last-known values right after a reload
for (const id in allThreads) {
const t = allThreads[id]
if (t?.latestUsage) this.latestUsageOfThreadId[id] = t.latestUsage
}
// always be in a thread
this.openNewThread()
@ -484,6 +498,19 @@ class ChatThreadService extends Disposable implements IChatThreadService {
this._onDidChangeStreamState.fire({ threadId })
}
// Records the latest token usage for a thread.
//  - updates the in-memory `latestUsageOfThreadId` map,
//  - mirrors the value onto the persisted thread (if the thread exists) so the ring
//    shows the last-known value after a reload,
//  - re-uses the streamState emitter so existing listeners (and the React mirror in
//    services.tsx) re-read without extra plumbing.
// Providers may re-send the same usage on every streamed chunk, so a value-equal
// update is a no-op: this avoids persisting all threads and firing a redundant event
// once per chunk.
private _setLatestUsage(threadId: string, usage: LLMUsage) {
	const isSameUsage = (prev: LLMUsage | undefined, next: LLMUsage) =>
		!!prev
		&& prev.inputTokens === next.inputTokens
		&& prev.outputTokens === next.outputTokens
		&& prev.totalTokens === next.totalTokens
		&& prev.reasoningTokens === next.reasoningTokens

	const thread = this.state.allThreads[threadId]
	const memoryIsStale = !isSameUsage(this.latestUsageOfThreadId[threadId], usage)
	const threadIsStale = !!thread && !isSameUsage(thread.latestUsage, usage)

	// nothing changed anywhere: skip the write, the persist and the event
	if (!memoryIsStale && !threadIsStale) return

	this.latestUsageOfThreadId[threadId] = usage
	if (thread && threadIsStale) {
		thread.latestUsage = usage
		this._storeAllThreads(this.state.allThreads)
	}
	this._onDidChangeStreamState.fire({ threadId })
}
// ---------- streaming ----------
@ -811,10 +838,12 @@ class ChatThreadService extends Disposable implements IChatThreadService {
overridesOfModel,
logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } },
separateSystemMessage: separateSystemMessage,
onText: ({ fullText, fullReasoning, toolCall }) => {
onText: ({ fullText, fullReasoning, toolCall, usage }) => {
if (usage) this._setLatestUsage(threadId, usage)
this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) })
},
onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, }) => {
onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage }) => {
if (usage) this._setLatestUsage(threadId, usage)
resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls
},
onError: async (error) => {

View file

@ -6,7 +6,7 @@
import React, { ButtonHTMLAttributes, FormEvent, FormHTMLAttributes, Fragment, KeyboardEvent, useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { useAccessor, useChatThreadsState, useChatThreadsStreamState, useSettingsState, useActiveURI, useCommandBarState, useFullChatThreadsStreamState } from '../util/services.js';
import { useAccessor, useChatThreadsState, useChatThreadsStreamState, useSettingsState, useActiveURI, useCommandBarState, useFullChatThreadsStreamState, useChatThreadLatestUsage } from '../util/services.js';
import { ScrollType } from '../../../../../../../editor/common/editorCommon.js';
import { ChatMarkdownRender, ChatMessageLocation, getApplyBoxId } from '../markdown/ChatMarkdownRender.js';
@ -29,7 +29,7 @@ import { CopyButton, EditToolAcceptRejectButtonsHTML, IconShell1, JumpToFileButt
import { IsRunningType } from '../../../chatThreadService.js';
import { acceptAllBg, acceptBorder, buttonFontSize, buttonTextColor, rejectAllBg, rejectBg, rejectBorder } from '../../../../common/helpers/colors.js';
import { builtinToolNames, isABuiltinToolName, MAX_FILE_CHARS_PAGE, MAX_TERMINAL_INACTIVE_TIME } from '../../../../common/prompt/prompts.js';
import { RawToolCallObj } from '../../../../common/sendLLMMessageTypes.js';
import { type LLMUsage, RawToolCallObj } from '../../../../common/sendLLMMessageTypes.js';
import ErrorBoundary from './ErrorBoundary.js';
import { ToolApprovalTypeSwitch } from '../void-settings-tsx/Settings.js';
@ -289,6 +289,127 @@ const ChatModeDropdown = ({ className }: { className: string }) => {
// ----- Token usage ring -----
// Wraps the send/stop button with an SVG donut showing totalTokens / contextWindow.
// On hover: shows percentage + per-bucket breakdown (input / output / reasoning / total).
// Formats a token count for compact display:
//   undefined / non-finite -> '-'
//   < 1,000                -> the number as-is
//   < 1,000,000            -> 'k' suffix (2 decimals below 10k, 1 decimal otherwise)
//   otherwise              -> 'M' suffix with 2 decimals
// Values that would round up to 1000k (e.g. 999,999) are promoted to the 'M' bucket
// so the output never reads '1000.0k'.
const formatTokenCount = (n: number | undefined): string => {
	if (n == null || !Number.isFinite(n)) return '-'
	if (n < 1_000) return `${n}`
	if (n < 1_000_000) {
		const thousands = (n / 1_000).toFixed(n < 10_000 ? 2 : 1)
		// toFixed rounds, so a value just under 1M can become '1000.0'; fall through to 'M'
		if (Number(thousands) < 1_000) return `${thousands}k`
	}
	return `${(n / 1_000_000).toFixed(2)}M`
}
// Picks the ring stroke color for a usage percentage:
// below 50 -> normal, below 80 -> warning, anything else -> critical.
const colorForUsagePct = (pct: number) => {
	const normal = '#6d28d9' // violet-700
	const warning = '#a16207' // yellow-700
	const critical = '#b91c1c' // red-700
	return pct < 50 ? normal : pct < 80 ? warning : critical
}
// Props for TokenUsageRing, the fixed-size wrapper that draws a usage donut around its children.
interface TokenUsageRingProps {
// Latest token usage to visualize. When usage is undefined the wrapper still renders
// at the same size, but no ring is drawn — this prevents the send button from
// shifting once usage arrives.
usage: LLMUsage | undefined;
contextWindow: number; // model's max input context, in tokens; a value <= 0 suppresses the ring and tooltip
children: React.ReactNode; // content rendered centered on top of the ring
size?: number; // width and height of the wrapper and the SVG; the component defaults this to 34
}
// Fixed-size wrapper that centers `children` and, when both `usage` and a positive
// `contextWindow` are available, draws an SVG donut behind them showing
// total tokens / contextWindow plus a plain-text tooltip with the per-bucket breakdown.
const TokenUsageRing: React.FC<TokenUsageRingProps> = ({ usage, contextWindow, children, size = 34 }) => {
	const strokeWidth = 3
	const center = size / 2
	const radius = (size - strokeWidth) / 2
	const hasData = !!usage && contextWindow > 0

	let ring: React.ReactNode = null
	let tooltipText: string | undefined = undefined

	if (usage && hasData) {
		const { inputTokens, outputTokens, reasoningTokens, totalTokens } = usage

		// prefer the provider-reported total; otherwise sum whichever buckets were reported
		const total = totalTokens ?? ((inputTokens ?? 0) + (outputTokens ?? 0) + (reasoningTokens ?? 0))
		const rawPct = (total / contextWindow) * 100
		const clampedPct = Math.min(100, Math.max(0, rawPct))

		// the arc is drawn as a dash covering `clampedPct` of the circle's circumference
		const circumference = 2 * Math.PI * radius
		const dashOffset = circumference * (1 - clampedPct / 100)
		const strokeColor = colorForUsagePct(clampedPct)

		let displayPct: string
		if (rawPct < 0.01) displayPct = '<0.01%'
		else if (rawPct < 1) displayPct = `${rawPct.toFixed(2)}%`
		else displayPct = `${rawPct.toFixed(1)}%`

		// Use plain text (no HTML) because the renderer enforces Trusted Types and
		// react-tooltip's html mode would set innerHTML directly, which is blocked.
		const tooltipLines: string[] = [
			`Context window usage`,
			`${formatTokenCount(total)} / ${formatTokenCount(contextWindow)} (${displayPct})`,
			``,
			`Input: ${formatTokenCount(inputTokens)}`,
			`Output: ${formatTokenCount(outputTokens)}`,
		]
		// the reasoning row only appears when the provider reported it
		if (reasoningTokens !== undefined) {
			tooltipLines.push(`Reasoning: ${formatTokenCount(reasoningTokens)}`)
		}
		tooltipLines.push(`Total: ${formatTokenCount(total)}`)
		tooltipText = tooltipLines.join('\n')

		// geometry shared by the background track and the foreground arc
		const circleGeometry = { cx: center, cy: center, r: radius, strokeWidth, fill: 'none' }

		ring = (
			<svg
				className='absolute inset-0'
				width={size}
				height={size}
				style={{ transform: 'rotate(-90deg)' }}
			>
				<circle {...circleGeometry} stroke='rgba(180,180,180,0.45)' />
				<circle
					{...circleGeometry}
					stroke={strokeColor}
					strokeDasharray={circumference}
					strokeDashoffset={dashOffset}
					strokeLinecap='butt'
					style={{ transition: 'stroke-dashoffset 250ms ease, stroke 250ms ease' }}
				/>
			</svg>
		)
	}

	return (
		<div
			className='relative flex items-center justify-center flex-shrink-0'
			style={{ width: size, height: size }}
			data-tooltip-id={hasData ? 'void-tooltip' : undefined}
			data-tooltip-content={tooltipText}
			data-tooltip-place={hasData ? 'left' : undefined}
		>
			{ring}
			<div className='relative z-1 flex items-center justify-center'>{children}</div>
		</div>
	)
}
// Wraps the send/stop button in a TokenUsageRing, feeding it the thread's latest
// usage and the context window of the model selected for `featureName`.
const SubmitButtonWithUsageRing: React.FC<{ threadId: string; featureName: FeatureName; children: React.ReactNode }> = (props) => {
	const { threadId, featureName, children } = props

	const settingsState = useSettingsState()
	const latestUsage = useChatThreadLatestUsage(threadId)
	const selectedModel = settingsState.modelSelectionOfFeature[featureName]

	// The wrapper is rendered unconditionally so the send button doesn't jump sideways
	// when usage first becomes available. With no model selected the context window is 0,
	// which makes TokenUsageRing hide the SVG while keeping the size reserved.
	let contextWindow = 0
	if (selectedModel) {
		const { providerName, modelName } = selectedModel
		contextWindow = getModelCapabilities(providerName, modelName, settingsState.overridesOfModel).contextWindow
	}

	return <TokenUsageRing usage={latestUsage} contextWindow={contextWindow}>{children}</TokenUsageRing>
}
interface VoidChatAreaProps {
// Required
children: React.ReactNode; // This will be the input component
@ -300,6 +421,10 @@ interface VoidChatAreaProps {
isDisabled?: boolean;
divRef?: React.RefObject<HTMLDivElement | null>;
// when provided, the send/stop button is wrapped with a ring showing
// totalTokens / model.contextWindow for the latest LLM usage on this thread
threadIdForUsageRing?: string;
// UI customization
className?: string;
showModelDropdown?: boolean;
@ -336,6 +461,7 @@ export const VoidChatArea: React.FC<VoidChatAreaProps> = ({
setSelections,
featureName,
loadingIcon,
threadIdForUsageRing,
}) => {
return (
<div
@ -397,14 +523,17 @@ export const VoidChatArea: React.FC<VoidChatAreaProps> = ({
{isStreaming && loadingIcon}
{isStreaming ? (
<ButtonStop onClick={onAbort} />
) : (
<ButtonSubmit
onClick={onSubmit}
disabled={isDisabled}
/>
)}
{(() => {
const button = isStreaming
? <ButtonStop onClick={onAbort} />
: <ButtonSubmit onClick={onSubmit} disabled={isDisabled} />
if (!threadIdForUsageRing) return button
return (
<SubmitButtonWithUsageRing threadId={threadIdForUsageRing} featureName={featureName}>
{button}
</SubmitButtonWithUsageRing>
)
})()}
</div>
</div>
@ -1148,6 +1277,7 @@ const UserMessageComponent = ({ chatMessage, messageIdx, isCheckpointGhost, curr
showProspectiveSelections={false}
selections={stagingSelections}
setSelections={setStagingSelections}
threadIdForUsageRing={chatThreadsService.state.currentThreadId}
>
<VoidInputBox2
enableAtToMention
@ -3074,6 +3204,7 @@ export const SidebarChat = () => {
selections={selections}
setSelections={setSelections}
onClickAnywhere={() => { textAreaRef.current?.focus() }}
threadIdForUsageRing={chatThreadsState.currentThreadId}
>
<VoidInputBox2
enableAtToMention

View file

@ -39,6 +39,7 @@ import { IPathService } from '../../../../../../../workbench/services/path/commo
import { IMetricsService } from '../../../../../../../workbench/contrib/void/common/metricsService.js'
import { URI } from '../../../../../../../base/common/uri.js'
import { IChatThreadService, ThreadsState, ThreadStreamState } from '../../../chatThreadService.js'
import { type LLMUsage } from '../../../../common/sendLLMMessageTypes.js'
import { ITerminalToolService } from '../../../terminalToolService.js'
import { ILanguageService } from '../../../../../../../editor/common/languages/language.js'
import { IVoidModelService } from '../../../../common/voidModelService.js'
@ -67,6 +68,8 @@ const chatThreadsStateListeners: Set<(s: ThreadsState) => void> = new Set()
let chatThreadsStreamState: ThreadStreamState
const chatThreadsStreamStateListeners: Set<(threadId: string) => void> = new Set()
let chatThreadsLatestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined } = {}
let settingsState: VoidSettingsState
const settingsStateListeners: Set<(s: VoidSettingsState) => void> = new Set()
@ -118,9 +121,11 @@ export const _registerServices = (accessor: ServicesAccessor) => {
// same service, different state
chatThreadsStreamState = chatThreadsStateService.streamState
chatThreadsLatestUsageOfThreadId = chatThreadsStateService.latestUsageOfThreadId
disposables.push(
chatThreadsStateService.onDidChangeStreamState(({ threadId }) => {
chatThreadsStreamState = chatThreadsStateService.streamState
chatThreadsLatestUsageOfThreadId = chatThreadsStateService.latestUsageOfThreadId
chatThreadsStreamStateListeners.forEach(l => l(threadId))
})
)
@ -304,6 +309,20 @@ export const useChatThreadsStreamState = (threadId: string) => {
return s
}
// React hook returning the latest LLM usage recorded for `threadId`.
// It reads from the module-level mirror `chatThreadsLatestUsageOfThreadId` and
// re-reads whenever a stream-state change is announced for the same thread.
export const useChatThreadLatestUsage = (threadId: string) => {
	const [latestUsage, setLatestUsage] = useState<LLMUsage | undefined>(chatThreadsLatestUsageOfThreadId[threadId])

	useEffect(() => {
		const syncFromMirror = () => setLatestUsage(chatThreadsLatestUsageOfThreadId[threadId])

		// pick up the current value right away (covers a changed threadId)
		syncFromMirror()

		const onStreamStateChanged = (changedThreadId: string) => {
			if (changedThreadId === threadId) syncFromMirror()
		}
		chatThreadsStreamStateListeners.add(onStreamStateChanged)
		return () => { chatThreadsStreamStateListeners.delete(onStreamStateChanged) }
	}, [setLatestUsage, threadId])

	return latestUsage
}
export const useFullChatThreadsStreamState = () => {
const [s, ss] = useState(chatThreadsStreamState)
useEffect(() => {

View file

@ -52,6 +52,7 @@ export const VoidTooltip = () => {
z-index: 999999;
max-width: 300px;
word-wrap: break-word;
white-space: pre-line;
}
#void-tooltip {

View file

@ -58,6 +58,9 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
}
}
// remembers {provider, model} per request so we can include it when logging usage
private readonly modelInfoOfRequestId: { [requestId: string]: { providerName: string, modelName: string } } = {}
constructor(
@IMainProcessService private readonly mainProcessService: IMainProcessService, // used as a renderer (only usable on client side)
@IVoidSettingsService private readonly voidSettingsService: IVoidSettingsService,
@ -76,6 +79,18 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
this.llmMessageHooks.onText[e.requestId]?.(e)
}))
this._register((this.channel.listen('onFinalMessage_sendLLMMessage') satisfies Event<EventLLMMessageOnFinalMessageParams>)(e => {
if (e.usage) {
const info = this.modelInfoOfRequestId[e.requestId]
console.log('[Void][LLM] usage', {
provider: info?.providerName,
model: info?.modelName,
requestId: e.requestId,
inputTokens: e.usage.inputTokens,
outputTokens: e.usage.outputTokens,
reasoningTokens: e.usage.reasoningTokens,
totalTokens: e.usage.totalTokens,
})
}
this.llmMessageHooks.onFinalMessage[e.requestId]?.(e);
this._clearChannelHooks(e.requestId)
}))
@ -126,6 +141,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
this.llmMessageHooks.onFinalMessage[requestId] = onFinalMessage
this.llmMessageHooks.onError[requestId] = onError
this.llmMessageHooks.onAbort[requestId] = onAbort // used internally only
this.modelInfoOfRequestId[requestId] = { providerName: modelSelection.providerName, modelName: modelSelection.modelName }
// params will be stripped of all its functions over the IPC channel
this.channel.call('sendLLMMessage', {
@ -186,6 +202,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
delete this.llmMessageHooks.onText[requestId]
delete this.llmMessageHooks.onFinalMessage[requestId]
delete this.llmMessageHooks.onError[requestId]
delete this.modelInfoOfRequestId[requestId]
delete this.listHooks.ollama.success[requestId]
delete this.listHooks.ollama.error[requestId]

View file

@ -91,8 +91,18 @@ export type RawToolCallObj = {
export type AnthropicReasoning = ({ type: 'thinking'; thinking: any; signature: string; } | { type: 'redacted_thinking', data: any })
export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj }) => void
export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null }) => void // id is tool_use_id
// Token usage reported by the provider. All fields optional because providers expose
// different subsets (e.g. Anthropic streams input/output separately; OpenAI only at end with
// stream_options.include_usage; Gemini gives it via usageMetadata; Ollama on the final chunk).
// Consumers must tolerate any field being absent.
export type LLMUsage = {
inputTokens?: number; // prompt-side token count (Gemini: usageMetadata.promptTokenCount)
outputTokens?: number; // response-side token count (Gemini: usageMetadata.candidatesTokenCount)
totalTokens?: number; // provider-reported total (Gemini: usageMetadata.totalTokenCount)
reasoningTokens?: number; // reasoning/"thinking" token count (Gemini: usageMetadata.thoughtsTokenCount)
}
export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; usage?: LLMUsage }) => void
export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage }) => void // id is tool_use_id
export type OnError = (p: { message: string; fullError: Error | null }) => void
export type OnAbort = () => void
export type AbortRef = { current: (() => void) | null }

View file

@ -14,7 +14,7 @@ import { Tool as GeminiTool, FunctionDeclaration, GoogleGenAI, ThinkingConfig, S
import { GoogleAuth } from 'google-auth-library'
/* eslint-enable */
import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, type LLMUsage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js';
import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js';
@ -777,6 +777,9 @@ const sendGeminiChat = async ({
let toolParamsStr = ''
let toolId = ''
// Gemini reports token usage via chunk.usageMetadata. It typically appears in the last
// chunk(s), but we keep the latest seen so we always forward the freshest values.
let latestUsage: LLMUsage | undefined = undefined
genAI.models.generateContentStream({
model: modelName,
@ -807,11 +810,24 @@ const sendGeminiChat = async ({
// (do not handle reasoning yet)
// usage (Gemini exposes promptTokenCount / candidatesTokenCount / totalTokenCount /
// thoughtsTokenCount via usageMetadata). Only update when the chunk reports it.
const usageMetadata = chunk.usageMetadata
if (usageMetadata) {
latestUsage = {
inputTokens: usageMetadata.promptTokenCount,
outputTokens: usageMetadata.candidatesTokenCount,
totalTokens: usageMetadata.totalTokenCount,
reasoningTokens: usageMetadata.thoughtsTokenCount,
}
}
// call onText
onText({
fullText: fullTextSoFar,
fullReasoning: fullReasoningSoFar,
toolCall: !toolName ? undefined : { name: toolName, rawParams: {}, isDone: false, doneParams: [], id: toolId },
usage: latestUsage,
})
}
@ -822,7 +838,7 @@ const sendGeminiChat = async ({
if (!toolId) toolId = generateUuid() // ids are empty, but other providers might expect an id
const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId)
const toolCallObj = toolCall ? { toolCall } : {}
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, ...toolCallObj });
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, ...toolCallObj });
}
})
.catch(error => {