mirror of
https://github.com/voideditor/void
synced 2026-05-23 01:18:25 +00:00
add token usage progress bar
This commit is contained in:
parent
17e7a5b152
commit
2be0faa3a6
7 changed files with 222 additions and 17 deletions
|
|
@ -12,7 +12,7 @@ import { URI } from '../../../../base/common/uri.js';
|
|||
import { Emitter, Event } from '../../../../base/common/event.js';
|
||||
import { ILLMMessageService } from '../common/sendLLMMessageService.js';
|
||||
import { chat_userMessageContent, isABuiltinToolName } from '../common/prompt/prompts.js';
|
||||
import { AnthropicReasoning, getErrorMessage, RawToolCallObj, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
|
||||
import { AnthropicReasoning, getErrorMessage, type LLMUsage, RawToolCallObj, RawToolParamsObj } from '../common/sendLLMMessageTypes.js';
|
||||
import { generateUuid } from '../../../../base/common/uuid.js';
|
||||
import { FeatureName, ModelSelection, ModelSelectionOptions } from '../common/voidSettingsTypes.js';
|
||||
import { IVoidSettingsService } from '../common/voidSettingsService.js';
|
||||
|
|
@ -232,6 +232,7 @@ export interface IChatThreadService {
|
|||
|
||||
readonly state: ThreadsState;
|
||||
readonly streamState: ThreadStreamState; // not persistent
|
||||
readonly latestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined }; // not persistent; updated as the model streams
|
||||
|
||||
onDidChangeCurrentThread: Event<void>;
|
||||
onDidChangeStreamState: Event<{ threadId: string }>
|
||||
|
|
@ -305,6 +306,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
readonly onDidChangeStreamState: Event<{ threadId: string }> = this._onDidChangeStreamState.event;
|
||||
|
||||
readonly streamState: ThreadStreamState = {}
|
||||
readonly latestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined } = {}
|
||||
state: ThreadsState // allThreads is persisted, currentThread is not
|
||||
|
||||
// used in checkpointing
|
||||
|
|
@ -484,6 +486,13 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
this._onDidChangeStreamState.fire({ threadId })
|
||||
}
|
||||
|
||||
// Stores the most recent usage report for `threadId` in `latestUsageOfThreadId`, then
// fires the stream-state emitter for that thread. The stream-state emitter is reused
// on purpose: listeners already subscribed to it (including the React mirror in
// services.tsx) re-read the usage map without needing a dedicated event.
private _setLatestUsage(threadId: string, usage: LLMUsage): void {
	const changeEvent = { threadId }
	this.latestUsageOfThreadId[threadId] = usage
	this._onDidChangeStreamState.fire(changeEvent)
}
|
||||
|
||||
|
||||
// ---------- streaming ----------
|
||||
|
||||
|
|
@ -811,10 +820,12 @@ class ChatThreadService extends Disposable implements IChatThreadService {
|
|||
overridesOfModel,
|
||||
logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } },
|
||||
separateSystemMessage: separateSystemMessage,
|
||||
onText: ({ fullText, fullReasoning, toolCall }) => {
|
||||
onText: ({ fullText, fullReasoning, toolCall, usage }) => {
|
||||
if (usage) this._setLatestUsage(threadId, usage)
|
||||
this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) })
|
||||
},
|
||||
onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, }) => {
|
||||
onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, usage }) => {
|
||||
if (usage) this._setLatestUsage(threadId, usage)
|
||||
resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls
|
||||
},
|
||||
onError: async (error) => {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
import React, { ButtonHTMLAttributes, FormEvent, FormHTMLAttributes, Fragment, KeyboardEvent, useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
|
||||
|
||||
import { useAccessor, useChatThreadsState, useChatThreadsStreamState, useSettingsState, useActiveURI, useCommandBarState, useFullChatThreadsStreamState } from '../util/services.js';
|
||||
import { useAccessor, useChatThreadsState, useChatThreadsStreamState, useSettingsState, useActiveURI, useCommandBarState, useFullChatThreadsStreamState, useChatThreadLatestUsage } from '../util/services.js';
|
||||
import { ScrollType } from '../../../../../../../editor/common/editorCommon.js';
|
||||
|
||||
import { ChatMarkdownRender, ChatMessageLocation, getApplyBoxId } from '../markdown/ChatMarkdownRender.js';
|
||||
|
|
@ -29,7 +29,7 @@ import { CopyButton, EditToolAcceptRejectButtonsHTML, IconShell1, JumpToFileButt
|
|||
import { IsRunningType } from '../../../chatThreadService.js';
|
||||
import { acceptAllBg, acceptBorder, buttonFontSize, buttonTextColor, rejectAllBg, rejectBg, rejectBorder } from '../../../../common/helpers/colors.js';
|
||||
import { builtinToolNames, isABuiltinToolName, MAX_FILE_CHARS_PAGE, MAX_TERMINAL_INACTIVE_TIME } from '../../../../common/prompt/prompts.js';
|
||||
import { RawToolCallObj } from '../../../../common/sendLLMMessageTypes.js';
|
||||
import { type LLMUsage, RawToolCallObj } from '../../../../common/sendLLMMessageTypes.js';
|
||||
import ErrorBoundary from './ErrorBoundary.js';
|
||||
import { ToolApprovalTypeSwitch } from '../void-settings-tsx/Settings.js';
|
||||
|
||||
|
|
@ -289,6 +289,127 @@ const ChatModeDropdown = ({ className }: { className: string }) => {
|
|||
|
||||
|
||||
|
||||
// ----- Token usage ring -----
|
||||
// Wraps the send/stop button with an SVG donut showing totalTokens / contextWindow.
|
||||
// On hover: shows percentage + per-bucket breakdown (input / output / reasoning / total).
|
||||
|
||||
// Formats a token count for compact display in the usage tooltip.
//   - undefined / null / non-finite  -> '-'
//   - below 1,000                    -> the number as-is (e.g. '999')
//   - below 1,000,000                -> thousands with a 'k' suffix; two decimals under
//                                       10k, one decimal otherwise (e.g. '1.23k', '12.3k')
//   - otherwise                      -> millions with two decimals (e.g. '1.50M')
const formatTokenCount = (n: number | undefined): string => {
	// `== null` covers both undefined and null; NaN/Infinity would otherwise render as 'NaNM'
	if (n == null || !Number.isFinite(n)) return '-'
	if (n < 1_000) return `${n}`
	if (n < 1_000_000) {
		const thousands = (n / 1_000).toFixed(n < 10_000 ? 2 : 1)
		// Rounding can push values just under one million up to '1000.0'; those belong in
		// the 'M' bucket, so only use the 'k' form while the rounded value is below 1000.
		if (Number(thousands) < 1_000) return `${thousands}k`
	}
	return `${(n / 1_000_000).toFixed(2)}M`
}
|
||||
|
||||
// Maps a usage percentage to the ring's stroke colour:
//   below 50 -> violet-700 (normal), below 80 -> yellow-700 (warning), otherwise red-700 (critical)
const colorForUsagePct = (pct: number) =>
	pct < 50
		? '#6d28d9'
		: pct < 80
			? '#a16207'
			: '#b91c1c'
|
||||
|
||||
// Props for TokenUsageRing.
interface TokenUsageRingProps {
	// Latest usage for the thread. While this is undefined the wrapper is still rendered
	// at full size with no ring drawn, so the send button does not shift when usage arrives.
	usage: LLMUsage | undefined;
	// The model's maximum input context, in tokens.
	contextWindow: number;
	// The element wrapped by the ring (the send/stop button).
	children: React.ReactNode;
	// Width and height of the wrapper in pixels; TokenUsageRing applies its own default when omitted.
	size?: number;
}
|
||||
// Wraps `children` in a fixed-size box and, when usage data is available, draws an SVG
// donut behind them showing total tokens as a fraction of `contextWindow`. Hovering the
// wrapper shows a plain-text tooltip with the percentage and a per-bucket breakdown.
const TokenUsageRing: React.FC<TokenUsageRingProps> = ({ usage, contextWindow, children, size = 34 }) => {
	const strokeWidth = 3
	const center = size / 2
	// shrink the radius by the stroke width so the whole ring fits inside the size x size box
	const radius = (size - strokeWidth) / 2

	// `ring` stays null until there is a usage report AND a positive context window to
	// compare it against; in that case only the (empty) wrapper is rendered.
	const ring = (() => {
		if (!usage || !(contextWindow > 0)) return null

		// prefer the provider-reported total, otherwise add up whichever buckets were reported
		const total = usage.totalTokens ?? ((usage.inputTokens ?? 0) + (usage.outputTokens ?? 0) + (usage.reasoningTokens ?? 0))
		const rawPct = (total / contextWindow) * 100
		// the arc is clamped to [0, 100]; the tooltip below still shows the unclamped percentage
		const clampedPct = Math.max(0, Math.min(100, rawPct))
		const circumference = 2 * Math.PI * radius
		const dashOffset = circumference * (1 - clampedPct / 100)
		const color = colorForUsagePct(clampedPct)

		let displayPct: string
		if (rawPct < 0.01) displayPct = '<0.01%'
		else if (rawPct < 1) displayPct = `${rawPct.toFixed(2)}%`
		else displayPct = `${rawPct.toFixed(1)}%`

		// Plain text only (no HTML): the renderer enforces Trusted Types, and react-tooltip's
		// html mode would assign innerHTML directly, which is blocked.
		const tooltipLines = [
			`Context window usage`,
			`${formatTokenCount(total)} / ${formatTokenCount(contextWindow)} (${displayPct})`,
			``,
			`Input: ${formatTokenCount(usage.inputTokens)}`,
			`Output: ${formatTokenCount(usage.outputTokens)}`,
		]
		// the reasoning row is only listed when the provider reported that bucket
		if (usage.reasoningTokens !== undefined) {
			tooltipLines.push(`Reasoning: ${formatTokenCount(usage.reasoningTokens)}`)
		}
		tooltipLines.push(`Total: ${formatTokenCount(total)}`)

		// attributes shared by the background track and the coloured progress arc
		const circleBase = { cx: center, cy: center, r: radius, strokeWidth, fill: 'none' }

		const svg = (
			<svg
				className='absolute inset-0'
				width={size}
				height={size}
				// rotate so the arc starts at the top of the circle rather than at the right
				style={{ transform: 'rotate(-90deg)' }}
			>
				{/* background track */}
				<circle {...circleBase} stroke='rgba(180,180,180,0.45)' />
				{/* progress arc: a single dash as long as the circumference, offset to hide the unused part */}
				<circle
					{...circleBase}
					stroke={color}
					strokeDasharray={circumference}
					strokeDashoffset={dashOffset}
					strokeLinecap='butt'
					style={{ transition: 'stroke-dashoffset 250ms ease, stroke 250ms ease' }}
				/>
			</svg>
		)

		return { svg, tooltip: tooltipLines.join('\n') }
	})()

	return (
		<div
			className='relative flex items-center justify-center flex-shrink-0'
			style={{ width: size, height: size }}
			data-tooltip-id={ring ? 'void-tooltip' : undefined}
			data-tooltip-content={ring?.tooltip}
			data-tooltip-place={ring ? 'left' : undefined}
		>
			{ring ? ring.svg : null}
			{/* NOTE(review): `z-1` is not a stock Tailwind utility — confirm the project config defines it */}
			<div className='relative z-1 flex items-center justify-center'>{children}</div>
		</div>
	)
}
|
||||
|
||||
// Wraps the send/stop button (`children`) in a TokenUsageRing fed by the thread's latest
// usage and the context window of the model currently selected for `featureName`.
const SubmitButtonWithUsageRing: React.FC<{ threadId: string; featureName: FeatureName; children: React.ReactNode }> = ({ threadId, featureName, children }) => {
	const settingsState = useSettingsState()
	const usage = useChatThreadLatestUsage(threadId)

	// The wrapper is rendered unconditionally so the send button doesn't jump sideways when
	// usage first becomes available; with no model selected the context window is 0, which
	// makes TokenUsageRing hide the SVG while still reserving its size.
	const selectedModel = settingsState.modelSelectionOfFeature[featureName]
	let contextWindow = 0
	if (selectedModel) {
		const { providerName, modelName } = selectedModel
		contextWindow = getModelCapabilities(providerName, modelName, settingsState.overridesOfModel).contextWindow
	}

	return <TokenUsageRing usage={usage} contextWindow={contextWindow}>{children}</TokenUsageRing>
}
|
||||
|
||||
|
||||
interface VoidChatAreaProps {
|
||||
// Required
|
||||
children: React.ReactNode; // This will be the input component
|
||||
|
|
@ -300,6 +421,10 @@ interface VoidChatAreaProps {
|
|||
isDisabled?: boolean;
|
||||
divRef?: React.RefObject<HTMLDivElement | null>;
|
||||
|
||||
// when provided, the send/stop button is wrapped with a ring showing
|
||||
// totalTokens / model.contextWindow for the latest LLM usage on this thread
|
||||
threadIdForUsageRing?: string;
|
||||
|
||||
// UI customization
|
||||
className?: string;
|
||||
showModelDropdown?: boolean;
|
||||
|
|
@ -336,6 +461,7 @@ export const VoidChatArea: React.FC<VoidChatAreaProps> = ({
|
|||
setSelections,
|
||||
featureName,
|
||||
loadingIcon,
|
||||
threadIdForUsageRing,
|
||||
}) => {
|
||||
return (
|
||||
<div
|
||||
|
|
@ -397,14 +523,17 @@ export const VoidChatArea: React.FC<VoidChatAreaProps> = ({
|
|||
|
||||
{isStreaming && loadingIcon}
|
||||
|
||||
{isStreaming ? (
|
||||
<ButtonStop onClick={onAbort} />
|
||||
) : (
|
||||
<ButtonSubmit
|
||||
onClick={onSubmit}
|
||||
disabled={isDisabled}
|
||||
/>
|
||||
)}
|
||||
{(() => {
|
||||
const button = isStreaming
|
||||
? <ButtonStop onClick={onAbort} />
|
||||
: <ButtonSubmit onClick={onSubmit} disabled={isDisabled} />
|
||||
if (!threadIdForUsageRing) return button
|
||||
return (
|
||||
<SubmitButtonWithUsageRing threadId={threadIdForUsageRing} featureName={featureName}>
|
||||
{button}
|
||||
</SubmitButtonWithUsageRing>
|
||||
)
|
||||
})()}
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
|
@ -1148,6 +1277,7 @@ const UserMessageComponent = ({ chatMessage, messageIdx, isCheckpointGhost, curr
|
|||
showProspectiveSelections={false}
|
||||
selections={stagingSelections}
|
||||
setSelections={setStagingSelections}
|
||||
threadIdForUsageRing={chatThreadsService.state.currentThreadId}
|
||||
>
|
||||
<VoidInputBox2
|
||||
enableAtToMention
|
||||
|
|
@ -3074,6 +3204,7 @@ export const SidebarChat = () => {
|
|||
selections={selections}
|
||||
setSelections={setSelections}
|
||||
onClickAnywhere={() => { textAreaRef.current?.focus() }}
|
||||
threadIdForUsageRing={chatThreadsState.currentThreadId}
|
||||
>
|
||||
<VoidInputBox2
|
||||
enableAtToMention
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import { IPathService } from '../../../../../../../workbench/services/path/commo
|
|||
import { IMetricsService } from '../../../../../../../workbench/contrib/void/common/metricsService.js'
|
||||
import { URI } from '../../../../../../../base/common/uri.js'
|
||||
import { IChatThreadService, ThreadsState, ThreadStreamState } from '../../../chatThreadService.js'
|
||||
import { type LLMUsage } from '../../../../common/sendLLMMessageTypes.js'
|
||||
import { ITerminalToolService } from '../../../terminalToolService.js'
|
||||
import { ILanguageService } from '../../../../../../../editor/common/languages/language.js'
|
||||
import { IVoidModelService } from '../../../../common/voidModelService.js'
|
||||
|
|
@ -67,6 +68,8 @@ const chatThreadsStateListeners: Set<(s: ThreadsState) => void> = new Set()
|
|||
let chatThreadsStreamState: ThreadStreamState
|
||||
const chatThreadsStreamStateListeners: Set<(threadId: string) => void> = new Set()
|
||||
|
||||
let chatThreadsLatestUsageOfThreadId: { [threadId: string]: LLMUsage | undefined } = {}
|
||||
|
||||
let settingsState: VoidSettingsState
|
||||
const settingsStateListeners: Set<(s: VoidSettingsState) => void> = new Set()
|
||||
|
||||
|
|
@ -118,9 +121,11 @@ export const _registerServices = (accessor: ServicesAccessor) => {
|
|||
|
||||
// same service, different state
|
||||
chatThreadsStreamState = chatThreadsStateService.streamState
|
||||
chatThreadsLatestUsageOfThreadId = chatThreadsStateService.latestUsageOfThreadId
|
||||
disposables.push(
|
||||
chatThreadsStateService.onDidChangeStreamState(({ threadId }) => {
|
||||
chatThreadsStreamState = chatThreadsStateService.streamState
|
||||
chatThreadsLatestUsageOfThreadId = chatThreadsStateService.latestUsageOfThreadId
|
||||
chatThreadsStreamStateListeners.forEach(l => l(threadId))
|
||||
})
|
||||
)
|
||||
|
|
@ -304,6 +309,20 @@ export const useChatThreadsStreamState = (threadId: string) => {
|
|||
return s
|
||||
}
|
||||
|
||||
// React hook returning the latest LLM usage recorded for `threadId`, or undefined if none
// has been recorded. It piggybacks on the stream-state listener set: whenever a stream-state
// change is announced for this thread, the usage is re-read from the mirrored usage map.
export const useChatThreadLatestUsage = (threadId: string) => {
	const [latestUsage, setLatestUsage] = useState<LLMUsage | undefined>(chatThreadsLatestUsageOfThreadId[threadId])

	useEffect(() => {
		const syncFromMirror = () => { setLatestUsage(chatThreadsLatestUsageOfThreadId[threadId]) }

		// re-read immediately so a change of `threadId` picks up that thread's value
		syncFromMirror()

		const onStreamStateChanged = (changedThreadId: string) => {
			// ignore notifications for other threads
			if (changedThreadId === threadId) syncFromMirror()
		}
		chatThreadsStreamStateListeners.add(onStreamStateChanged)
		return () => { chatThreadsStreamStateListeners.delete(onStreamStateChanged) }
	}, [setLatestUsage, threadId])

	return latestUsage
}
|
||||
|
||||
export const useFullChatThreadsStreamState = () => {
|
||||
const [s, ss] = useState(chatThreadsStreamState)
|
||||
useEffect(() => {
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ export const VoidTooltip = () => {
|
|||
z-index: 999999;
|
||||
max-width: 300px;
|
||||
word-wrap: break-word;
|
||||
white-space: pre-line;
|
||||
}
|
||||
|
||||
#void-tooltip {
|
||||
|
|
|
|||
|
|
@ -58,6 +58,9 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
|
|||
}
|
||||
}
|
||||
|
||||
// remembers {provider, model} per request so we can include it when logging usage
|
||||
private readonly modelInfoOfRequestId: { [requestId: string]: { providerName: string, modelName: string } } = {}
|
||||
|
||||
constructor(
|
||||
@IMainProcessService private readonly mainProcessService: IMainProcessService, // used as a renderer (only usable on client side)
|
||||
@IVoidSettingsService private readonly voidSettingsService: IVoidSettingsService,
|
||||
|
|
@ -76,6 +79,18 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
|
|||
this.llmMessageHooks.onText[e.requestId]?.(e)
|
||||
}))
|
||||
this._register((this.channel.listen('onFinalMessage_sendLLMMessage') satisfies Event<EventLLMMessageOnFinalMessageParams>)(e => {
|
||||
if (e.usage) {
|
||||
const info = this.modelInfoOfRequestId[e.requestId]
|
||||
console.log('[Void][LLM] usage', {
|
||||
provider: info?.providerName,
|
||||
model: info?.modelName,
|
||||
requestId: e.requestId,
|
||||
inputTokens: e.usage.inputTokens,
|
||||
outputTokens: e.usage.outputTokens,
|
||||
reasoningTokens: e.usage.reasoningTokens,
|
||||
totalTokens: e.usage.totalTokens,
|
||||
})
|
||||
}
|
||||
this.llmMessageHooks.onFinalMessage[e.requestId]?.(e);
|
||||
this._clearChannelHooks(e.requestId)
|
||||
}))
|
||||
|
|
@ -126,6 +141,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
|
|||
this.llmMessageHooks.onFinalMessage[requestId] = onFinalMessage
|
||||
this.llmMessageHooks.onError[requestId] = onError
|
||||
this.llmMessageHooks.onAbort[requestId] = onAbort // used internally only
|
||||
this.modelInfoOfRequestId[requestId] = { providerName: modelSelection.providerName, modelName: modelSelection.modelName }
|
||||
|
||||
// params will be stripped of all its functions over the IPC channel
|
||||
this.channel.call('sendLLMMessage', {
|
||||
|
|
@ -186,6 +202,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
|
|||
delete this.llmMessageHooks.onText[requestId]
|
||||
delete this.llmMessageHooks.onFinalMessage[requestId]
|
||||
delete this.llmMessageHooks.onError[requestId]
|
||||
delete this.modelInfoOfRequestId[requestId]
|
||||
|
||||
delete this.listHooks.ollama.success[requestId]
|
||||
delete this.listHooks.ollama.error[requestId]
|
||||
|
|
|
|||
|
|
@ -91,8 +91,18 @@ export type RawToolCallObj = {
|
|||
|
||||
export type AnthropicReasoning = ({ type: 'thinking'; thinking: any; signature: string; } | { type: 'redacted_thinking', data: any })
|
||||
|
||||
export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj }) => void
|
||||
export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null }) => void // id is tool_use_id
|
||||
// Token usage as reported by the provider. Every field is optional because providers
// expose different subsets: Anthropic streams input/output separately, OpenAI only reports
// at the end (with stream_options.include_usage), Gemini reports via usageMetadata, and
// Ollama reports on the final chunk.
export type LLMUsage = {
	// tokens in the prompt (e.g. Gemini's promptTokenCount)
	inputTokens?: number;
	// tokens in the response (e.g. Gemini's candidatesTokenCount)
	outputTokens?: number;
	// provider-reported total (e.g. Gemini's totalTokenCount)
	totalTokens?: number;
	// tokens spent on reasoning, when reported (e.g. Gemini's thoughtsTokenCount)
	reasoningTokens?: number;
}
|
||||
|
||||
export type OnText = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; usage?: LLMUsage }) => void
|
||||
export type OnFinalMessage = (p: { fullText: string; fullReasoning: string; toolCall?: RawToolCallObj; anthropicReasoning: AnthropicReasoning[] | null; usage?: LLMUsage }) => void // id is tool_use_id
|
||||
export type OnError = (p: { message: string; fullError: Error | null }) => void
|
||||
export type OnAbort = () => void
|
||||
export type AbortRef = { current: (() => void) | null }
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ import { Tool as GeminiTool, FunctionDeclaration, GoogleGenAI, ThinkingConfig, S
|
|||
import { GoogleAuth } from 'google-auth-library'
|
||||
/* eslint-enable */
|
||||
|
||||
import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
|
||||
import { AnthropicLLMChatMessage, GeminiLLMChatMessage, LLMChatMessage, LLMFIMMessage, type LLMUsage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText, RawToolCallObj, RawToolParamsObj } from '../../common/sendLLMMessageTypes.js';
|
||||
import { ChatMode, displayInfoOfProviderName, ModelSelectionOptions, OverridesOfModel, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
|
||||
import { getSendableReasoningInfo, getModelCapabilities, getProviderCapabilities, defaultProviderSettings, getReservedOutputTokenSpace } from '../../common/modelCapabilities.js';
|
||||
import { extractReasoningWrapper, extractXMLToolsWrapper } from './extractGrammar.js';
|
||||
|
|
@ -777,6 +777,9 @@ const sendGeminiChat = async ({
|
|||
let toolParamsStr = ''
|
||||
let toolId = ''
|
||||
|
||||
// Gemini reports token usage via chunk.usageMetadata. It typically appears in the last
|
||||
// chunk(s), but we keep the latest seen so we always forward the freshest values.
|
||||
let latestUsage: LLMUsage | undefined = undefined
|
||||
|
||||
genAI.models.generateContentStream({
|
||||
model: modelName,
|
||||
|
|
@ -807,11 +810,24 @@ const sendGeminiChat = async ({
|
|||
|
||||
// (do not handle reasoning yet)
|
||||
|
||||
// usage (Gemini exposes promptTokenCount / candidatesTokenCount / totalTokenCount /
|
||||
// thoughtsTokenCount via usageMetadata). Only update when the chunk reports it.
|
||||
const usageMetadata = chunk.usageMetadata
|
||||
if (usageMetadata) {
|
||||
latestUsage = {
|
||||
inputTokens: usageMetadata.promptTokenCount,
|
||||
outputTokens: usageMetadata.candidatesTokenCount,
|
||||
totalTokens: usageMetadata.totalTokenCount,
|
||||
reasoningTokens: usageMetadata.thoughtsTokenCount,
|
||||
}
|
||||
}
|
||||
|
||||
// call onText
|
||||
onText({
|
||||
fullText: fullTextSoFar,
|
||||
fullReasoning: fullReasoningSoFar,
|
||||
toolCall: !toolName ? undefined : { name: toolName, rawParams: {}, isDone: false, doneParams: [], id: toolId },
|
||||
usage: latestUsage,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -822,7 +838,7 @@ const sendGeminiChat = async ({
|
|||
if (!toolId) toolId = generateUuid() // ids are empty, but other providers might expect an id
|
||||
const toolCall = rawToolCallObjOfParamsStr(toolName, toolParamsStr, toolId)
|
||||
const toolCallObj = toolCall ? { toolCall } : {}
|
||||
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, ...toolCallObj });
|
||||
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, anthropicReasoning: null, usage: latestUsage, ...toolCallObj });
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
|
|
|
|||
Loading…
Reference in a new issue