🐛 fix(agent-runtime): keep reasoning state in OpenAI providers (#13701)

* 🐛 fix: preserve assistant reasoning in runtime state

* 🐛 fix: preserve agent reasoning and cached usage conversion

* 💬 docs: move usage retention comment to helper

* ♻️ refactor: remove redundant any cast in runtime executor

* 🐛 filter non-finite OpenAI usage values
This commit is contained in:
Rylan Cai 2026-04-10 10:19:08 +08:00 committed by GitHub
parent 4f1d2d494f
commit c85be1265f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 147 additions and 12 deletions

View file

@ -100,6 +100,38 @@ describe('convertUsage', () => {
});
});
it('should preserve zero cache miss tokens for fully cached completion usage', () => {
  // Pricing with a discounted cache-read rate so a fully cached prompt still
  // produces a non-zero cost.
  const fullyCachedPricing: Pricing = {
    units: [
      { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textInput_cacheRead', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textOutput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
    ],
  };

  // Every prompt token was served from cache, so the miss count must be 0.
  const fullyCachedUsage = {
    completion_tokens: 598,
    prompt_tokens: 4198,
    prompt_tokens_details: { cached_tokens: 4198 },
    total_tokens: 4796,
  } as OpenAI.Completions.CompletionUsage;

  const converted = convertOpenAIUsage(fullyCachedUsage, { pricing: fullyCachedPricing });

  // A zero `inputCacheMissTokens` must survive the falsy-value filtering.
  expect(converted).toMatchObject({
    inputCacheMissTokens: 0,
    inputCachedTokens: 4198,
    inputTextTokens: 4198,
    outputTextTokens: 598,
    totalInputTokens: 4198,
    totalOutputTokens: 598,
    totalTokens: 4796,
  });
  expect(converted.cost).toBeGreaterThan(0);
});
it('should handle audio tokens in input correctly', () => {
// Arrange
const usageWithAudioInput = {
@ -248,6 +280,23 @@ describe('convertUsage', () => {
expect(result).not.toHaveProperty('outputAudioTokens');
});
it('should omit NaN usage fields from completion usage output', () => {
  // Embedding-style payloads carry no completion_tokens, which would make the
  // derived output totals NaN; those fields must be dropped entirely.
  const embeddingStyleUsage = {
    prompt_tokens: 100,
    total_tokens: 100,
  } as OpenAI.Completions.CompletionUsage;

  const converted = convertOpenAIUsage(embeddingStyleUsage);

  expect(converted).toEqual({
    inputTextTokens: 100,
    totalInputTokens: 100,
    totalTokens: 100,
  });
  expect(converted).not.toHaveProperty('outputTextTokens');
  expect(converted).not.toHaveProperty('totalOutputTokens');
});
it('should handle XAI provider correctly where completion_tokens does not include reasoning_tokens', () => {
// Arrange
const xaiUsage: OpenAI.Completions.CompletionUsage = {
@ -352,6 +401,38 @@ describe('convertUsage', () => {
});
});
it('should preserve zero cache miss tokens for fully cached response usage', () => {
  // Same pricing shape as the completion-usage case: a cheap cache-read rate
  // keeps the cost positive even when every input token is cached.
  const fullyCachedPricing: Pricing = {
    units: [
      { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textInput_cacheRead', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textOutput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
    ],
  };

  // Responses API usage where the whole input was a cache hit.
  const fullyCachedResponseUsage = {
    input_tokens: 4198,
    input_tokens_details: { cached_tokens: 4198 },
    output_tokens: 598,
    total_tokens: 4796,
  } as OpenAI.Responses.ResponseUsage;

  const converted = convertOpenAIResponseUsage(fullyCachedResponseUsage, {
    pricing: fullyCachedPricing,
  });

  // The zero cache-miss count must not be filtered out as falsy.
  expect(converted).toMatchObject({
    inputCacheMissTokens: 0,
    inputCachedTokens: 4198,
    inputTextTokens: 4198,
    outputTextTokens: 598,
    totalInputTokens: 4198,
    totalOutputTokens: 598,
    totalTokens: 4796,
  });
  expect(converted.cost).toBeGreaterThan(0);
});
it('should enrich completion usage with pricing cost when pricing is provided', () => {
const pricing: Pricing = {
units: [

View file

@ -8,6 +8,19 @@ import { withUsageCost } from './utils/withUsageCost';
const log = debug('lobe-cost:convertOpenAIUsage');
// Retention rules shared by the completion- and response-usage converters.
// Mirrors the reference implementation's `!!value` filtering of zero/falsy
// values, with one exception: a zero `inputCacheMissTokens` is meaningful
// (the prompt was fully served from cache) and must be kept.
const shouldKeepUsageValue = (key: string, value: unknown) => {
  if (value == null) return false;
  if (typeof value !== 'number') return Boolean(value);
  // Non-finite numbers (NaN / Infinity) are never meaningful token counts.
  if (!Number.isFinite(value)) return false;
  // Zero is dropped everywhere except for the cache-miss count.
  return value !== 0 || key === 'inputCacheMissTokens';
};
export const convertOpenAIUsage = (
usage: OpenAI.Completions.CompletionUsage,
payload?: ChatPayloadForTransformStream,
@ -21,7 +34,8 @@ export const convertOpenAIUsage = (
(usage as any).prompt_cache_hit_tokens || usage.prompt_tokens_details?.cached_tokens;
const inputCacheMissTokens =
(usage as any).prompt_cache_miss_tokens || totalInputTokens - cachedTokens;
(usage as any).prompt_cache_miss_tokens ??
(typeof cachedTokens === 'number' ? totalInputTokens - cachedTokens : undefined);
const totalOutputTokens = usage.completion_tokens;
const outputReasoning = usage.completion_tokens_details?.reasoning_tokens || 0;
@ -58,7 +72,7 @@ export const convertOpenAIUsage = (
const finalData = {};
Object.entries(data).forEach(([key, value]) => {
if (!!value) {
if (shouldKeepUsageValue(key, value)) {
// @ts-ignore
finalData[key] = value;
}
@ -112,18 +126,10 @@ export const convertOpenAIResponseUsage = (
totalTokens: overallTotalTokens,
} satisfies ModelTokensUsage; // This helps ensure all keys of ModelTokensUsage are considered
// 4. Filter out zero/falsy values, as done in the reference implementation
// 4. Filter out zero/falsy values using the shared retention rules above.
const finalData: Partial<ModelUsage> = {}; // Use Partial for type safety during construction
Object.entries(data).forEach(([key, value]) => {
if (
value !== undefined &&
value !== null &&
(typeof value !== 'number' || value !== 0) && // A more explicit check than `!!value` if we want to be very specific about
// keeping non-numeric truthy values, but the reference uses `!!value`.
// `!!value` will filter out 0, which is often desired for token counts.
// Let's stick to the reference's behavior:
!!value
) {
if (shouldKeepUsageValue(key, value)) {
// @ts-ignore - We are building an object that will conform to ModelTokensUsage
// by selectively adding properties.
finalData[key as keyof ModelUsage] = value as number;

View file

@ -915,6 +915,7 @@ export const createRuntimeExecutors = (
newState.messages.push({
content,
reasoning: finalReasoning,
role: 'assistant',
tool_calls: tool_calls.length > 0 ? tool_calls : undefined,
});

View file

@ -287,6 +287,53 @@ describe('RuntimeExecutors', () => {
);
});
it('should preserve reasoning in newState when assistant returns tool calls', async () => {
  const pendingToolCalls = [
    {
      function: { arguments: '{}', name: 'search' },
      id: 'call_1',
      type: 'function',
    },
  ];

  // Simulate a model turn that thinks, emits a tool call, then completes.
  const chatMock = vi.fn().mockImplementation(async (_payload, options) => {
    await options?.callback?.onThinking?.('Need to inspect the search results first.');
    await options?.callback?.onToolsCalling?.({ toolsCalling: pendingToolCalls });
    await options?.callback?.onCompletion?.({
      usage: { totalInputTokens: 1, totalOutputTokens: 2, totalTokens: 3 },
    });
    return new Response('done');
  });
  vi.mocked(initModelRuntimeFromDB).mockResolvedValueOnce({ chat: chatMock } as any);

  const executors = createRuntimeExecutors(ctx);
  const initialState = createMockState();

  const llmInstruction = {
    payload: {
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'gpt-4',
      provider: 'openai',
      tools: [],
    },
    type: 'call_llm' as const,
  };

  const { newState } = await executors.call_llm!(llmInstruction, initialState);

  // The assistant message appended to state must retain the reasoning text
  // alongside the tool calls.
  expect(newState.messages.at(-1)).toEqual(
    expect.objectContaining({
      reasoning: { content: 'Need to inspect the search results first.' },
      role: 'assistant',
      tool_calls: [expect.objectContaining({ id: 'call_1' })],
    }),
  );
});
it('should execute compress_context and return compression_result', async () => {
const mockChat = vi.fn().mockImplementation(async (_payload, options) => {
await options?.callback?.onText?.('summary');