🐛 fix(agent-runtime): keep reasoning state in OpenAI providers (#13701)

* 🐛 fix: preserve assistant reasoning in runtime state

* 🐛 fix: preserve agent reasoning and cached usage conversion

* 💬 docs: move usage retention comment to helper

* ♻️ refactor: remove redundant any cast in runtime executor

* 🐛 filter non-finite OpenAI usage values
This commit is contained in:
Rylan Cai 2026-04-10 10:19:08 +08:00 committed by GitHub
parent 4f1d2d494f
commit c85be1265f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 147 additions and 12 deletions

View file

@ -100,6 +100,38 @@ describe('convertUsage', () => {
});
});
it('should preserve zero cache miss tokens for fully cached completion usage', () => {
  // Pricing with a discounted cache-read rate so a fully cached prompt still
  // produces a non-zero cost.
  const fullyCachedPricing: Pricing = {
    units: [
      { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textInput_cacheRead', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textOutput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
    ],
  };

  // Every prompt token was served from cache, so the miss count must be 0.
  const fullyCachedUsage = {
    completion_tokens: 598,
    prompt_tokens: 4198,
    prompt_tokens_details: { cached_tokens: 4198 },
    total_tokens: 4796,
  } as OpenAI.Completions.CompletionUsage;

  const converted = convertOpenAIUsage(fullyCachedUsage, { pricing: fullyCachedPricing });

  // A zero `inputCacheMissTokens` must survive the falsy-value filtering.
  expect(converted).toMatchObject({
    inputCacheMissTokens: 0,
    inputCachedTokens: 4198,
    inputTextTokens: 4198,
    outputTextTokens: 598,
    totalInputTokens: 4198,
    totalOutputTokens: 598,
    totalTokens: 4796,
  });
  expect(converted.cost).toBeGreaterThan(0);
});
it('should handle audio tokens in input correctly', () => {
// Arrange
const usageWithAudioInput = {
@ -248,6 +280,23 @@ describe('convertUsage', () => {
expect(result).not.toHaveProperty('outputAudioTokens');
});
it('should omit NaN usage fields from completion usage output', () => {
  // Embedding-style payloads carry no completion_tokens, which would make the
  // derived output totals NaN; those fields must be dropped entirely.
  const embeddingStyleUsage = {
    prompt_tokens: 100,
    total_tokens: 100,
  } as OpenAI.Completions.CompletionUsage;

  const converted = convertOpenAIUsage(embeddingStyleUsage);

  expect(converted).toEqual({
    inputTextTokens: 100,
    totalInputTokens: 100,
    totalTokens: 100,
  });
  expect(converted).not.toHaveProperty('outputTextTokens');
  expect(converted).not.toHaveProperty('totalOutputTokens');
});
it('should handle XAI provider correctly where completion_tokens does not include reasoning_tokens', () => {
// Arrange
const xaiUsage: OpenAI.Completions.CompletionUsage = {
@ -352,6 +401,38 @@ describe('convertUsage', () => {
});
});
it('should preserve zero cache miss tokens for fully cached response usage', () => {
  // Same pricing shape as the completion-usage case: a cheap cache-read rate
  // keeps the cost positive even when every input token is cached.
  const fullyCachedPricing: Pricing = {
    units: [
      { name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textInput_cacheRead', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
      { name: 'textOutput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
    ],
  };

  // Responses API usage where the whole input was a cache hit.
  const fullyCachedResponseUsage = {
    input_tokens: 4198,
    input_tokens_details: { cached_tokens: 4198 },
    output_tokens: 598,
    total_tokens: 4796,
  } as OpenAI.Responses.ResponseUsage;

  const converted = convertOpenAIResponseUsage(fullyCachedResponseUsage, {
    pricing: fullyCachedPricing,
  });

  // The zero cache-miss count must not be filtered out as falsy.
  expect(converted).toMatchObject({
    inputCacheMissTokens: 0,
    inputCachedTokens: 4198,
    inputTextTokens: 4198,
    outputTextTokens: 598,
    totalInputTokens: 4198,
    totalOutputTokens: 598,
    totalTokens: 4796,
  });
  expect(converted.cost).toBeGreaterThan(0);
});
it('should enrich completion usage with pricing cost when pricing is provided', () => {
const pricing: Pricing = {
units: [

View file

@ -8,6 +8,19 @@ import { withUsageCost } from './utils/withUsageCost';
const log = debug('lobe-cost:convertOpenAIUsage');
// Retention rules shared by the completion- and response-usage converters.
// Mirrors the reference implementation's `!!value` filtering of zero/falsy
// values, with one exception: a zero `inputCacheMissTokens` is meaningful
// (the prompt was fully served from cache) and must be kept.
const shouldKeepUsageValue = (key: string, value: unknown) => {
  if (value == null) return false;
  if (typeof value !== 'number') return Boolean(value);
  // Non-finite numbers (NaN / Infinity) are never meaningful token counts.
  if (!Number.isFinite(value)) return false;
  // Zero is dropped everywhere except for the cache-miss count.
  return value !== 0 || key === 'inputCacheMissTokens';
};
export const convertOpenAIUsage = (
usage: OpenAI.Completions.CompletionUsage,
payload?: ChatPayloadForTransformStream,
@ -21,7 +34,8 @@ export const convertOpenAIUsage = (
(usage as any).prompt_cache_hit_tokens || usage.prompt_tokens_details?.cached_tokens;
const inputCacheMissTokens =
(usage as any).prompt_cache_miss_tokens || totalInputTokens - cachedTokens;
(usage as any).prompt_cache_miss_tokens ??
(typeof cachedTokens === 'number' ? totalInputTokens - cachedTokens : undefined);
const totalOutputTokens = usage.completion_tokens;
const outputReasoning = usage.completion_tokens_details?.reasoning_tokens || 0;
@ -58,7 +72,7 @@ export const convertOpenAIUsage = (
const finalData = {};
Object.entries(data).forEach(([key, value]) => {
if (!!value) {
if (shouldKeepUsageValue(key, value)) {
// @ts-ignore
finalData[key] = value;
}
@ -112,18 +126,10 @@ export const convertOpenAIResponseUsage = (
totalTokens: overallTotalTokens,
} satisfies ModelTokensUsage; // This helps ensure all keys of ModelTokensUsage are considered
// 4. Filter out zero/falsy values, as done in the reference implementation
// 4. Filter out zero/falsy values using the shared retention rules above.
const finalData: Partial<ModelUsage> = {}; // Use Partial for type safety during construction
Object.entries(data).forEach(([key, value]) => {
if (
value !== undefined &&
value !== null &&
(typeof value !== 'number' || value !== 0) && // A more explicit check than `!!value` if we want to be very specific about
// keeping non-numeric truthy values, but the reference uses `!!value`.
// `!!value` will filter out 0, which is often desired for token counts.
// Let's stick to the reference's behavior:
!!value
) {
if (shouldKeepUsageValue(key, value)) {
// @ts-ignore - We are building an object that will conform to ModelTokensUsage
// by selectively adding properties.
finalData[key as keyof ModelUsage] = value as number;

View file

@ -915,6 +915,7 @@ export const createRuntimeExecutors = (
newState.messages.push({
content,
reasoning: finalReasoning,
role: 'assistant',
tool_calls: tool_calls.length > 0 ? tool_calls : undefined,
});

View file

@ -287,6 +287,53 @@ describe('RuntimeExecutors', () => {
);
});
it('should preserve reasoning in newState when assistant returns tool calls', async () => {
  const pendingToolCalls = [
    {
      function: { arguments: '{}', name: 'search' },
      id: 'call_1',
      type: 'function',
    },
  ];

  // Simulate a model turn that thinks, emits a tool call, then completes.
  const chatMock = vi.fn().mockImplementation(async (_payload, options) => {
    await options?.callback?.onThinking?.('Need to inspect the search results first.');
    await options?.callback?.onToolsCalling?.({ toolsCalling: pendingToolCalls });
    await options?.callback?.onCompletion?.({
      usage: { totalInputTokens: 1, totalOutputTokens: 2, totalTokens: 3 },
    });
    return new Response('done');
  });
  vi.mocked(initModelRuntimeFromDB).mockResolvedValueOnce({ chat: chatMock } as any);

  const executors = createRuntimeExecutors(ctx);
  const initialState = createMockState();

  const llmInstruction = {
    payload: {
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'gpt-4',
      provider: 'openai',
      tools: [],
    },
    type: 'call_llm' as const,
  };

  const { newState } = await executors.call_llm!(llmInstruction, initialState);

  // The assistant message appended to state must retain the reasoning text
  // alongside the tool calls.
  expect(newState.messages.at(-1)).toEqual(
    expect.objectContaining({
      reasoning: { content: 'Need to inspect the search results first.' },
      role: 'assistant',
      tool_calls: [expect.objectContaining({ id: 'call_1' })],
    }),
  );
});
it('should execute compress_context and return compression_result', async () => {
const mockChat = vi.fn().mockImplementation(async (_payload, options) => {
await options?.callback?.onText?.('summary');