mirror of
https://github.com/lobehub/lobehub
synced 2026-04-21 17:47:27 +00:00
🐛 fix:(agent-runtime): keep reasoning state in openai providers (#13701)
* 🐛 fix: preserve assistant reasoning in runtime state * 🐛 fix: preserve agent reasoning and cached usage conversion * 💬 docs: move usage retention comment to helper * ♻️ refactor: remove redundant any cast in runtime executor * 🐛 filter non-finite OpenAI usage values
This commit is contained in:
parent
4f1d2d494f
commit
c85be1265f
4 changed files with 147 additions and 12 deletions
|
|
@ -100,6 +100,38 @@ describe('convertUsage', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('should preserve zero cache miss tokens for fully cached completion usage', () => {
|
||||
const pricing: Pricing = {
|
||||
units: [
|
||||
{ name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{ name: 'textInput_cacheRead', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{ name: 'textOutput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
|
||||
],
|
||||
};
|
||||
|
||||
const usageWithFullyCachedPrompt = {
|
||||
completion_tokens: 598,
|
||||
prompt_tokens: 4198,
|
||||
prompt_tokens_details: {
|
||||
cached_tokens: 4198,
|
||||
},
|
||||
total_tokens: 4796,
|
||||
} as OpenAI.Completions.CompletionUsage;
|
||||
|
||||
const result = convertOpenAIUsage(usageWithFullyCachedPrompt, { pricing });
|
||||
|
||||
expect(result).toMatchObject({
|
||||
inputCacheMissTokens: 0,
|
||||
inputCachedTokens: 4198,
|
||||
inputTextTokens: 4198,
|
||||
outputTextTokens: 598,
|
||||
totalInputTokens: 4198,
|
||||
totalOutputTokens: 598,
|
||||
totalTokens: 4796,
|
||||
});
|
||||
expect(result.cost).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should handle audio tokens in input correctly', () => {
|
||||
// Arrange
|
||||
const usageWithAudioInput = {
|
||||
|
|
@ -248,6 +280,23 @@ describe('convertUsage', () => {
|
|||
expect(result).not.toHaveProperty('outputAudioTokens');
|
||||
});
|
||||
|
||||
it('should omit NaN usage fields from completion usage output', () => {
|
||||
const embeddingLikeUsage = {
|
||||
prompt_tokens: 100,
|
||||
total_tokens: 100,
|
||||
} as OpenAI.Completions.CompletionUsage;
|
||||
|
||||
const result = convertOpenAIUsage(embeddingLikeUsage);
|
||||
|
||||
expect(result).toEqual({
|
||||
inputTextTokens: 100,
|
||||
totalInputTokens: 100,
|
||||
totalTokens: 100,
|
||||
});
|
||||
expect(result).not.toHaveProperty('outputTextTokens');
|
||||
expect(result).not.toHaveProperty('totalOutputTokens');
|
||||
});
|
||||
|
||||
it('should handle XAI provider correctly where completion_tokens does not include reasoning_tokens', () => {
|
||||
// Arrange
|
||||
const xaiUsage: OpenAI.Completions.CompletionUsage = {
|
||||
|
|
@ -352,6 +401,38 @@ describe('convertUsage', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('should preserve zero cache miss tokens for fully cached response usage', () => {
|
||||
const pricing: Pricing = {
|
||||
units: [
|
||||
{ name: 'textInput', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{ name: 'textInput_cacheRead', rate: 0.1, strategy: 'fixed', unit: 'millionTokens' },
|
||||
{ name: 'textOutput', rate: 2, strategy: 'fixed', unit: 'millionTokens' },
|
||||
],
|
||||
};
|
||||
|
||||
const responseUsage = {
|
||||
input_tokens: 4198,
|
||||
input_tokens_details: {
|
||||
cached_tokens: 4198,
|
||||
},
|
||||
output_tokens: 598,
|
||||
total_tokens: 4796,
|
||||
} as OpenAI.Responses.ResponseUsage;
|
||||
|
||||
const result = convertOpenAIResponseUsage(responseUsage, { pricing });
|
||||
|
||||
expect(result).toMatchObject({
|
||||
inputCacheMissTokens: 0,
|
||||
inputCachedTokens: 4198,
|
||||
inputTextTokens: 4198,
|
||||
outputTextTokens: 598,
|
||||
totalInputTokens: 4198,
|
||||
totalOutputTokens: 598,
|
||||
totalTokens: 4796,
|
||||
});
|
||||
expect(result.cost).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('should enrich completion usage with pricing cost when pricing is provided', () => {
|
||||
const pricing: Pricing = {
|
||||
units: [
|
||||
|
|
|
|||
|
|
@ -8,6 +8,19 @@ import { withUsageCost } from './utils/withUsageCost';
|
|||
|
||||
const log = debug('lobe-cost:convertOpenAIUsage');
|
||||
|
||||
// Keep the reference implementation's behavior of filtering out zero/falsy values,
|
||||
// except for inputCacheMissTokens where 0 is semantically meaningful for fully cached prompts.
|
||||
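// A plain `!!value` check filters out every 0, which is usually what we want for token counts,
// but it would also drop that meaningful 0. Non-finite numbers (NaN, Infinity) are dropped too.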
|
||||
// Decides whether a single usage entry should be copied into the final usage object.
// `key` is the usage field name, `value` is the raw value computed for that field.
// Returns true when the entry should be kept, false when it should be omitted.
const shouldKeepUsageValue = (key: string, value: unknown) => {
|
||||
// Missing values are never kept.
if (value === undefined || value === null) return false;
|
||||
// Non-numeric values fall back to plain truthiness.
if (typeof value !== 'number') return Boolean(value);
|
||||
// NaN and +/-Infinity are dropped.
if (!Number.isFinite(value)) return false;
|
||||
|
||||
// Any finite non-zero number is kept.
if (value !== 0) return true;
|
||||
|
||||
// A zero is kept only for inputCacheMissTokens; zeros under any other key are dropped.
return key === 'inputCacheMissTokens';
|
||||
};
|
||||
|
||||
export const convertOpenAIUsage = (
|
||||
usage: OpenAI.Completions.CompletionUsage,
|
||||
payload?: ChatPayloadForTransformStream,
|
||||
|
|
@ -21,7 +34,8 @@ export const convertOpenAIUsage = (
|
|||
(usage as any).prompt_cache_hit_tokens || usage.prompt_tokens_details?.cached_tokens;
|
||||
|
||||
const inputCacheMissTokens =
|
||||
(usage as any).prompt_cache_miss_tokens || totalInputTokens - cachedTokens;
|
||||
(usage as any).prompt_cache_miss_tokens ??
|
||||
(typeof cachedTokens === 'number' ? totalInputTokens - cachedTokens : undefined);
|
||||
|
||||
const totalOutputTokens = usage.completion_tokens;
|
||||
const outputReasoning = usage.completion_tokens_details?.reasoning_tokens || 0;
|
||||
|
|
@ -58,7 +72,7 @@ export const convertOpenAIUsage = (
|
|||
const finalData = {};
|
||||
|
||||
Object.entries(data).forEach(([key, value]) => {
|
||||
if (!!value) {
|
||||
if (shouldKeepUsageValue(key, value)) {
|
||||
// @ts-ignore
|
||||
finalData[key] = value;
|
||||
}
|
||||
|
|
@ -112,18 +126,10 @@ export const convertOpenAIResponseUsage = (
|
|||
totalTokens: overallTotalTokens,
|
||||
} satisfies ModelTokensUsage; // This helps ensure all keys of ModelTokensUsage are considered
|
||||
|
||||
// 4. Filter out zero/falsy values, as done in the reference implementation
|
||||
// 4. Filter out zero/falsy values using the shared retention rules above.
|
||||
const finalData: Partial<ModelUsage> = {}; // Use Partial for type safety during construction
|
||||
Object.entries(data).forEach(([key, value]) => {
|
||||
if (
|
||||
value !== undefined &&
|
||||
value !== null &&
|
||||
(typeof value !== 'number' || value !== 0) && // A more explicit check than `!!value` if we want to be very specific about
|
||||
// keeping non-numeric truthy values, but the reference uses `!!value`.
|
||||
// `!!value` will filter out 0, which is often desired for token counts.
|
||||
// Let's stick to the reference's behavior:
|
||||
!!value
|
||||
) {
|
||||
if (shouldKeepUsageValue(key, value)) {
|
||||
// @ts-ignore - We are building an object that will conform to ModelTokensUsage
|
||||
// by selectively adding properties.
|
||||
finalData[key as keyof ModelUsage] = value as number;
|
||||
|
|
|
|||
|
|
@ -915,6 +915,7 @@ export const createRuntimeExecutors = (
|
|||
|
||||
newState.messages.push({
|
||||
content,
|
||||
reasoning: finalReasoning,
|
||||
role: 'assistant',
|
||||
tool_calls: tool_calls.length > 0 ? tool_calls : undefined,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -287,6 +287,53 @@ describe('RuntimeExecutors', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('should preserve reasoning in newState when assistant returns tool calls', async () => {
|
||||
const toolCallPayload = [
|
||||
{
|
||||
function: { arguments: '{}', name: 'search' },
|
||||
id: 'call_1',
|
||||
type: 'function',
|
||||
},
|
||||
];
|
||||
|
||||
const mockChat = vi.fn().mockImplementation(async (_payload, options) => {
|
||||
await options?.callback?.onThinking?.('Need to inspect the search results first.');
|
||||
await options?.callback?.onToolsCalling?.({ toolsCalling: toolCallPayload });
|
||||
await options?.callback?.onCompletion?.({
|
||||
usage: {
|
||||
totalInputTokens: 1,
|
||||
totalOutputTokens: 2,
|
||||
totalTokens: 3,
|
||||
},
|
||||
});
|
||||
return new Response('done');
|
||||
});
|
||||
vi.mocked(initModelRuntimeFromDB).mockResolvedValueOnce({ chat: mockChat } as any);
|
||||
|
||||
const executors = createRuntimeExecutors(ctx);
|
||||
const state = createMockState();
|
||||
|
||||
const instruction = {
|
||||
payload: {
|
||||
messages: [{ content: 'Hello', role: 'user' }],
|
||||
model: 'gpt-4',
|
||||
provider: 'openai',
|
||||
tools: [],
|
||||
},
|
||||
type: 'call_llm' as const,
|
||||
};
|
||||
|
||||
const result = await executors.call_llm!(instruction, state);
|
||||
|
||||
expect(result.newState.messages.at(-1)).toEqual(
|
||||
expect.objectContaining({
|
||||
reasoning: { content: 'Need to inspect the search results first.' },
|
||||
role: 'assistant',
|
||||
tool_calls: [expect.objectContaining({ id: 'call_1' })],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('should execute compress_context and return compression_result', async () => {
|
||||
const mockChat = vi.fn().mockImplementation(async (_payload, options) => {
|
||||
await options?.callback?.onText?.('summary');
|
||||
|
|
|
|||
Loading…
Reference in a new issue