diff --git a/packages/model-bank/src/aiModels/kimiCodingPlan.ts b/packages/model-bank/src/aiModels/kimiCodingPlan.ts
index 1888b04aa3..0454843940 100644
--- a/packages/model-bank/src/aiModels/kimiCodingPlan.ts
+++ b/packages/model-bank/src/aiModels/kimiCodingPlan.ts
@@ -10,12 +10,15 @@ const kimiCodingPlanChatModels: AIChatModelCard[] = [
       video: true,
       vision: true,
     },
+    config: {
+      deploymentName: 'k2p5',
+    },
     contextWindowTokens: 262_144,
     description:
       "Kimi K2.5 is Kimi's most versatile model to date, featuring a native multimodal architecture that supports both vision and text inputs, 'thinking' and 'non-thinking' modes, and both conversational and agent tasks.",
     displayName: 'Kimi K2.5',
     enabled: true,
-    id: 'k2p5',
+    id: 'kimi-k2.5',
     maxOutput: 32_768,
     organization: 'Moonshot',
     releasedAt: '2026-01-27',
diff --git a/packages/model-bank/src/aiModels/siliconcloud.ts b/packages/model-bank/src/aiModels/siliconcloud.ts
index adc6828c49..d500e82955 100644
--- a/packages/model-bank/src/aiModels/siliconcloud.ts
+++ b/packages/model-bank/src/aiModels/siliconcloud.ts
@@ -329,6 +329,63 @@ const siliconcloudChatModels: AIChatModelCard[] = [
     },
     type: 'chat',
   },
+  {
+    abilities: {
+      functionCall: true,
+      reasoning: true,
+    },
+    contextWindowTokens: 198_000,
+    description:
+      'GLM-5.1 is a next-generation flagship model designed for agent engineering, using a Mixture of Experts (MoE) architecture with 754B parameters. It significantly enhances programming capabilities, achieving leading results on SWE-Bench Pro, and substantially outperforms its predecessor on benchmarks like NL2Repo and Terminal-Bench 2.0. Designed for long-duration agent tasks, it handles ambiguous questions with better judgment, decomposes complex tasks, executes experiments, analyzes results, and continuously optimizes through hundreds of iterations and thousands of tool calls.',
+    displayName: 'GLM-5.1 (Pro)',
+    id: 'Pro/zai-org/GLM-5.1',
+    pricing: {
+      currency: 'CNY',
+      units: [
+        {
+          lookup: {
+            prices: {
+              '[0, 0.032]': 1.3,
+              '[0.032, infinity]': 2,
+            },
+            pricingParams: ['textInput'],
+          },
+          name: 'textInput_cacheRead',
+          strategy: 'lookup',
+          unit: 'millionTokens',
+        },
+        {
+          lookup: {
+            prices: {
+              '[0, 0.032]': 6,
+              '[0.032, infinity]': 8,
+            },
+            pricingParams: ['textInput'],
+          },
+          name: 'textInput',
+          strategy: 'lookup',
+          unit: 'millionTokens',
+        },
+        {
+          lookup: {
+            prices: {
+              '[0, 0.032]': 24,
+              '[0.032, infinity]': 28,
+            },
+            pricingParams: ['textInput'],
+          },
+          name: 'textOutput',
+          strategy: 'lookup',
+          unit: 'millionTokens',
+        },
+      ],
+    },
+    releasedAt: '2026-04-08',
+    settings: {
+      extendParams: ['enableReasoning', 'reasoningBudgetToken32k'],
+    },
+    type: 'chat',
+  },
   {
     abilities: {
       functionCall: true,
diff --git a/packages/model-bank/src/modelProviders/kimiCodingPlan.ts b/packages/model-bank/src/modelProviders/kimiCodingPlan.ts
index 9c939644ac..9d85ccd80b 100644
--- a/packages/model-bank/src/modelProviders/kimiCodingPlan.ts
+++ b/packages/model-bank/src/modelProviders/kimiCodingPlan.ts
@@ -3,7 +3,7 @@ import type { ModelProviderCard } from '@/types/llm';
 // ref: https://platform.moonshot.ai/docs
 const KimiCodingPlan: ModelProviderCard = {
   chatModels: [],
-  checkModel: 'k2p5',
+  checkModel: 'kimi-k2.5',
   description:
     'Kimi Code from Moonshot AI provides access to Kimi models including K2.5 for coding tasks.',
   disableBrowserRequest: true,
diff --git a/packages/model-runtime/src/providers/kimiCodingPlan/index.test.ts b/packages/model-runtime/src/providers/kimiCodingPlan/index.test.ts
index 3a440421d3..63cc03892d 100644
--- a/packages/model-runtime/src/providers/kimiCodingPlan/index.test.ts
+++ b/packages/model-runtime/src/providers/kimiCodingPlan/index.test.ts
@@ -77,6 +77,436 @@ describe('LobeKimiCodingPlanAI', () => {
     expect(result).toBeInstanceOf(Response);
   });
 
+  describe('max_tokens handling', () => {
+    const getLastRequestPayload = () => {
+      const calls = (instance['client'].messages.create as Mock).mock.calls;
+      return calls.at(-1)?.[0];
+    };
+
+    it('should use hardcoded maxOutput for k2p5 (deploymentName)', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'k2p5',
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.max_tokens).toBe(32_768);
+    });
+
+    it('should use hardcoded maxOutput for kimi-k2.5 (model id)', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2.5',
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.max_tokens).toBe(32_768);
+    });
+
+    it('should use hardcoded maxOutput for kimi-k2-thinking', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2-thinking',
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.max_tokens).toBe(65_536);
+    });
+
+    it('should use default 8192 for unknown models', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'unknown-model',
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.max_tokens).toBe(8192);
+    });
+
+    it('should respect user-provided max_tokens', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2.5',
+        max_tokens: 4096,
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.max_tokens).toBe(4096);
+    });
+  });
+
+  describe('thinking parameter handling', () => {
+    const getLastRequestPayload = () => {
+      const calls = (instance['client'].messages.create as Mock).mock.calls;
+      return calls.at(-1)?.[0];
+    };
+
+    it('should enable thinking by default for kimi-k2.5', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2.5',
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.thinking).toEqual({ budget_tokens: 1024, type: 'enabled' });
+      expect(payload.temperature).toBe(1);
+      expect(payload.top_p).toBe(0.95);
+    });
+
+    it('should disable thinking when type is disabled for kimi-k2.5', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2.5',
+        thinking: { budget_tokens: 0, type: 'disabled' },
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.thinking).toEqual({ type: 'disabled' });
+      expect(payload.temperature).toBe(0.6);
+    });
+
+    it('should always enable thinking for kimi-k2-thinking', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2-thinking',
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.thinking).toEqual({ budget_tokens: 1024, type: 'enabled' });
+      expect(payload.temperature).toBe(1);
+      expect(payload.top_p).toBe(0.95);
+    });
+
+    it('should ignore thinking disabled for native thinking models', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2-thinking',
+        thinking: { budget_tokens: 0, type: 'disabled' },
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.thinking).toEqual({ budget_tokens: 1024, type: 'enabled' });
+    });
+
+    it('should respect custom thinking budget', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2.5',
+        max_tokens: 4096,
+        thinking: { budget_tokens: 2048, type: 'enabled' },
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.thinking).toEqual({ budget_tokens: 2048, type: 'enabled' });
+    });
+
+    it('should cap thinking budget to max_tokens - 1', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2.5',
+        thinking: { budget_tokens: 100_000, type: 'enabled' },
+      });
+
+      const payload = getLastRequestPayload();
+      // max_tokens defaults to 32_768 for kimi-k2.5, so budget capped to 32_767
+      expect(payload.thinking!.budget_tokens).toBe(32_767);
+    });
+
+    it('should cap default thinking budget below user-provided max_tokens', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'kimi-k2.5',
+        max_tokens: 512,
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.thinking).toEqual({ budget_tokens: 511, type: 'enabled' });
+    });
+
+    it('should not add thinking params for unknown models', async () => {
+      await instance.chat({
+        messages: [{ content: 'Hello', role: 'user' }],
+        model: 'unknown-model',
+      });
+
+      const payload = getLastRequestPayload();
+      expect(payload.thinking).toBeUndefined();
+    });
+  });
+
+  describe('message normalization for thinking', () => {
+    const getLastRequestPayload = () => {
+      const calls = (instance['client'].messages.create as Mock).mock.calls;
+      return calls.at(-1)?.[0];
+    };
+
+    it('should force thinking block on assistant messages for kimi-k2-thinking', async () => {
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          { content: 'Response', role: 'assistant' },
+          { content: 'Follow-up', role: 'user' },
+        ],
+        model: 'kimi-k2-thinking',
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      expect(assistantMessage?.content).toEqual([
+        { type: 'thinking', thinking: ' ' },
+        { type: 'text', text: 'Response' },
+      ]);
+    });
+
+    it('should force thinking block on assistant messages for kimi-k2.5 with thinking enabled', async () => {
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          { content: 'Response', role: 'assistant' },
+          { content: 'Follow-up', role: 'user' },
+        ],
+        model: 'kimi-k2.5',
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      expect(assistantMessage?.content).toEqual([
+        { type: 'thinking', thinking: ' ' },
+        { type: 'text', text: 'Response' },
+      ]);
+    });
+
+    it('should not force thinking block when thinking is disabled', async () => {
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          { content: 'Response', role: 'assistant' },
+        ],
+        model: 'kimi-k2.5',
+        thinking: { budget_tokens: 0, type: 'disabled' },
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      // Content is converted to array by Anthropic factory, but no thinking block
+      expect(assistantMessage?.content).toEqual(
+        expect.arrayContaining([expect.objectContaining({ type: 'text', text: 'Response' })]),
+      );
+      expect(assistantMessage?.content).not.toContainEqual(
+        expect.objectContaining({ type: 'thinking' }),
+      );
+    });
+
+    it('should convert reasoning to thinking block for assistant messages', async () => {
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          {
+            content: 'Response',
+            role: 'assistant',
+            reasoning: { content: 'My reasoning process' },
+          } as any,
+        ],
+        model: 'kimi-k2.5',
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      expect(assistantMessage?.content).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ type: 'thinking', thinking: 'My reasoning process' }),
+          expect.objectContaining({ type: 'text', text: 'Response' }),
+        ]),
+      );
+    });
+
+    it('should handle empty content with reasoning', async () => {
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          {
+            content: '',
+            role: 'assistant',
+            reasoning: { content: 'My reasoning process' },
+          } as any,
+        ],
+        model: 'kimi-k2.5',
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      expect(assistantMessage?.content).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ type: 'thinking', thinking: 'My reasoning process' }),
+          expect.objectContaining({ type: 'text', text: ' ' }),
+        ]),
+      );
+    });
+
+    it('should add placeholder thinking when reasoning has signature', async () => {
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          {
+            content: 'Response',
+            role: 'assistant',
+            reasoning: { content: 'My reasoning', signature: 'some-signature' },
+          } as any,
+        ],
+        model: 'kimi-k2.5',
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      // reasoning with signature is invalid, so placeholder thinking is added
+      expect(assistantMessage?.content).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ type: 'thinking', thinking: ' ' }),
+          expect.objectContaining({ type: 'text', text: 'Response' }),
+        ]),
+      );
+    });
+
+    it('should handle assistant message with tool_calls and reasoning', async () => {
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          {
+            content: '',
+            role: 'assistant',
+            reasoning: { content: 'Thinking about tools' },
+            tool_calls: [
+              {
+                id: 'call_1',
+                type: 'function',
+                function: { name: 'get_weather', arguments: '{"city":"Beijing"}' },
+              },
+            ],
+          } as any,
+          {
+            content: '{"temp": 20}',
+            role: 'tool',
+            tool_call_id: 'call_1',
+          } as any,
+        ],
+        model: 'kimi-k2.5',
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      expect(assistantMessage?.content).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ type: 'thinking', thinking: 'Thinking about tools' }),
+          expect.objectContaining({ type: 'tool_use', name: 'get_weather' }),
+        ]),
+      );
+    });
+
+    it('should add placeholder thinking for tool_calls without reasoning', async () => {
+      // This is the bug scenario: tool_calls without reasoning_content
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          {
+            content: '',
+            role: 'assistant',
+            tool_calls: [
+              {
+                id: 'call_1',
+                type: 'function',
+                function: { name: 'get_weather', arguments: '{"city":"Beijing"}' },
+              },
+            ],
+          } as any,
+          {
+            content: '{"temp": 20}',
+            role: 'tool',
+            tool_call_id: 'call_1',
+          } as any,
+        ],
+        model: 'kimi-k2.5',
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      // Should have placeholder thinking block to avoid API error
+      expect(assistantMessage?.content).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ type: 'thinking', thinking: ' ' }),
+          expect.objectContaining({ type: 'tool_use', name: 'get_weather' }),
+        ]),
+      );
+    });
+
+    it('should handle empty assistant message with placeholder', async () => {
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          { content: '', role: 'assistant' },
+          { content: 'Follow-up', role: 'user' },
+        ],
+        model: 'kimi-k2-thinking',
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      expect(assistantMessage?.content).toEqual([
+        { type: 'thinking', thinking: ' ' },
+        { type: 'text', text: ' ' },
+      ]);
+    });
+
+    it('should not modify non-thinking model messages', async () => {
+      await instance.chat({
+        messages: [
+          { content: 'Hello', role: 'user' },
+          { content: 'Response', role: 'assistant' },
+        ],
+        model: 'unknown-model',
+      });
+
+      const payload = getLastRequestPayload();
+      const assistantMessage = payload.messages.find(
+        (message: any) => message.role === 'assistant',
+      );
+
+      // Content is converted to array by Anthropic factory, but no thinking block
+      expect(assistantMessage?.content).toEqual(
+        expect.arrayContaining([expect.objectContaining({ type: 'text', text: 'Response' })]),
+      );
+      expect(assistantMessage?.content).not.toContainEqual(
+        expect.objectContaining({ type: 'thinking' }),
+      );
+    });
+  });
+
   it('should handle text messages correctly', async () => {
     // Arrange
     const mockStream = new ReadableStream({
diff --git a/packages/model-runtime/src/providers/kimiCodingPlan/index.ts b/packages/model-runtime/src/providers/kimiCodingPlan/index.ts
index 515c56ebb3..e7515c5866 100644
--- a/packages/model-runtime/src/providers/kimiCodingPlan/index.ts
+++ b/packages/model-runtime/src/providers/kimiCodingPlan/index.ts
@@ -7,27 +7,99 @@ import {
   createAnthropicCompatibleRuntime,
 } from '../../core/anthropicCompatibleFactory';
 import type { ChatStreamPayload } from '../../types';
-import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
 import { processMultiProviderModelList } from '../../utils/modelParse';
 
 const DEFAULT_KIMI_CODING_BASE_URL = 'https://api.kimi.com/coding';
 
+// Max output tokens for each model (supports both model id and deploymentName)
+const KIMI_MODEL_MAX_OUTPUT: Record<string, number> = {
+  'k2p5': 32_768,
+  'kimi-k2.5': 32_768,
+  'kimi-k2-thinking': 65_536,
+};
+
+// Helpers for message normalization (shared with Moonshot provider)
+const isKimiK25Model = (model: string) => model === 'kimi-k2.5' || model === 'k2p5';
+const isKimiNativeThinkingModel = (model: string) => model.startsWith('kimi-k2-thinking');
+const isEmptyContent = (content: any) =>
+  content === '' || content === null || content === undefined;
+const hasValidReasoning = (reasoning: any) => reasoning?.content && !reasoning?.signature;
+
+const getK25Params = (isThinkingEnabled: boolean) => ({
+  temperature: isThinkingEnabled ? 1 : 0.6,
+  top_p: 0.95,
+});
+
+// Anthropic format helpers
+const buildThinkingBlock = (reasoning: any) =>
+  hasValidReasoning(reasoning) ? { thinking: reasoning.content, type: 'thinking' as const } : null;
+
+const toContentArray = (content: any) =>
+  Array.isArray(content) ? content : [{ text: content, type: 'text' as const }];
+
+/**
+ * Normalize assistant messages for Anthropic format.
+ * When forceThinking is true (kimi-k2.5 with thinking enabled), every assistant
+ * message must carry a thinking block, otherwise Kimi API rejects with:
+ * "thinking is enabled but reasoning_content is missing in assistant tool call message"
+ */
+const normalizeMessagesForAnthropic = (
+  messages: ChatStreamPayload['messages'],
+  forceThinking = false,
+) =>
+  messages.map((message: any) => {
+    if (message.role !== 'assistant') return message;
+
+    const { reasoning, ...rest } = message;
+    const thinkingBlock = buildThinkingBlock(reasoning);
+    const effectiveBlock =
+      thinkingBlock || (forceThinking ? { thinking: ' ', type: 'thinking' as const } : null);
+
+    if (isEmptyContent(message.content)) {
+      const placeholder = { text: ' ', type: 'text' as const };
+      return { ...rest, content: effectiveBlock ? [effectiveBlock, placeholder] : [placeholder] };
+    }
+
+    if (!effectiveBlock) return rest;
+    return { ...rest, content: [effectiveBlock, ...toContentArray(message.content)] };
+  });
+
+/**
+ * Build the Anthropic-format request for Kimi Coding Plan: resolve max_tokens,
+ * normalize assistant messages, and attach thinking/sampling params for
+ * kimi-k2.5 (`k2p5`) and kimi-k2-thinking models.
+ */
 const buildKimiCodingPlanAnthropicPayload = async (
   payload: ChatStreamPayload,
 ): Promise => {
-  const resolvedMaxTokens =
-    payload.max_tokens ??
-    (await getModelPropertyWithFallback(
-      payload.model,
-      'maxOutput',
-      ModelProvider.KimiCodingPlan,
-    )) ??
-    8192;
+  const resolvedMaxTokens = payload.max_tokens ?? KIMI_MODEL_MAX_OUTPUT[payload.model] ?? 8192;
 
-  return buildDefaultAnthropicPayload({
+  const isK25 = isKimiK25Model(payload.model);
+  const isNativeThinking = isKimiNativeThinkingModel(payload.model);
+  const isThinkingEnabled = isNativeThinking || (isK25 && payload.thinking?.type !== 'disabled');
+
+  const basePayload = await buildDefaultAnthropicPayload({
     ...payload,
     max_tokens: resolvedMaxTokens,
+    messages: normalizeMessagesForAnthropic(payload.messages, isThinkingEnabled),
   });
+
+  if (!isK25 && !isNativeThinking) return basePayload;
+
+  // Clamp the effective budget (explicit or the 1024 default) below max_tokens;
+  // previously the default bypassed the cap when callers passed a small max_tokens.
+  const requestedBudget = payload.thinking?.budget_tokens || 1024;
+  const resolvedThinkingBudget = Math.min(requestedBudget, resolvedMaxTokens - 1);
+  const thinkingParam =
+    isNativeThinking || payload.thinking?.type !== 'disabled'
+      ? ({ budget_tokens: resolvedThinkingBudget, type: 'enabled' } as const)
+      : ({ type: 'disabled' } as const);
+
+  return {
+    ...basePayload,
+    ...getK25Params(thinkingParam.type === 'enabled'),
+    thinking: thinkingParam,
+  };
 };
 
 export const params = createAnthropicCompatibleParams({
diff --git a/src/services/chat/index.ts b/src/services/chat/index.ts
index c6fe5998fd..bb9c8fd931 100644
--- a/src/services/chat/index.ts
+++ b/src/services/chat/index.ts
@@ -351,6 +351,7 @@ class ChatService {
         ModelProvider.Volcengine,
         ModelProvider.AzureAI,
         ModelProvider.Qwen,
+        ModelProvider.KimiCodingPlan,
       ] as string[];
 
       if (providersWithDeploymentName.includes(provider)) {