feat: add GLM-5.1 model and fix KimiCodingPlan issues (#13700)

* 🐛 fix: fix Kimi K2.5 model icon display by using deploymentName

- Change model id from 'k2p5' to 'kimi-k2.5' to match Moonshot icon keywords
- Add deploymentName 'k2p5' for API calls to use original model name
- Add KimiCodingPlan to providersWithDeploymentName list

This allows the model icon to display correctly while maintaining
backward compatibility with the API using the original 'k2p5' name.

* 🐛 fix: normalize messages for KimiCodingPlan thinking models

Add message normalization for Kimi K2.5 and K2 Thinking models to ensure
every assistant message has a thinking block when thinking is enabled.

This fixes the issue where regenerating with KimiCodingPlan after using
other providers would fail with "thinking is enabled but reasoning_content
is missing" error, because historical messages from other providers don't
have reasoning fields.

The normalization adds a placeholder thinking block when:
1. Thinking is enabled for Kimi K2.5/K2 Thinking models
2. Assistant message doesn't have reasoning content

* ✨ feat(siliconcloud): add GLM-5.1 model support

Add GLM-5.1 (Pro) model configuration with:
- 198K context window
- Function call and reasoning capabilities
- Tiered pricing (0-32k / 32k+)
- reasoningBudgetToken32k extension parameter

* 🐛 fix: use hardcoded maxOutput mapping for KimiCodingPlan models

Replace getModelPropertyWithFallback with a simple hardcoded mapping to fix
the issue where max_tokens lookup fails when using deploymentName (k2p5).

The model id is converted to deploymentName in ChatService layer before
reaching the provider, causing getModelPropertyWithFallback('k2p5', ...) to
fail since the model card uses id 'kimi-k2.5'.

By using a hardcoded mapping that supports both model id and deploymentName,
we avoid the lookup issue while keeping the code simple (KimiCodingPlan only
has a few models).

* ✅ test(kimiCodingPlan): add tests for thinking and max_tokens handling

Add comprehensive tests for KimiCodingPlan provider covering:
- Hardcoded maxOutput mapping for k2p5, kimi-k2.5, kimi-k2-thinking
- Thinking parameter handling for kimi-k2.5 and kimi-k2-thinking models
- Message normalization with forceThinking for assistant messages
- Tool calls with reasoning content to prevent API error

* ✅ test(kimiCodingPlan): add tests for thinking and max_tokens handling

Add comprehensive tests for KimiCodingPlan provider covering:
- Hardcoded maxOutput mapping for k2p5, kimi-k2.5, kimi-k2-thinking
- Thinking parameter handling for kimi-k2.5 and kimi-k2-thinking models
- Message normalization with forceThinking for assistant messages
- Tool calls with reasoning content to prevent API error
This commit is contained in:
Hardy 2026-04-10 10:41:06 +08:00 committed by GitHub
parent c85be1265f
commit 5f25efd54c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 558 additions and 12 deletions

View file

@@ -10,12 +10,15 @@ const kimiCodingPlanChatModels: AIChatModelCard[] = [
video: true,
vision: true,
},
config: {
deploymentName: 'k2p5',
},
contextWindowTokens: 262_144,
description:
"Kimi K2.5 is Kimi's most versatile model to date, featuring a native multimodal architecture that supports both vision and text inputs, 'thinking' and 'non-thinking' modes, and both conversational and agent tasks.",
displayName: 'Kimi K2.5',
enabled: true,
id: 'k2p5',
id: 'kimi-k2.5',
maxOutput: 32_768,
organization: 'Moonshot',
releasedAt: '2026-01-27',

View file

@@ -329,6 +329,63 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
// GLM-5.1 (Pro) on SiliconCloud: function-call + reasoning capable, 198K context,
// tiered CNY pricing (0–32K / 32K+ input tokens) — see lookup units below.
{
abilities: {
functionCall: true,
reasoning: true,
},
contextWindowTokens: 198_000,
description:
'GLM-5.1 is a next-generation flagship model designed for agent engineering, using a Mixture of Experts (MoE) architecture with 754B parameters. It significantly enhances programming capabilities, achieving leading results on SWE-Bench Pro, and substantially outperforms its predecessor on benchmarks like NL2Repo and Terminal-Bench 2.0. Designed for long-duration agent tasks, it handles ambiguous questions with better judgment, decomposes complex tasks, executes experiments, analyzes results, and continuously optimizes through hundreds of iterations and thousands of tool calls.',
displayName: 'GLM-5.1 (Pro)',
id: 'Pro/zai-org/GLM-5.1',
// Tier keys are input-token buckets in millions: '[0, 0.032]' = 0–32K tokens,
// '[0.032, infinity]' = 32K+ tokens.
pricing: {
currency: 'CNY',
units: [
// Cache-read input: ¥1.3/M up to 32K input, ¥2/M beyond.
{
lookup: {
prices: {
'[0, 0.032]': 1.3,
'[0.032, infinity]': 2,
},
pricingParams: ['textInput'],
},
name: 'textInput_cacheRead',
strategy: 'lookup',
unit: 'millionTokens',
},
// Fresh input: ¥6/M up to 32K input, ¥8/M beyond.
{
lookup: {
prices: {
'[0, 0.032]': 6,
'[0.032, infinity]': 8,
},
pricingParams: ['textInput'],
},
name: 'textInput',
strategy: 'lookup',
unit: 'millionTokens',
},
// Output: ¥24/M up to 32K input, ¥28/M beyond (tiered on input size).
{
lookup: {
prices: {
'[0, 0.032]': 24,
'[0.032, infinity]': 28,
},
pricingParams: ['textInput'],
},
name: 'textOutput',
strategy: 'lookup',
unit: 'millionTokens',
},
],
},
releasedAt: '2026-04-08',
settings: {
extendParams: ['enableReasoning', 'reasoningBudgetToken32k'],
},
type: 'chat',
},
{
abilities: {
functionCall: true,

View file

@@ -3,7 +3,7 @@ import type { ModelProviderCard } from '@/types/llm';
// ref: https://platform.moonshot.ai/docs
const KimiCodingPlan: ModelProviderCard = {
chatModels: [],
checkModel: 'k2p5',
checkModel: 'kimi-k2.5',
description:
'Kimi Code from Moonshot AI provides access to Kimi models including K2.5 for coding tasks.',
disableBrowserRequest: true,

View file

@@ -77,6 +77,425 @@ describe('LobeKimiCodingPlanAI', () => {
expect(result).toBeInstanceOf(Response);
});
// Covers the hardcoded KIMI_MODEL_MAX_OUTPUT mapping: max_tokens must resolve for
// both the public model id and the deploymentName, fall back to 8192 for unknown
// models, and never override an explicitly provided max_tokens.
describe('max_tokens handling', () => {
// Payload of the most recent mocked client.messages.create call.
const getLastRequestPayload = () => {
const calls = (instance['client'].messages.create as Mock).mock.calls;
return calls.at(-1)?.[0];
};
it('should use hardcoded maxOutput for k2p5 (deploymentName)', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'k2p5',
});
const payload = getLastRequestPayload();
expect(payload.max_tokens).toBe(32_768);
});
it('should use hardcoded maxOutput for kimi-k2.5 (model id)', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
});
const payload = getLastRequestPayload();
expect(payload.max_tokens).toBe(32_768);
});
it('should use hardcoded maxOutput for kimi-k2-thinking', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2-thinking',
});
const payload = getLastRequestPayload();
expect(payload.max_tokens).toBe(65_536);
});
// Models outside the mapping get the provider-wide default.
it('should use default 8192 for unknown models', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'unknown-model',
});
const payload = getLastRequestPayload();
expect(payload.max_tokens).toBe(8192);
});
// A caller-supplied max_tokens takes precedence over the mapping.
it('should respect user-provided max_tokens', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
max_tokens: 4096,
});
const payload = getLastRequestPayload();
expect(payload.max_tokens).toBe(4096);
});
});
// Covers the `thinking` request parameter: default 1024-token budget, explicit
// disable for kimi-k2.5, forced enablement for native thinking models, budget
// capping at max_tokens - 1, and the matching K2.5 sampling params.
describe('thinking parameter handling', () => {
// Payload of the most recent mocked client.messages.create call.
const getLastRequestPayload = () => {
const calls = (instance['client'].messages.create as Mock).mock.calls;
return calls.at(-1)?.[0];
};
it('should enable thinking by default for kimi-k2.5', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
});
const payload = getLastRequestPayload();
expect(payload.thinking).toEqual({ budget_tokens: 1024, type: 'enabled' });
// Thinking on => temperature 1 / top_p 0.95.
expect(payload.temperature).toBe(1);
expect(payload.top_p).toBe(0.95);
});
it('should disable thinking when type is disabled for kimi-k2.5', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
thinking: { budget_tokens: 0, type: 'disabled' },
});
const payload = getLastRequestPayload();
expect(payload.thinking).toEqual({ type: 'disabled' });
// Thinking off => temperature 0.6.
expect(payload.temperature).toBe(0.6);
});
it('should always enable thinking for kimi-k2-thinking', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2-thinking',
});
const payload = getLastRequestPayload();
expect(payload.thinking).toEqual({ budget_tokens: 1024, type: 'enabled' });
expect(payload.temperature).toBe(1);
expect(payload.top_p).toBe(0.95);
});
// Native thinking models cannot opt out of thinking.
it('should ignore thinking disabled for native thinking models', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2-thinking',
thinking: { budget_tokens: 0, type: 'disabled' },
});
const payload = getLastRequestPayload();
expect(payload.thinking).toEqual({ budget_tokens: 1024, type: 'enabled' });
});
it('should respect custom thinking budget', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
max_tokens: 4096,
thinking: { budget_tokens: 2048, type: 'enabled' },
});
const payload = getLastRequestPayload();
expect(payload.thinking).toEqual({ budget_tokens: 2048, type: 'enabled' });
});
it('should cap thinking budget to max_tokens - 1', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'kimi-k2.5',
thinking: { budget_tokens: 100_000, type: 'enabled' },
});
const payload = getLastRequestPayload();
// max_tokens defaults to 32_768 for kimi-k2.5, so budget capped to 32_767
expect(payload.thinking!.budget_tokens).toBe(32_767);
});
it('should not add thinking params for unknown models', async () => {
await instance.chat({
messages: [{ content: 'Hello', role: 'user' }],
model: 'unknown-model',
});
const payload = getLastRequestPayload();
expect(payload.thinking).toBeUndefined();
});
});
// Covers normalizeMessagesForAnthropic: assistant history must carry a thinking
// block whenever thinking is enabled (a single-space placeholder when no stored
// reasoning exists), which prevents the Kimi API error
// "thinking is enabled but reasoning_content is missing".
describe('message normalization for thinking', () => {
// Payload of the most recent mocked client.messages.create call.
const getLastRequestPayload = () => {
const calls = (instance['client'].messages.create as Mock).mock.calls;
return calls.at(-1)?.[0];
};
it('should force thinking block on assistant messages for kimi-k2-thinking', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{ content: 'Response', role: 'assistant' },
{ content: 'Follow-up', role: 'user' },
],
model: 'kimi-k2-thinking',
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
// No stored reasoning, so a placeholder thinking block is injected.
expect(assistantMessage?.content).toEqual([
{ type: 'thinking', thinking: ' ' },
{ type: 'text', text: 'Response' },
]);
});
it('should force thinking block on assistant messages for kimi-k2.5 with thinking enabled', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{ content: 'Response', role: 'assistant' },
{ content: 'Follow-up', role: 'user' },
],
model: 'kimi-k2.5',
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
expect(assistantMessage?.content).toEqual([
{ type: 'thinking', thinking: ' ' },
{ type: 'text', text: 'Response' },
]);
});
it('should not force thinking block when thinking is disabled', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{ content: 'Response', role: 'assistant' },
],
model: 'kimi-k2.5',
thinking: { budget_tokens: 0, type: 'disabled' },
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
// Content is converted to array by Anthropic factory, but no thinking block
expect(assistantMessage?.content).toEqual(
expect.arrayContaining([expect.objectContaining({ type: 'text', text: 'Response' })]),
);
expect(assistantMessage?.content).not.toContainEqual(
expect.objectContaining({ type: 'thinking' }),
);
});
// Stored reasoning without a signature is replayed as a real thinking block.
it('should convert reasoning to thinking block for assistant messages', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{
content: 'Response',
role: 'assistant',
reasoning: { content: 'My reasoning process' },
} as any,
],
model: 'kimi-k2.5',
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
expect(assistantMessage?.content).toEqual(
expect.arrayContaining([
expect.objectContaining({ type: 'thinking', thinking: 'My reasoning process' }),
expect.objectContaining({ type: 'text', text: 'Response' }),
]),
);
});
// Empty assistant content gets a single-space text placeholder.
it('should handle empty content with reasoning', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{
content: '',
role: 'assistant',
reasoning: { content: 'My reasoning process' },
} as any,
],
model: 'kimi-k2.5',
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
expect(assistantMessage?.content).toEqual(
expect.arrayContaining([
expect.objectContaining({ type: 'thinking', thinking: 'My reasoning process' }),
expect.objectContaining({ type: 'text', text: ' ' }),
]),
);
});
it('should add placeholder thinking when reasoning has signature', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{
content: 'Response',
role: 'assistant',
reasoning: { content: 'My reasoning', signature: 'some-signature' },
} as any,
],
model: 'kimi-k2.5',
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
// reasoning with signature is invalid, so placeholder thinking is added
expect(assistantMessage?.content).toEqual(
expect.arrayContaining([
expect.objectContaining({ type: 'thinking', thinking: ' ' }),
expect.objectContaining({ type: 'text', text: 'Response' }),
]),
);
});
it('should handle assistant message with tool_calls and reasoning', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{
content: '',
role: 'assistant',
reasoning: { content: 'Thinking about tools' },
tool_calls: [
{
id: 'call_1',
type: 'function',
function: { name: 'get_weather', arguments: '{"city":"Beijing"}' },
},
],
} as any,
{
content: '{"temp": 20}',
role: 'tool',
tool_call_id: 'call_1',
} as any,
],
model: 'kimi-k2.5',
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
expect(assistantMessage?.content).toEqual(
expect.arrayContaining([
expect.objectContaining({ type: 'thinking', thinking: 'Thinking about tools' }),
expect.objectContaining({ type: 'tool_use', name: 'get_weather' }),
]),
);
});
it('should add placeholder thinking for tool_calls without reasoning', async () => {
// This is the bug scenario: tool_calls without reasoning_content
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{
content: '',
role: 'assistant',
tool_calls: [
{
id: 'call_1',
type: 'function',
function: { name: 'get_weather', arguments: '{"city":"Beijing"}' },
},
],
} as any,
{
content: '{"temp": 20}',
role: 'tool',
tool_call_id: 'call_1',
} as any,
],
model: 'kimi-k2.5',
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
// Should have placeholder thinking block to avoid API error
expect(assistantMessage?.content).toEqual(
expect.arrayContaining([
expect.objectContaining({ type: 'thinking', thinking: ' ' }),
expect.objectContaining({ type: 'tool_use', name: 'get_weather' }),
]),
);
});
it('should handle empty assistant message with placeholder', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{ content: '', role: 'assistant' },
{ content: 'Follow-up', role: 'user' },
],
model: 'kimi-k2-thinking',
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
// Both the thinking block and the text part are single-space placeholders.
expect(assistantMessage?.content).toEqual([
{ type: 'thinking', thinking: ' ' },
{ type: 'text', text: ' ' },
]);
});
it('should not modify non-thinking model messages', async () => {
await instance.chat({
messages: [
{ content: 'Hello', role: 'user' },
{ content: 'Response', role: 'assistant' },
],
model: 'unknown-model',
});
const payload = getLastRequestPayload();
const assistantMessage = payload.messages.find(
(message: any) => message.role === 'assistant',
);
// Content is converted to array by Anthropic factory, but no thinking block
expect(assistantMessage?.content).toEqual(
expect.arrayContaining([expect.objectContaining({ type: 'text', text: 'Response' })]),
);
expect(assistantMessage?.content).not.toContainEqual(
expect.objectContaining({ type: 'thinking' }),
);
});
});
it('should handle text messages correctly', async () => {
// Arrange
const mockStream = new ReadableStream({

View file

@@ -7,27 +7,93 @@ import {
createAnthropicCompatibleRuntime,
} from '../../core/anthropicCompatibleFactory';
import type { ChatStreamPayload } from '../../types';
import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
import { processMultiProviderModelList } from '../../utils/modelParse';
const DEFAULT_KIMI_CODING_BASE_URL = 'https://api.kimi.com/coding';
// Max output tokens per model. Keyed by BOTH the public model id and the
// deploymentName ('k2p5'), because the ChatService layer may substitute the
// deploymentName before the request reaches this provider.
const KIMI_MODEL_MAX_OUTPUT: Record<string, number> = {
  'k2p5': 32_768,
  'kimi-k2-thinking': 65_536,
  'kimi-k2.5': 32_768,
};
// Helpers for message normalization (shared with Moonshot provider)

// K2.5 is recognized under both its public id and its deploymentName.
const isKimiK25Model = (model: string) => ['kimi-k2.5', 'k2p5'].includes(model);

// Native thinking models ('kimi-k2-thinking*') cannot switch thinking off.
const isKimiNativeThinkingModel = (model: string) => model.startsWith('kimi-k2-thinking');

// Treat '', null and undefined all as "no content".
const isEmptyContent = (content: any) => content === '' || content == null;

// Reasoning carrying a signature is treated as invalid and is not replayed.
const hasValidReasoning = (r: any) => r?.content && !r?.signature;

// K2.5 sampling defaults: temperature 1 while thinking, 0.6 otherwise.
const getK25Params = (thinkingOn: boolean) => ({
  temperature: thinkingOn ? 1 : 0.6,
  top_p: 0.95,
});

// Anthropic format helpers
const buildThinkingBlock = (reasoning: any) => {
  if (!hasValidReasoning(reasoning)) return null;
  return { thinking: reasoning.content, type: 'thinking' as const };
};

const toContentArray = (content: any) => {
  if (Array.isArray(content)) return content;
  return [{ text: content, type: 'text' as const }];
};
/**
* Normalize assistant messages for Anthropic format.
* When forceThinking is true (kimi-k2.5 with thinking enabled), every assistant
* message must carry a thinking block, otherwise Kimi API rejects with:
* "thinking is enabled but reasoning_content is missing in assistant tool call message"
*/
const normalizeMessagesForAnthropic = (
messages: ChatStreamPayload['messages'],
forceThinking = false,
) =>
messages.map((message: any) => {
if (message.role !== 'assistant') return message;
const { reasoning, ...rest } = message;
const thinkingBlock = buildThinkingBlock(reasoning);
const effectiveBlock =
thinkingBlock || (forceThinking ? { thinking: ' ', type: 'thinking' as const } : null);
if (isEmptyContent(message.content)) {
const placeholder = { text: ' ', type: 'text' as const };
return { ...rest, content: effectiveBlock ? [effectiveBlock, placeholder] : [placeholder] };
}
if (!effectiveBlock) return rest;
return { ...rest, content: [effectiveBlock, ...toContentArray(message.content)] };
});
const buildKimiCodingPlanAnthropicPayload = async (
payload: ChatStreamPayload,
): Promise<Anthropic.MessageCreateParams> => {
const resolvedMaxTokens =
payload.max_tokens ??
(await getModelPropertyWithFallback<number | undefined>(
payload.model,
'maxOutput',
ModelProvider.KimiCodingPlan,
)) ??
8192;
const resolvedMaxTokens = payload.max_tokens ?? KIMI_MODEL_MAX_OUTPUT[payload.model] ?? 8192;
return buildDefaultAnthropicPayload({
const isK25 = isKimiK25Model(payload.model);
const isNativeThinking = isKimiNativeThinkingModel(payload.model);
const isThinkingEnabled = isNativeThinking || (isK25 && payload.thinking?.type !== 'disabled');
const basePayload = await buildDefaultAnthropicPayload({
...payload,
max_tokens: resolvedMaxTokens,
messages: normalizeMessagesForAnthropic(payload.messages, isThinkingEnabled),
});
if (!isK25 && !isNativeThinking) return basePayload;
const resolvedThinkingBudget = payload.thinking?.budget_tokens
? Math.min(payload.thinking.budget_tokens, resolvedMaxTokens - 1)
: 1024;
const thinkingParam =
isNativeThinking || payload.thinking?.type !== 'disabled'
? ({ budget_tokens: resolvedThinkingBudget, type: 'enabled' } as const)
: ({ type: 'disabled' } as const);
return {
...basePayload,
...getK25Params(thinkingParam.type === 'enabled'),
thinking: thinkingParam,
};
};
export const params = createAnthropicCompatibleParams({

View file

@@ -351,6 +351,7 @@ class ChatService {
ModelProvider.Volcengine,
ModelProvider.AzureAI,
ModelProvider.Qwen,
ModelProvider.KimiCodingPlan,
] as string[];
if (providersWithDeploymentName.includes(provider)) {