mirror of
https://github.com/lobehub/lobehub
synced 2026-04-21 09:37:28 +00:00
✨ feat: add GLM-5.1 model and fix KimiCodingPlan issues (#13700)
* 🐛 fix: fix Kimi K2.5 model icon display by using deploymentName

  - Change model id from 'k2p5' to 'kimi-k2.5' to match Moonshot icon keywords
  - Add deploymentName 'k2p5' for API calls to use original model name
  - Add KimiCodingPlan to providersWithDeploymentName list

  This allows the model icon to display correctly while maintaining backward compatibility with the API using the original 'k2p5' name.

* 🐛 fix: normalize messages for KimiCodingPlan thinking models

  Add message normalization for Kimi K2.5 and K2 Thinking models to ensure every assistant message has a thinking block when thinking is enabled.

  This fixes the issue where regenerating with KimiCodingPlan after using other providers would fail with a "thinking is enabled but reasoning_content is missing" error, because historical messages from other providers don't have reasoning fields.

  The normalization adds a placeholder thinking block when:

  1. Thinking is enabled for Kimi K2.5/K2 Thinking models
  2. Assistant message doesn't have reasoning content

* ✨ feat(siliconcloud): add GLM-5.1 model support

  Add GLM-5.1 (Pro) model configuration with:

  - 198K context window
  - Function call and reasoning capabilities
  - Tiered pricing (0-32k / 32k+)
  - reasoningBudgetToken32k extension parameter

* 🐛 fix: use hardcoded maxOutput mapping for KimiCodingPlan models

  Replace getModelPropertyWithFallback with a simple hardcoded mapping to fix the issue where max_tokens lookup fails when using deploymentName (k2p5).

  The model id is converted to deploymentName in ChatService layer before reaching the provider, causing getModelPropertyWithFallback('k2p5', ...) to fail since the model card uses id 'kimi-k2.5'.

  By using a hardcoded mapping that supports both model id and deploymentName, we avoid the lookup issue while keeping the code simple (KimiCodingPlan only has a few models).

* ✅ test(kimiCodingPlan): add tests for thinking and max_tokens handling

  Add comprehensive tests for KimiCodingPlan provider covering:

  - Hardcoded maxOutput mapping for k2p5, kimi-k2.5, kimi-k2-thinking
  - Thinking parameter handling for kimi-k2.5 and kimi-k2-thinking models
  - Message normalization with forceThinking for assistant messages
  - Tool calls with reasoning content to prevent API error

* ✅ test(kimiCodingPlan): add tests for thinking and max_tokens handling

  Add comprehensive tests for KimiCodingPlan provider covering:

  - Hardcoded maxOutput mapping for k2p5, kimi-k2.5, kimi-k2-thinking
  - Thinking parameter handling for kimi-k2.5 and kimi-k2-thinking models
  - Message normalization with forceThinking for assistant messages
  - Tool calls with reasoning content to prevent API error
This commit is contained in:
parent
c85be1265f
commit
5f25efd54c
6 changed files with 558 additions and 12 deletions
|
|
@ -10,12 +10,15 @@ const kimiCodingPlanChatModels: AIChatModelCard[] = [
|
|||
video: true,
|
||||
vision: true,
|
||||
},
|
||||
config: {
|
||||
deploymentName: 'k2p5',
|
||||
},
|
||||
contextWindowTokens: 262_144,
|
||||
description:
|
||||
"Kimi K2.5 is Kimi's most versatile model to date, featuring a native multimodal architecture that supports both vision and text inputs, 'thinking' and 'non-thinking' modes, and both conversational and agent tasks.",
|
||||
displayName: 'Kimi K2.5',
|
||||
enabled: true,
|
||||
id: 'k2p5',
|
||||
id: 'kimi-k2.5',
|
||||
maxOutput: 32_768,
|
||||
organization: 'Moonshot',
|
||||
releasedAt: '2026-01-27',
|
||||
|
|
|
|||
|
|
@ -329,6 +329,63 @@ const siliconcloudChatModels: AIChatModelCard[] = [
|
|||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
reasoning: true,
|
||||
},
|
||||
contextWindowTokens: 198_000,
|
||||
description:
|
||||
'GLM-5.1 is a next-generation flagship model designed for agent engineering, using a Mixture of Experts (MoE) architecture with 754B parameters. It significantly enhances programming capabilities, achieving leading results on SWE-Bench Pro, and substantially outperforms its predecessor on benchmarks like NL2Repo and Terminal-Bench 2.0. Designed for long-duration agent tasks, it handles ambiguous questions with better judgment, decomposes complex tasks, executes experiments, analyzes results, and continuously optimizes through hundreds of iterations and thousands of tool calls.',
|
||||
displayName: 'GLM-5.1 (Pro)',
|
||||
id: 'Pro/zai-org/GLM-5.1',
|
||||
pricing: {
|
||||
currency: 'CNY',
|
||||
units: [
|
||||
{
|
||||
lookup: {
|
||||
prices: {
|
||||
'[0, 0.032]': 1.3,
|
||||
'[0.032, infinity]': 2,
|
||||
},
|
||||
pricingParams: ['textInput'],
|
||||
},
|
||||
name: 'textInput_cacheRead',
|
||||
strategy: 'lookup',
|
||||
unit: 'millionTokens',
|
||||
},
|
||||
{
|
||||
lookup: {
|
||||
prices: {
|
||||
'[0, 0.032]': 6,
|
||||
'[0.032, infinity]': 8,
|
||||
},
|
||||
pricingParams: ['textInput'],
|
||||
},
|
||||
name: 'textInput',
|
||||
strategy: 'lookup',
|
||||
unit: 'millionTokens',
|
||||
},
|
||||
{
|
||||
lookup: {
|
||||
prices: {
|
||||
'[0, 0.032]': 24,
|
||||
'[0.032, infinity]': 28,
|
||||
},
|
||||
pricingParams: ['textInput'],
|
||||
},
|
||||
name: 'textOutput',
|
||||
strategy: 'lookup',
|
||||
unit: 'millionTokens',
|
||||
},
|
||||
],
|
||||
},
|
||||
releasedAt: '2026-04-08',
|
||||
settings: {
|
||||
extendParams: ['enableReasoning', 'reasoningBudgetToken32k'],
|
||||
},
|
||||
type: 'chat',
|
||||
},
|
||||
{
|
||||
abilities: {
|
||||
functionCall: true,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import type { ModelProviderCard } from '@/types/llm';
|
|||
// ref: https://platform.moonshot.ai/docs
|
||||
const KimiCodingPlan: ModelProviderCard = {
|
||||
chatModels: [],
|
||||
checkModel: 'k2p5',
|
||||
checkModel: 'kimi-k2.5',
|
||||
description:
|
||||
'Kimi Code from Moonshot AI provides access to Kimi models including K2.5 for coding tasks.',
|
||||
disableBrowserRequest: true,
|
||||
|
|
|
|||
|
|
@ -77,6 +77,425 @@ describe('LobeKimiCodingPlanAI', () => {
|
|||
expect(result).toBeInstanceOf(Response);
|
||||
});
|
||||
|
||||
describe('max_tokens handling', () => {
  // First argument of the most recent `messages.create` call on the mocked client.
  const getLastRequestPayload = () => {
    const calls = (instance['client'].messages.create as Mock).mock.calls;
    return calls.at(-1)?.[0];
  };

  // When the caller passes no max_tokens, the provider falls back to its per-model table
  // (keyed by both model id and deploymentName) and finally to 8192.
  it.each([
    {
      expected: 32_768,
      model: 'k2p5',
      title: 'should use hardcoded maxOutput for k2p5 (deploymentName)',
    },
    {
      expected: 32_768,
      model: 'kimi-k2.5',
      title: 'should use hardcoded maxOutput for kimi-k2.5 (model id)',
    },
    {
      expected: 65_536,
      model: 'kimi-k2-thinking',
      title: 'should use hardcoded maxOutput for kimi-k2-thinking',
    },
    {
      expected: 8192,
      model: 'unknown-model',
      title: 'should use default 8192 for unknown models',
    },
  ])('$title', async ({ expected, model }) => {
    await instance.chat({
      messages: [{ content: 'Hello', role: 'user' }],
      model,
    });

    const payload = getLastRequestPayload();
    expect(payload.max_tokens).toBe(expected);
  });

  it('should respect user-provided max_tokens', async () => {
    await instance.chat({
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'kimi-k2.5',
      max_tokens: 4096,
    });

    const payload = getLastRequestPayload();
    expect(payload.max_tokens).toBe(4096);
  });
});
|
||||
|
||||
describe('thinking parameter handling', () => {
  // First argument of the most recent `messages.create` call on the mocked client.
  const getLastRequestPayload = () => {
    const calls = (instance['client'].messages.create as Mock).mock.calls;
    return calls.at(-1)?.[0];
  };

  it('should enable thinking by default for kimi-k2.5', async () => {
    await instance.chat({
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'kimi-k2.5',
    });

    const payload = getLastRequestPayload();
    expect(payload.thinking).toEqual({ budget_tokens: 1024, type: 'enabled' });
    expect(payload.temperature).toBe(1);
    expect(payload.top_p).toBe(0.95);
  });

  it('should disable thinking when type is disabled for kimi-k2.5', async () => {
    await instance.chat({
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'kimi-k2.5',
      thinking: { budget_tokens: 0, type: 'disabled' },
    });

    const payload = getLastRequestPayload();
    expect(payload.thinking).toEqual({ type: 'disabled' });
    expect(payload.temperature).toBe(0.6);
  });

  it('should always enable thinking for kimi-k2-thinking', async () => {
    await instance.chat({
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'kimi-k2-thinking',
    });

    const payload = getLastRequestPayload();
    expect(payload.thinking).toEqual({ budget_tokens: 1024, type: 'enabled' });
    expect(payload.temperature).toBe(1);
    expect(payload.top_p).toBe(0.95);
  });

  it('should ignore thinking disabled for native thinking models', async () => {
    await instance.chat({
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'kimi-k2-thinking',
      thinking: { budget_tokens: 0, type: 'disabled' },
    });

    const payload = getLastRequestPayload();
    expect(payload.thinking).toEqual({ budget_tokens: 1024, type: 'enabled' });
  });

  it('should respect custom thinking budget', async () => {
    await instance.chat({
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'kimi-k2.5',
      max_tokens: 4096,
      thinking: { budget_tokens: 2048, type: 'enabled' },
    });

    const payload = getLastRequestPayload();
    expect(payload.thinking).toEqual({ budget_tokens: 2048, type: 'enabled' });
  });

  it('should cap thinking budget to max_tokens - 1', async () => {
    await instance.chat({
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'kimi-k2.5',
      thinking: { budget_tokens: 100_000, type: 'enabled' },
    });

    const payload = getLastRequestPayload();
    // max_tokens defaults to 32_768 for kimi-k2.5, so budget capped to 32_767
    expect(payload.thinking!.budget_tokens).toBe(32_767);
  });

  it('should not add thinking params for unknown models', async () => {
    await instance.chat({
      messages: [{ content: 'Hello', role: 'user' }],
      model: 'unknown-model',
    });

    const payload = getLastRequestPayload();
    expect(payload.thinking).toBeUndefined();
  });
});
|
||||
|
||||
describe('message normalization for thinking', () => {
  // First argument of the most recent `messages.create` call on the mocked client.
  const getLastRequestPayload = () => {
    const calls = (instance['client'].messages.create as Mock).mock.calls;
    return calls.at(-1)?.[0];
  };

  // First assistant message of the most recent request payload.
  const getAssistantMessage = () => {
    const payload = getLastRequestPayload();
    return payload.messages.find((message: any) => message.role === 'assistant');
  };

  it('should force thinking block on assistant messages for kimi-k2-thinking', async () => {
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        { content: 'Response', role: 'assistant' },
        { content: 'Follow-up', role: 'user' },
      ],
      model: 'kimi-k2-thinking',
    });

    const assistantMessage = getAssistantMessage();

    expect(assistantMessage?.content).toEqual([
      { type: 'thinking', thinking: ' ' },
      { type: 'text', text: 'Response' },
    ]);
  });

  it('should force thinking block on assistant messages for kimi-k2.5 with thinking enabled', async () => {
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        { content: 'Response', role: 'assistant' },
        { content: 'Follow-up', role: 'user' },
      ],
      model: 'kimi-k2.5',
    });

    const assistantMessage = getAssistantMessage();

    expect(assistantMessage?.content).toEqual([
      { type: 'thinking', thinking: ' ' },
      { type: 'text', text: 'Response' },
    ]);
  });

  it('should not force thinking block when thinking is disabled', async () => {
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        { content: 'Response', role: 'assistant' },
      ],
      model: 'kimi-k2.5',
      thinking: { budget_tokens: 0, type: 'disabled' },
    });

    const assistantMessage = getAssistantMessage();

    // Content is converted to array by Anthropic factory, but no thinking block
    expect(assistantMessage?.content).toEqual(
      expect.arrayContaining([expect.objectContaining({ type: 'text', text: 'Response' })]),
    );
    expect(assistantMessage?.content).not.toContainEqual(
      expect.objectContaining({ type: 'thinking' }),
    );
  });

  it('should convert reasoning to thinking block for assistant messages', async () => {
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        {
          content: 'Response',
          role: 'assistant',
          reasoning: { content: 'My reasoning process' },
        } as any,
      ],
      model: 'kimi-k2.5',
    });

    const assistantMessage = getAssistantMessage();

    expect(assistantMessage?.content).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ type: 'thinking', thinking: 'My reasoning process' }),
        expect.objectContaining({ type: 'text', text: 'Response' }),
      ]),
    );
  });

  it('should handle empty content with reasoning', async () => {
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        {
          content: '',
          role: 'assistant',
          reasoning: { content: 'My reasoning process' },
        } as any,
      ],
      model: 'kimi-k2.5',
    });

    const assistantMessage = getAssistantMessage();

    expect(assistantMessage?.content).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ type: 'thinking', thinking: 'My reasoning process' }),
        expect.objectContaining({ type: 'text', text: ' ' }),
      ]),
    );
  });

  it('should add placeholder thinking when reasoning has signature', async () => {
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        {
          content: 'Response',
          role: 'assistant',
          reasoning: { content: 'My reasoning', signature: 'some-signature' },
        } as any,
      ],
      model: 'kimi-k2.5',
    });

    const assistantMessage = getAssistantMessage();

    // reasoning with signature is invalid, so placeholder thinking is added
    expect(assistantMessage?.content).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ type: 'thinking', thinking: ' ' }),
        expect.objectContaining({ type: 'text', text: 'Response' }),
      ]),
    );
  });

  it('should handle assistant message with tool_calls and reasoning', async () => {
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        {
          content: '',
          role: 'assistant',
          reasoning: { content: 'Thinking about tools' },
          tool_calls: [
            {
              id: 'call_1',
              type: 'function',
              function: { name: 'get_weather', arguments: '{"city":"Beijing"}' },
            },
          ],
        } as any,
        {
          content: '{"temp": 20}',
          role: 'tool',
          tool_call_id: 'call_1',
        } as any,
      ],
      model: 'kimi-k2.5',
    });

    const assistantMessage = getAssistantMessage();

    expect(assistantMessage?.content).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ type: 'thinking', thinking: 'Thinking about tools' }),
        expect.objectContaining({ type: 'tool_use', name: 'get_weather' }),
      ]),
    );
  });

  it('should add placeholder thinking for tool_calls without reasoning', async () => {
    // This is the bug scenario: tool_calls without reasoning_content
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        {
          content: '',
          role: 'assistant',
          tool_calls: [
            {
              id: 'call_1',
              type: 'function',
              function: { name: 'get_weather', arguments: '{"city":"Beijing"}' },
            },
          ],
        } as any,
        {
          content: '{"temp": 20}',
          role: 'tool',
          tool_call_id: 'call_1',
        } as any,
      ],
      model: 'kimi-k2.5',
    });

    const assistantMessage = getAssistantMessage();

    // Should have placeholder thinking block to avoid API error
    expect(assistantMessage?.content).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ type: 'thinking', thinking: ' ' }),
        expect.objectContaining({ type: 'tool_use', name: 'get_weather' }),
      ]),
    );
  });

  it('should handle empty assistant message with placeholder', async () => {
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        { content: '', role: 'assistant' },
        { content: 'Follow-up', role: 'user' },
      ],
      model: 'kimi-k2-thinking',
    });

    const assistantMessage = getAssistantMessage();

    expect(assistantMessage?.content).toEqual([
      { type: 'thinking', thinking: ' ' },
      { type: 'text', text: ' ' },
    ]);
  });

  it('should not modify non-thinking model messages', async () => {
    await instance.chat({
      messages: [
        { content: 'Hello', role: 'user' },
        { content: 'Response', role: 'assistant' },
      ],
      model: 'unknown-model',
    });

    const assistantMessage = getAssistantMessage();

    // Content is converted to array by Anthropic factory, but no thinking block
    expect(assistantMessage?.content).toEqual(
      expect.arrayContaining([expect.objectContaining({ type: 'text', text: 'Response' })]),
    );
    expect(assistantMessage?.content).not.toContainEqual(
      expect.objectContaining({ type: 'thinking' }),
    );
  });
});
|
||||
|
||||
it('should handle text messages correctly', async () => {
|
||||
// Arrange
|
||||
const mockStream = new ReadableStream({
|
||||
|
|
|
|||
|
|
@ -7,27 +7,93 @@ import {
|
|||
createAnthropicCompatibleRuntime,
|
||||
} from '../../core/anthropicCompatibleFactory';
|
||||
import type { ChatStreamPayload } from '../../types';
|
||||
import { getModelPropertyWithFallback } from '../../utils/getFallbackModelProperty';
|
||||
import { processMultiProviderModelList } from '../../utils/modelParse';
|
||||
|
||||
// Base URL of the Kimi coding endpoint.
// NOTE(review): presumably the fallback when no custom base URL is configured — its use is not visible here; confirm.
const DEFAULT_KIMI_CODING_BASE_URL = 'https://api.kimi.com/coding';
|
||||
|
||||
/**
 * Max output tokens for each model, used as the default `max_tokens` when the caller
 * does not supply one.
 *
 * Keyed by both the model id (`kimi-k2.5`) and its deploymentName (`k2p5`), so the lookup
 * works whichever of the two names reaches the provider.
 */
const KIMI_MODEL_MAX_OUTPUT: Record<string, number> = {
  'k2p5': 32_768,
  'kimi-k2.5': 32_768,
  'kimi-k2-thinking': 65_536,
};
|
||||
|
||||
// Helpers for message normalization (shared with Moonshot provider)
|
||||
/** True when `model` names Kimi K2.5, either by model id (`kimi-k2.5`) or deploymentName (`k2p5`). */
const isKimiK25Model = (model: string) => ['kimi-k2.5', 'k2p5'].includes(model);
|
||||
/** True when `model` begins with `kimi-k2-thinking`, i.e. the model id itself or any suffixed variant. */
const isKimiNativeThinkingModel = (model: string) => {
  const nativeThinkingPrefix = 'kimi-k2-thinking';
  return model.indexOf(nativeThinkingPrefix) === 0;
};
|
||||
/**
 * True when message content carries nothing: the empty string, `null` or `undefined`.
 * Whitespace-only strings and empty arrays are NOT considered empty.
 */
const isEmptyContent = (content: unknown): boolean => content == null || content === '';
|
||||
/**
 * True when `reasoning` has non-empty `content` and no `signature`.
 * Reasoning that carries a signature is treated as unusable here.
 *
 * Always returns a real boolean rather than leaking the raw `content` value or `undefined`.
 */
const hasValidReasoning = (
  reasoning: { content?: unknown; signature?: unknown } | null | undefined,
): boolean => Boolean(reasoning?.content) && !reasoning?.signature;
|
||||
|
||||
/**
 * Sampling parameters sent alongside the thinking setting.
 *
 * @param isThinkingEnabled - whether thinking is enabled for the request
 * @returns `temperature` 1 with thinking, 0.6 without; `top_p` is always 0.95
 */
const getK25Params = (isThinkingEnabled: boolean) => ({
  temperature: isThinkingEnabled ? 1 : 0.6,
  top_p: 0.95,
});
|
||||
|
||||
// Anthropic format helpers

/**
 * Convert a message's `reasoning` field into an Anthropic `thinking` content block.
 *
 * @returns the block when the reasoning is usable (content present, no signature), else `null`
 */
const buildThinkingBlock = (reasoning: any) =>
  hasValidReasoning(reasoning) ? { thinking: reasoning.content, type: 'thinking' as const } : null;
|
||||
|
||||
/**
 * Coerce message content to block-array form: arrays are returned as-is (same reference),
 * anything else is wrapped in a single `text` block.
 */
const toContentArray = (content: any) =>
  Array.isArray(content) ? content : [{ text: content, type: 'text' as const }];
|
||||
|
||||
/**
 * Normalize assistant messages for Anthropic format.
 *
 * For every assistant message the `reasoning` field is removed and, when it holds usable
 * reasoning (content present, no signature), re-emitted as a leading `thinking` block.
 *
 * When forceThinking is true (a native thinking model, or kimi-k2.5 with thinking enabled),
 * every assistant message must carry a thinking block, otherwise Kimi API rejects with:
 * "thinking is enabled but reasoning_content is missing in assistant tool call message".
 * Messages without usable reasoning therefore receive a single-space placeholder block.
 *
 * Empty assistant content ('' / null / undefined) is replaced with a single-space text block.
 * Non-assistant messages are passed through untouched.
 *
 * @param messages - chat history to normalize; the input array and its items are not mutated
 * @param forceThinking - add a placeholder thinking block where usable reasoning is missing
 * @returns a new array of messages
 */
const normalizeMessagesForAnthropic = (
  messages: ChatStreamPayload['messages'],
  forceThinking = false,
) =>
  messages.map((message: any) => {
    if (message.role !== 'assistant') return message;

    // `reasoning` is never forwarded as-is; it only survives as a thinking block.
    const { reasoning, ...rest } = message;
    const thinkingBlock = buildThinkingBlock(reasoning);
    const effectiveBlock =
      thinkingBlock || (forceThinking ? { thinking: ' ', type: 'thinking' as const } : null);

    if (isEmptyContent(message.content)) {
      const placeholder = { text: ' ', type: 'text' as const };
      return { ...rest, content: effectiveBlock ? [effectiveBlock, placeholder] : [placeholder] };
    }

    if (!effectiveBlock) return rest;

    // The thinking block goes first, ahead of the original content blocks.
    return { ...rest, content: [effectiveBlock, ...toContentArray(message.content)] };
  });
|
||||
|
||||
/**
 * Build the Anthropic-format request payload for Kimi Coding Plan.
 *
 * - `max_tokens`: the caller's value, else the per-model entry in `KIMI_MODEL_MAX_OUTPUT`
 *   (which knows both model id and deploymentName), else 8192.
 * - Messages are normalized so that, when thinking is on, every assistant message carries
 *   a thinking block.
 * - For kimi-k2.5 and native thinking models the `thinking`, `temperature` and `top_p`
 *   fields are set explicitly; other models get the default payload unchanged.
 *
 * @param payload - the chat request as received by the provider
 * @returns the request to send to the Anthropic-compatible endpoint
 */
const buildKimiCodingPlanAnthropicPayload = async (
  payload: ChatStreamPayload,
): Promise<Anthropic.MessageCreateParams> => {
  const resolvedMaxTokens = payload.max_tokens ?? KIMI_MODEL_MAX_OUTPUT[payload.model] ?? 8192;

  const isK25 = isKimiK25Model(payload.model);
  const isNativeThinking = isKimiNativeThinkingModel(payload.model);
  // Native thinking models cannot turn thinking off; kimi-k2.5 thinks unless explicitly disabled.
  const isThinkingEnabled = isNativeThinking || (isK25 && payload.thinking?.type !== 'disabled');

  const basePayload = await buildDefaultAnthropicPayload({
    ...payload,
    max_tokens: resolvedMaxTokens,
    messages: normalizeMessagesForAnthropic(payload.messages, isThinkingEnabled),
  });

  if (!isK25 && !isNativeThinking) return basePayload;

  // A missing or zero budget falls back to 1024. The budget (default included) is kept
  // below max_tokens so a small caller-supplied max_tokens never yields budget >= max_tokens.
  const requestedBudget = payload.thinking?.budget_tokens || 1024;
  const thinkingParam = isThinkingEnabled
    ? ({
        budget_tokens: Math.min(requestedBudget, resolvedMaxTokens - 1),
        type: 'enabled',
      } as const)
    : ({ type: 'disabled' } as const);

  return {
    ...basePayload,
    ...getK25Params(thinkingParam.type === 'enabled'),
    thinking: thinkingParam,
  };
};
|
||||
|
||||
export const params = createAnthropicCompatibleParams({
|
||||
|
|
|
|||
|
|
@ -351,6 +351,7 @@ class ChatService {
|
|||
ModelProvider.Volcengine,
|
||||
ModelProvider.AzureAI,
|
||||
ModelProvider.Qwen,
|
||||
ModelProvider.KimiCodingPlan,
|
||||
] as string[];
|
||||
|
||||
if (providersWithDeploymentName.includes(provider)) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue