feat: add Xinference provider support (#7081)

This commit is contained in:
Zhijie He 2025-03-26 08:23:40 +08:00 committed by GitHub
parent 4213d20dcf
commit e32c8e7fc3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 267 additions and 0 deletions

View file

@ -227,6 +227,8 @@ ENV \
WENXIN_API_KEY="" WENXIN_MODEL_LIST="" \
# xAI
XAI_API_KEY="" XAI_MODEL_LIST="" XAI_PROXY_URL="" \
# Xinference
XINFERENCE_API_KEY="" XINFERENCE_MODEL_LIST="" XINFERENCE_PROXY_URL="" \
# 01.AI
ZEROONE_API_KEY="" ZEROONE_MODEL_LIST="" \
# Zhipu

View file

@ -270,6 +270,8 @@ ENV \
WENXIN_API_KEY="" WENXIN_MODEL_LIST="" \
# xAI
XAI_API_KEY="" XAI_MODEL_LIST="" XAI_PROXY_URL="" \
# Xinference
XINFERENCE_API_KEY="" XINFERENCE_MODEL_LIST="" XINFERENCE_PROXY_URL="" \
# 01.AI
ZEROONE_API_KEY="" ZEROONE_MODEL_LIST="" \
# Zhipu

View file

@ -224,6 +224,8 @@ ENV \
WENXIN_API_KEY="" WENXIN_MODEL_LIST="" \
# xAI
XAI_API_KEY="" XAI_MODEL_LIST="" XAI_PROXY_URL="" \
# Xinference
XINFERENCE_API_KEY="" XINFERENCE_MODEL_LIST="" XINFERENCE_PROXY_URL="" \
# 01.AI
ZEROONE_API_KEY="" ZEROONE_MODEL_LIST="" \
# Zhipu

View file

@ -37,6 +37,7 @@ import {
VLLMProviderCard,
WenxinProviderCard,
XAIProviderCard,
XinferenceProviderCard,
ZeroOneProviderCard,
ZhiPuProviderCard,
} from '@/config/modelProviders';
@ -65,6 +66,7 @@ export const useProviderList = (): ProviderItem[] => {
AzureProvider,
OllamaProvider,
VLLMProviderCard,
XinferenceProviderCard,
AnthropicProviderCard,
BedrockProvider,
GoogleProviderCard,

View file

@ -49,6 +49,7 @@ import { default as vllm } from './vllm';
import { default as volcengine } from './volcengine';
import { default as wenxin } from './wenxin';
import { default as xai } from './xai';
import { default as xinference } from './xinference';
import { default as zeroone } from './zeroone';
import { default as zhipu } from './zhipu';
@ -121,6 +122,7 @@ export const LOBE_DEFAULT_MODEL_LIST = buildDefaultModelList({
volcengine,
wenxin,
xai,
xinference,
zeroone,
zhipu,
});
@ -174,5 +176,6 @@ export { default as vllm } from './vllm';
export { default as volcengine } from './volcengine';
export { default as wenxin } from './wenxin';
export { default as xai } from './xai';
export { default as xinference } from './xinference';
export { default as zeroone } from './zeroone';
export { default as zhipu } from './zhipu';

View file

@ -0,0 +1,171 @@
import { AIChatModelCard } from '@/types/aiModel';
const xinferenceChatModels: AIChatModelCard[] = [
{
abilities: {
functionCall: true,
},
contextWindowTokens: 163_840,
description:
'DeepSeek-V3 是一个强大的专家混合MoE语言模型拥有总计 6710 亿参数,每个 token 激活 370 亿参数。',
displayName: 'DeepSeek V3',
enabled: true,
id: 'deepseek-v3',
type: 'chat'
},
{
abilities: {
reasoning: true,
},
contextWindowTokens: 163_840,
description:
'DeepSeek-R1 在强化学习RL之前引入了冷启动数据在数学、代码和推理任务上表现可与 OpenAI-o1 相媲美。',
displayName: 'DeepSeek R1',
enabled: true,
id: 'deepseek-r1',
type: 'chat'
},
{
abilities: {
reasoning: true,
},
contextWindowTokens: 131_072,
description:
'deepseek-r1-distill-llama 是基于 Llama 从 DeepSeek-R1 蒸馏而来的模型。',
displayName: 'DeepSeek R1 Distill Llama',
enabled: true,
id: 'deepseek-r1-distill-llama',
type: 'chat'
},
{
abilities: {
reasoning: true,
},
contextWindowTokens: 131_072,
description:
'deepseek-r1-distill-qwen 是基于 Qwen 从 DeepSeek-R1 蒸馏而来的模型。',
displayName: 'DeepSeek R1 Distill Qwen',
enabled: true,
id: 'deepseek-r1-distill-qwen',
type: 'chat'
},
{
abilities: {
reasoning: true,
},
contextWindowTokens: 32_768,
description:
'QwQ 是 Qwen 系列的推理模型。与传统的指令微调模型相比QwQ 具备思考和推理能力在下游任务中尤其是复杂问题上能够实现显著增强的性能。QwQ-32B 是一款中型推理模型,其性能可与最先进的推理模型(如 DeepSeek-R1、o1-mini相媲美。',
displayName: 'QwQ 32B',
enabled: true,
id: 'qwq-32b',
type: 'chat'
},
{
abilities: {
reasoning: true,
vision: true,
},
contextWindowTokens: 32_768,
description:
'QVQ-72B-Preview 是由 Qwen 团队开发的实验性研究模型,专注于提升视觉推理能力。',
displayName: 'QVQ 72B Preview',
enabled: true,
id: 'qvq-72b-preview',
type: 'chat'
},
{
abilities: {
functionCall: true,
},
contextWindowTokens: 32_768,
description:
'Qwen2.5 是 Qwen 大型语言模型的最新系列。对于 Qwen2.5,我们发布了多个基础语言模型和指令微调语言模型,参数范围从 5 亿到 72 亿不等。',
displayName: 'Qwen2.5 Instruct',
enabled: true,
id: 'qwen2.5-instruct',
type: 'chat'
},
{
abilities: {
functionCall: true,
},
contextWindowTokens: 32_768,
description:
'Qwen2.5-Coder 是 Qwen 系列中最新的代码专用大型语言模型(前身为 CodeQwen。',
displayName: 'Qwen2.5 Coder Instruct',
enabled: true,
id: 'qwen2.5-coder-instruct',
type: 'chat'
},
{
abilities: {
vision: true,
},
contextWindowTokens: 128_000,
description:
'Qwen2.5-VL 是 Qwen 模型家族中视觉语言模型的最新版本。',
displayName: 'Qwen2.5 VL Instruct',
enabled: true,
id: 'qwen2.5-vl-instruct',
type: 'chat'
},
{
contextWindowTokens: 1_024_000,
description:
'Mistral-Nemo-Instruct-2407 大型语言模型LLM是 Mistral-Nemo-Base-2407 的指令微调版本。',
displayName: 'Mistral Nemo Instruct',
enabled: true,
id: 'mistral-nemo-instruct',
type: 'chat'
},
{
contextWindowTokens: 131_072,
description:
'Mistral-Large-Instruct-2407 是一款先进的稠密大型语言模型LLM拥有 1230 亿参数,具备最先进的推理、知识和编码能力。',
displayName: 'Mistral Large Instruct',
enabled: true,
id: 'mistral-large-instruct',
type: 'chat'
},
{
abilities: {
functionCall: true,
},
contextWindowTokens: 131_072,
description:
'Llama 3.3 指令微调模型针对对话场景进行了优化,在常见的行业基准测试中,超越了许多现有的开源聊天模型。',
displayName: 'Llama 3.3 Instruct',
enabled: true,
id: 'llama-3.3-instruct',
type: 'chat'
},
{
abilities: {
vision: true,
},
contextWindowTokens: 163_840,
description:
'Llama 3.2-Vision 指令微调模型针对视觉识别、图像推理、图像描述和回答与图像相关的常规问题进行了优化。',
displayName: 'Llama 3.2 Vision Instruct',
enabled: true,
id: 'llama-3.2-vision-instruct',
type: 'chat'
},
{
abilities: {
functionCall: true,
},
contextWindowTokens: 131_072,
description:
'Llama 3.1 指令微调模型针对对话场景进行了优化,在常见的行业基准测试中,超越了许多现有的开源聊天模型。',
displayName: 'Llama 3.1 Instruct',
enabled: true,
id: 'llama-3.1-instruct',
type: 'chat'
},
]
export const allModels = [...xinferenceChatModels];
export default allModels;

View file

@ -75,6 +75,9 @@ export const getLLMConfig = () => {
ENABLED_VLLM: z.boolean(),
VLLM_API_KEY: z.string().optional(),
ENABLED_XINFERENCE: z.boolean(),
XINFERENCE_API_KEY: z.string().optional(),
ENABLED_QWEN: z.boolean(),
QWEN_API_KEY: z.string().optional(),
@ -234,6 +237,9 @@ export const getLLMConfig = () => {
ENABLED_VLLM: !!process.env.VLLM_API_KEY,
VLLM_API_KEY: process.env.VLLM_API_KEY,
ENABLED_XINFERENCE: !!process.env.XINFERENCE_API_KEY,
XINFERENCE_API_KEY: process.env.XINFERENCE_API_KEY,
ENABLED_QWEN: !!process.env.QWEN_API_KEY,
QWEN_API_KEY: process.env.QWEN_API_KEY,

View file

@ -49,6 +49,7 @@ import VLLMProvider from './vllm';
import VolcengineProvider from './volcengine';
import WenxinProvider from './wenxin';
import XAIProvider from './xai';
import XinferenceProvider from './xinference';
import ZeroOneProvider from './zeroone';
import ZhiPuProvider from './zhipu';
@ -69,6 +70,7 @@ export const LOBE_DEFAULT_MODEL_LIST: ChatModelCard[] = [
MoonshotProvider.chatModels,
OllamaProvider.chatModels,
VLLMProvider.chatModels,
XinferenceProvider.chatModels,
OpenRouterProvider.chatModels,
TogetherAIProvider.chatModels,
FireworksAIProvider.chatModels,
@ -108,6 +110,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [
AzureAIProvider,
OllamaProvider,
VLLMProvider,
XinferenceProvider,
AnthropicProvider,
BedrockProvider,
GoogleProvider,
@ -215,5 +218,6 @@ export { default as VLLMProviderCard } from './vllm';
export { default as VolcengineProviderCard } from './volcengine';
export { default as WenxinProviderCard } from './wenxin';
export { default as XAIProviderCard } from './xai';
export { default as XinferenceProviderCard } from './xinference';
export { default as ZeroOneProviderCard } from './zeroone';
export { default as ZhiPuProviderCard } from './zhipu';

View file

@ -0,0 +1,18 @@
import { ModelProviderCard } from '@/types/llm';
const Xinference: ModelProviderCard = {
chatModels: [],
description: 'Xorbits Inference (Xinference) 是一个开源平台,用于简化各种 AI 模型的运行和集成。借助 Xinference您可以使用任何开源 LLM、嵌入模型和多模态模型在云端或本地环境中运行推理并创建强大的 AI 应用。',
id: 'xinference',
modelsUrl: 'https://inference.readthedocs.io/zh-cn/latest/models/builtin/index.html',
name: 'Xinference',
settings: {
proxyUrl: {
placeholder: 'http://localhost:9997/v1',
},
sdkType: 'openai',
},
url: 'https://inference.readthedocs.io/zh-cn/v0.12.3/index.html',
};
export default Xinference;

View file

@ -45,6 +45,7 @@ import { LobeVLLMAI } from './vllm';
import { LobeVolcengineAI } from './volcengine';
import { LobeWenxinAI } from './wenxin';
import { LobeXAI } from './xai';
import { LobeXinferenceAI } from './xinference';
import { LobeZeroOneAI } from './zeroone';
import { LobeZhipuAI } from './zhipu';
@ -97,6 +98,7 @@ export const providerRuntimeMap = {
volcengine: LobeVolcengineAI,
wenxin: LobeWenxinAI,
xai: LobeXAI,
xinference: LobeXinferenceAI,
zeroone: LobeZeroOneAI,
zhipu: LobeZhipuAI,
};

View file

@ -74,6 +74,7 @@ export enum ModelProvider {
Volcengine = 'volcengine',
Wenxin = 'wenxin',
XAI = 'xai',
Xinference = 'xinference',
ZeroOne = 'zeroone',
ZhiPu = 'zhipu',
}

View file

@ -0,0 +1,53 @@
import { ModelProvider } from '../types';
import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
import type { ChatModelCard } from '@/types/llm';
export interface XinferenceModelCard {
context_length: number;
id: string;
model_ability: string[];
model_description: string;
model_type: string;
name: string;
}
export const LobeXinferenceAI = LobeOpenAICompatibleFactory({
baseURL: 'http://localhost:9997/v1',
debug: {
chatCompletion: () => process.env.DEBUG_XINFERENCE_CHAT_COMPLETION === '1',
},
models: async ({ client }) => {
const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels');
const modelsPage = await client.models.list() as any;
const modelList: XinferenceModelCard[] = modelsPage.data;
return modelList
.map((model) => {
const knownModel = LOBE_DEFAULT_MODEL_LIST.find((m) => model.id.toLowerCase() === m.id.toLowerCase());
return {
contextWindowTokens: model.context_length,
description: model.model_description,
displayName: model.name,
enabled: knownModel?.enabled || false,
functionCall:
(model.model_ability && model.model_ability.includes("tools"))
|| knownModel?.abilities?.functionCall
|| false,
id: model.id,
reasoning:
(model.model_ability && model.model_ability.includes("reasoning"))
|| knownModel?.abilities?.reasoning
|| false,
vision:
(model.model_ability && model.model_ability.includes("vision"))
|| knownModel?.abilities?.vision
|| false,
};
})
.filter(Boolean) as ChatModelCard[];
},
provider: ModelProvider.Xinference,
});

View file

@ -84,6 +84,7 @@ export interface UserKeyVaults extends SearchEngineKeyVaults {
volcengine?: OpenAICompatibleKeyVault;
wenxin?: OpenAICompatibleKeyVault;
xai?: OpenAICompatibleKeyVault;
xinference?: OpenAICompatibleKeyVault;
zeroone?: OpenAICompatibleKeyVault;
zhipu?: OpenAICompatibleKeyVault;
}