mirror of https://github.com/lobehub/lobehub
synced 2026-04-21 17:47:27 +00:00
✨ feat: add Xinference provider support (#7081)
This commit is contained in:
parent 4213d20dcf
commit e32c8e7fc3

13 changed files with 267 additions and 0 deletions
@@ -227,6 +227,8 @@ ENV \
     WENXIN_API_KEY="" WENXIN_MODEL_LIST="" \
     # xAI
     XAI_API_KEY="" XAI_MODEL_LIST="" XAI_PROXY_URL="" \
+    # Xinference
+    XINFERENCE_API_KEY="" XINFERENCE_MODEL_LIST="" XINFERENCE_PROXY_URL="" \
     # 01.AI
     ZEROONE_API_KEY="" ZEROONE_MODEL_LIST="" \
     # Zhipu
@@ -270,6 +270,8 @@ ENV \
     WENXIN_API_KEY="" WENXIN_MODEL_LIST="" \
     # xAI
     XAI_API_KEY="" XAI_MODEL_LIST="" XAI_PROXY_URL="" \
+    # Xinference
+    XINFERENCE_API_KEY="" XINFERENCE_MODEL_LIST="" XINFERENCE_PROXY_URL="" \
     # 01.AI
     ZEROONE_API_KEY="" ZEROONE_MODEL_LIST="" \
     # Zhipu
@@ -224,6 +224,8 @@ ENV \
     WENXIN_API_KEY="" WENXIN_MODEL_LIST="" \
     # xAI
     XAI_API_KEY="" XAI_MODEL_LIST="" XAI_PROXY_URL="" \
+    # Xinference
+    XINFERENCE_API_KEY="" XINFERENCE_MODEL_LIST="" XINFERENCE_PROXY_URL="" \
     # 01.AI
     ZEROONE_API_KEY="" ZEROONE_MODEL_LIST="" \
     # Zhipu
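All three image variants declare the same trio of variables. XINFERENCE_PROXY_URL follows the repo-wide *_PROXY_URL convention for pointing an OpenAI-compatible provider at a non-default endpoint. A minimal sketch of how a deployment-side helper might resolve it (resolveXinferenceBaseURL is a hypothetical name, not part of this commit):

// Hypothetical helper, assuming the usual *_PROXY_URL-overrides-default convention.
const resolveXinferenceBaseURL = (): string =>
  process.env.XINFERENCE_PROXY_URL || 'http://localhost:9997/v1';

// e.g. XINFERENCE_PROXY_URL="https://xinference.internal:9997/v1" redirects
// every request that would otherwise hit the local default.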
@@ -37,6 +37,7 @@ import {
   VLLMProviderCard,
   WenxinProviderCard,
   XAIProviderCard,
+  XinferenceProviderCard,
   ZeroOneProviderCard,
   ZhiPuProviderCard,
 } from '@/config/modelProviders';
@@ -65,6 +66,7 @@ export const useProviderList = (): ProviderItem[] => {
   AzureProvider,
   OllamaProvider,
   VLLMProviderCard,
+  XinferenceProviderCard,
   AnthropicProviderCard,
   BedrockProvider,
   GoogleProviderCard,
@@ -49,6 +49,7 @@ import { default as vllm } from './vllm';
 import { default as volcengine } from './volcengine';
 import { default as wenxin } from './wenxin';
 import { default as xai } from './xai';
+import { default as xinference } from './xinference';
 import { default as zeroone } from './zeroone';
 import { default as zhipu } from './zhipu';

@@ -121,6 +122,7 @@ export const LOBE_DEFAULT_MODEL_LIST = buildDefaultModelList({
   volcengine,
   wenxin,
   xai,
+  xinference,
   zeroone,
   zhipu,
 });
@@ -174,5 +176,6 @@ export { default as vllm } from './vllm';
 export { default as volcengine } from './volcengine';
 export { default as wenxin } from './wenxin';
 export { default as xai } from './xai';
+export { default as xinference } from './xinference';
 export { default as zeroone } from './zeroone';
 export { default as zhipu } from './zhipu';
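Registering xinference in buildDefaultModelList is what later lets the runtime match live server models against the static cards below. A rough sketch of the assumed effect (the providerId field and the mapping itself are assumptions about buildDefaultModelList, not code from this commit):

// Assumed-behavior sketch: tag each static card with its provider id so
// lookups over LOBE_DEFAULT_MODEL_LIST can stay provider-aware.
import xinference from './xinference';

const stamped = xinference.map((model) => ({ ...model, providerId: 'xinference' }));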
src/config/aiModels/xinference.ts (new file, 171 lines)
@@ -0,0 +1,171 @@
import { AIChatModelCard } from '@/types/aiModel';

const xinferenceChatModels: AIChatModelCard[] = [
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 163_840,
    description:
      'DeepSeek-V3 is a powerful Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated per token.',
    displayName: 'DeepSeek V3',
    enabled: true,
    id: 'deepseek-v3',
    type: 'chat',
  },
  {
    abilities: {
      reasoning: true,
    },
    contextWindowTokens: 163_840,
    description:
      'DeepSeek-R1 incorporates cold-start data before reinforcement learning (RL) and performs on par with OpenAI-o1 on math, code, and reasoning tasks.',
    displayName: 'DeepSeek R1',
    enabled: true,
    id: 'deepseek-r1',
    type: 'chat',
  },
  {
    abilities: {
      reasoning: true,
    },
    contextWindowTokens: 131_072,
    description:
      'deepseek-r1-distill-llama is a model distilled from DeepSeek-R1 based on Llama.',
    displayName: 'DeepSeek R1 Distill Llama',
    enabled: true,
    id: 'deepseek-r1-distill-llama',
    type: 'chat',
  },
  {
    abilities: {
      reasoning: true,
    },
    contextWindowTokens: 131_072,
    description:
      'deepseek-r1-distill-qwen is a model distilled from DeepSeek-R1 based on Qwen.',
    displayName: 'DeepSeek R1 Distill Qwen',
    enabled: true,
    id: 'deepseek-r1-distill-qwen',
    type: 'chat',
  },
  {
    abilities: {
      reasoning: true,
    },
    contextWindowTokens: 32_768,
    description:
      'QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ can think and reason, achieving significantly better performance on downstream tasks, especially hard problems. QwQ-32B is a medium-sized reasoning model whose performance is competitive with state-of-the-art reasoning models such as DeepSeek-R1 and o1-mini.',
    displayName: 'QwQ 32B',
    enabled: true,
    id: 'qwq-32b',
    type: 'chat',
  },
  {
    abilities: {
      reasoning: true,
      vision: true,
    },
    contextWindowTokens: 32_768,
    description:
      'QVQ-72B-Preview is an experimental research model developed by the Qwen team, focused on improving visual reasoning capabilities.',
    displayName: 'QVQ 72B Preview',
    enabled: true,
    id: 'qvq-72b-preview',
    type: 'chat',
  },
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 32_768,
    description:
      'Qwen2.5 is the latest series of Qwen large language models, released as a collection of base and instruction-tuned models ranging from 0.5 to 72 billion parameters.',
    displayName: 'Qwen2.5 Instruct',
    enabled: true,
    id: 'qwen2.5-instruct',
    type: 'chat',
  },
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 32_768,
    description:
      'Qwen2.5-Coder is the latest code-specific large language model in the Qwen series (formerly known as CodeQwen).',
    displayName: 'Qwen2.5 Coder Instruct',
    enabled: true,
    id: 'qwen2.5-coder-instruct',
    type: 'chat',
  },
  {
    abilities: {
      vision: true,
    },
    contextWindowTokens: 128_000,
    description:
      'Qwen2.5-VL is the latest generation of vision-language models in the Qwen model family.',
    displayName: 'Qwen2.5 VL Instruct',
    enabled: true,
    id: 'qwen2.5-vl-instruct',
    type: 'chat',
  },
  {
    contextWindowTokens: 1_024_000,
    description:
      'The Mistral-Nemo-Instruct-2407 large language model (LLM) is an instruction-tuned version of Mistral-Nemo-Base-2407.',
    displayName: 'Mistral Nemo Instruct',
    enabled: true,
    id: 'mistral-nemo-instruct',
    type: 'chat',
  },
  {
    contextWindowTokens: 131_072,
    description:
      'Mistral-Large-Instruct-2407 is an advanced dense large language model (LLM) with 123B parameters, offering state-of-the-art reasoning, knowledge, and coding capabilities.',
    displayName: 'Mistral Large Instruct',
    enabled: true,
    id: 'mistral-large-instruct',
    type: 'chat',
  },
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 131_072,
    description:
      'The Llama 3.3 instruction-tuned models are optimized for dialogue use cases and outperform many of the available open-source chat models on common industry benchmarks.',
    displayName: 'Llama 3.3 Instruct',
    enabled: true,
    id: 'llama-3.3-instruct',
    type: 'chat',
  },
  {
    abilities: {
      vision: true,
    },
    contextWindowTokens: 163_840,
    description:
      'The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.',
    displayName: 'Llama 3.2 Vision Instruct',
    enabled: true,
    id: 'llama-3.2-vision-instruct',
    type: 'chat',
  },
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 131_072,
    description:
      'The Llama 3.1 instruction-tuned models are optimized for dialogue use cases and outperform many of the available open-source chat models on common industry benchmarks.',
    displayName: 'Llama 3.1 Instruct',
    enabled: true,
    id: 'llama-3.1-instruct',
    type: 'chat',
  },
];

export const allModels = [...xinferenceChatModels];

export default allModels;
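Since every card above carries its abilities inline, downstream code can filter by capability without querying the server. A small usage sketch against this module:

// Usage sketch: list the reasoning-capable Xinference presets defined above.
import xinferenceModels from '@/config/aiModels/xinference';

const reasoningModels = xinferenceModels.filter((m) => m.abilities?.reasoning);
console.log(reasoningModels.map((m) => m.displayName));
// -> ['DeepSeek R1', 'DeepSeek R1 Distill Llama', 'DeepSeek R1 Distill Qwen',
//     'QwQ 32B', 'QVQ 72B Preview']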
@@ -75,6 +75,9 @@ export const getLLMConfig = () => {
   ENABLED_VLLM: z.boolean(),
   VLLM_API_KEY: z.string().optional(),

+  ENABLED_XINFERENCE: z.boolean(),
+  XINFERENCE_API_KEY: z.string().optional(),
+
   ENABLED_QWEN: z.boolean(),
   QWEN_API_KEY: z.string().optional(),

@@ -234,6 +237,9 @@ export const getLLMConfig = () => {
   ENABLED_VLLM: !!process.env.VLLM_API_KEY,
   VLLM_API_KEY: process.env.VLLM_API_KEY,

+  ENABLED_XINFERENCE: !!process.env.XINFERENCE_API_KEY,
+  XINFERENCE_API_KEY: process.env.XINFERENCE_API_KEY,
+
   ENABLED_QWEN: !!process.env.QWEN_API_KEY,
   QWEN_API_KEY: process.env.QWEN_API_KEY,

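The two hunks mirror the pattern used for every provider: the schema declares the pair, and the value side derives the enabled flag from the key's presence, so exporting XINFERENCE_API_KEY (even a placeholder for an unauthenticated local server) is all it takes to switch the provider on server-side. In miniature:

// The derivation above, isolated: the provider is on iff the key is set.
const ENABLED_XINFERENCE: boolean = !!process.env.XINFERENCE_API_KEY;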
@@ -49,6 +49,7 @@ import VLLMProvider from './vllm';
 import VolcengineProvider from './volcengine';
 import WenxinProvider from './wenxin';
 import XAIProvider from './xai';
+import XinferenceProvider from './xinference';
 import ZeroOneProvider from './zeroone';
 import ZhiPuProvider from './zhipu';
@@ -69,6 +70,7 @@ export const LOBE_DEFAULT_MODEL_LIST: ChatModelCard[] = [
   MoonshotProvider.chatModels,
   OllamaProvider.chatModels,
   VLLMProvider.chatModels,
+  XinferenceProvider.chatModels,
   OpenRouterProvider.chatModels,
   TogetherAIProvider.chatModels,
   FireworksAIProvider.chatModels,
@@ -108,6 +110,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [
   AzureAIProvider,
   OllamaProvider,
   VLLMProvider,
+  XinferenceProvider,
   AnthropicProvider,
   BedrockProvider,
   GoogleProvider,
@@ -215,5 +218,6 @@ export { default as VLLMProviderCard } from './vllm';
 export { default as VolcengineProviderCard } from './volcengine';
 export { default as WenxinProviderCard } from './wenxin';
 export { default as XAIProviderCard } from './xai';
+export { default as XinferenceProviderCard } from './xinference';
 export { default as ZeroOneProviderCard } from './zeroone';
 export { default as ZhiPuProviderCard } from './zhipu';
src/config/modelProviders/xinference.ts (new file, 18 lines)
@@ -0,0 +1,18 @@
import { ModelProviderCard } from '@/types/llm';

const Xinference: ModelProviderCard = {
  chatModels: [],
  description:
    'Xorbits Inference (Xinference) is an open-source platform that simplifies running and integrating a wide range of AI models. With Xinference, you can run inference with any open-source LLM, embedding model, or multimodal model, in the cloud or on-premises, and build powerful AI applications.',
  id: 'xinference',
  modelsUrl: 'https://inference.readthedocs.io/zh-cn/latest/models/builtin/index.html',
  name: 'Xinference',
  settings: {
    proxyUrl: {
      placeholder: 'http://localhost:9997/v1',
    },
    sdkType: 'openai',
  },
  url: 'https://inference.readthedocs.io/zh-cn/v0.12.3/index.html',
};

export default Xinference;
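sdkType: 'openai' declares the provider OpenAI-compatible, so any standard OpenAI client pointed at the proxyUrl placeholder should work. A sketch using the official openai npm package (the model name is an assumption and must match a model actually launched in your Xinference server):

// Sketch: talking to a local Xinference server through the OpenAI SDK.
import OpenAI from 'openai';

const client = new OpenAI({
  apiKey: process.env.XINFERENCE_API_KEY ?? 'none', // local servers may not enforce auth
  baseURL: 'http://localhost:9997/v1', // the placeholder above
});

const reply = await client.chat.completions.create({
  messages: [{ content: 'Hello!', role: 'user' }],
  model: 'qwen2.5-instruct', // assumed: a model you have launched in Xinference
});
console.log(reply.choices[0].message.content);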
@@ -45,6 +45,7 @@ import { LobeVLLMAI } from './vllm';
 import { LobeVolcengineAI } from './volcengine';
 import { LobeWenxinAI } from './wenxin';
 import { LobeXAI } from './xai';
+import { LobeXinferenceAI } from './xinference';
 import { LobeZeroOneAI } from './zeroone';
 import { LobeZhipuAI } from './zhipu';

@@ -97,6 +98,7 @@ export const providerRuntimeMap = {
   volcengine: LobeVolcengineAI,
   wenxin: LobeWenxinAI,
   xai: LobeXAI,
+  xinference: LobeXinferenceAI,
   zeroone: LobeZeroOneAI,
   zhipu: LobeZhipuAI,
 };
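Together with the enum value added below, this map entry makes runtime resolution a plain lookup; illustratively:

// providerRuntimeMap[ModelProvider.Xinference] resolves to LobeXinferenceAI.
const runtime = providerRuntimeMap[ModelProvider.Xinference];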
@@ -74,6 +74,7 @@ export enum ModelProvider {
   Volcengine = 'volcengine',
   Wenxin = 'wenxin',
   XAI = 'xai',
+  Xinference = 'xinference',
   ZeroOne = 'zeroone',
   ZhiPu = 'zhipu',
 }
src/libs/agent-runtime/xinference/index.ts (new file, 53 lines)
@@ -0,0 +1,53 @@
import { ModelProvider } from '../types';
import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';

import type { ChatModelCard } from '@/types/llm';

export interface XinferenceModelCard {
  context_length: number;
  id: string;
  model_ability: string[];
  model_description: string;
  model_type: string;
  name: string;
}

export const LobeXinferenceAI = LobeOpenAICompatibleFactory({
  baseURL: 'http://localhost:9997/v1',
  debug: {
    chatCompletion: () => process.env.DEBUG_XINFERENCE_CHAT_COMPLETION === '1',
  },
  models: async ({ client }) => {
    const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels');

    const modelsPage = (await client.models.list()) as any;
    const modelList: XinferenceModelCard[] = modelsPage.data;

    return modelList
      .map((model) => {
        const knownModel = LOBE_DEFAULT_MODEL_LIST.find(
          (m) => model.id.toLowerCase() === m.id.toLowerCase(),
        );

        return {
          contextWindowTokens: model.context_length,
          description: model.model_description,
          displayName: model.name,
          enabled: knownModel?.enabled || false,
          functionCall:
            (model.model_ability && model.model_ability.includes('tools')) ||
            knownModel?.abilities?.functionCall ||
            false,
          id: model.id,
          reasoning:
            (model.model_ability && model.model_ability.includes('reasoning')) ||
            knownModel?.abilities?.reasoning ||
            false,
          vision:
            (model.model_ability && model.model_ability.includes('vision')) ||
            knownModel?.abilities?.vision ||
            false,
        };
      })
      .filter(Boolean) as ChatModelCard[];
  },
  provider: ModelProvider.Xinference,
});
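The interesting part of the factory config is models(): Xinference's /v1/models response carries a model_ability array, which the mapper translates into LobeChat's functionCall/reasoning/vision flags, falling back to the static card list for anything the server leaves unspecified. Applied to a sample entry (the field values here are illustrative, shaped like the XinferenceModelCard interface above):

// Sketch: the ability mapping applied to one hypothetical /v1/models entry.
const sample: XinferenceModelCard = {
  context_length: 32_768,
  id: 'qwq-32b',
  model_ability: ['chat', 'reasoning'],
  model_description: 'QwQ is the reasoning model of the Qwen series.',
  model_type: 'LLM',
  name: 'qwq-32b',
};

const reasoning = sample.model_ability.includes('reasoning'); // true
const functionCall = sample.model_ability.includes('tools'); // false for this entry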
@@ -84,6 +84,7 @@ export interface UserKeyVaults extends SearchEngineKeyVaults {
   volcengine?: OpenAICompatibleKeyVault;
   wenxin?: OpenAICompatibleKeyVault;
   xai?: OpenAICompatibleKeyVault;
+  xinference?: OpenAICompatibleKeyVault;
   zeroone?: OpenAICompatibleKeyVault;
   zhipu?: OpenAICompatibleKeyVault;
 }