diff --git a/.agents/skills/typescript/SKILL.md b/.agents/skills/typescript/SKILL.md index 3916d0d69c..6948ea88b6 100644 --- a/.agents/skills/typescript/SKILL.md +++ b/.agents/skills/typescript/SKILL.md @@ -1,6 +1,6 @@ --- name: typescript -description: TypeScript code style and optimization guidelines. Use when writing TypeScript code (.ts, .tsx, .mts files), reviewing code quality, or implementing type-safe patterns. Triggers on TypeScript development, type safety questions, or code style discussions. +description: TypeScript code style and optimization guidelines. MUST READ before writing or modifying any TypeScript code (.ts, .tsx, .mts files). Also use when reviewing code quality or implementing type-safe patterns. Triggers on any TypeScript file edit, code style discussions, or type safety questions. --- # TypeScript Code Style Guide @@ -25,6 +25,17 @@ description: TypeScript code style and optimization guidelines. Use when writing - Use promise-based variants: `import { readFile } from 'fs/promises'` - Use `Promise.all`, `Promise.race` for concurrent operations where safe +## Imports + +- This project uses `simple-import-sort/imports` and `consistent-type-imports` (`fixStyle: 'separate-type-imports'`) +- **Separate type imports**: always use `import type { ... }` for type-only imports, NOT `import { type ... }` inline syntax +- When a file already has `import type { ... }` from a package and you need to add a value import, keep them as **two separate statements**: + ```ts + import type { ChatTopicBotContext } from '@lobechat/types'; + import { RequestTrigger } from '@lobechat/types'; + ``` +- Within each import statement, specifiers are sorted **alphabetically by name** + ## Code Structure - Prefer object destructuring diff --git a/locales/en-US/spend.json b/locales/en-US/spend.json index 7e1adc2488..9e1d8f0d57 100644 --- a/locales/en-US/spend.json +++ b/locales/en-US/spend.json @@ -12,7 +12,18 @@ "table.columns.spend": "Credits", "table.columns.startTime": "Created At", "table.columns.totalTokens": "Token Usage", - "table.columns.type.enums.chat": "Chat Message", + "table.columns.trigger.enums.api": "API Call", + "table.columns.trigger.enums.bot": "Bot Message", + "table.columns.trigger.enums.chat": "Chat Message", + "table.columns.trigger.enums.cron": "Scheduled Task", + "table.columns.trigger.enums.eval": "Benchmark Eval", + "table.columns.trigger.enums.file_embedding": "File Embedding", + "table.columns.trigger.enums.memory": "Memory Extraction", + "table.columns.trigger.enums.semantic_search": "Knowledge Search", + "table.columns.trigger.enums.topic": "Topic Summary", + "table.columns.trigger.title": "Trigger", + "table.columns.type.enums.chat": "Text Generation", + "table.columns.type.enums.embedding": "Embedding", "table.columns.type.enums.imageGeneration": "Image Generation", "table.columns.type.enums.videoGeneration": "Video Generation", "table.columns.type.title": "Type", diff --git a/locales/zh-CN/spend.json b/locales/zh-CN/spend.json index 71eba46449..6b84924d74 100644 --- a/locales/zh-CN/spend.json +++ b/locales/zh-CN/spend.json @@ -12,7 +12,18 @@ "table.columns.spend": "消耗积分", "table.columns.startTime": "创建时间", "table.columns.totalTokens": "令牌使用量", - "table.columns.type.enums.chat": "聊天消息", + "table.columns.trigger.enums.api": "API 调用", + "table.columns.trigger.enums.bot": "Bot 消息", + "table.columns.trigger.enums.chat": "聊天消息", + "table.columns.trigger.enums.cron": "定时任务", + "table.columns.trigger.enums.eval": "基准评测", + "table.columns.trigger.enums.file_embedding": "文件嵌入", + "table.columns.trigger.enums.memory": "记忆提取", + "table.columns.trigger.enums.semantic_search": "知识搜索", + "table.columns.trigger.enums.topic": "话题总结", + "table.columns.trigger.title": "触发方式", + "table.columns.type.enums.chat": "文本生成", + "table.columns.type.enums.embedding": "嵌入", "table.columns.type.enums.imageGeneration": "图像生成", "table.columns.type.enums.videoGeneration": "视频生成", "table.columns.type.title": "类型", diff --git a/packages/memory-user-memory/package.json b/packages/memory-user-memory/package.json index 65b9cf609c..26aef4453d 100644 --- a/packages/memory-user-memory/package.json +++ b/packages/memory-user-memory/package.json @@ -11,15 +11,16 @@ }, "main": "src/index.ts", "scripts": { + "build:gen-response-formats": "tsx scripts/generate-response-formats.ts", "test": "vitest --run", "test:coverage": "vitest --coverage --silent='passed-only'", - "build:gen-response-formats": "tsx scripts/generate-response-formats.ts", "type-check": "tsgo --noEmit -p tsconfig.json" }, "dependencies": { "@lobechat/context-engine": "workspace:*", "@lobechat/model-runtime": "workspace:*", "@lobechat/prompts": "workspace:*", + "@lobechat/types": "workspace:*", "dayjs": "^1.11.11", "dotenv": "^17.2.3", "ora": "^9.0.0", @@ -29,7 +30,6 @@ "zod-to-json-schema": "^3.24.6" }, "devDependencies": { - "@lobechat/types": "workspace:*", "@types/json-schema": "^7.0.15", "@types/xast": "^2.0.4", "promptfoo": "^0.120.17", diff --git a/packages/memory-user-memory/src/extractors/base.ts b/packages/memory-user-memory/src/extractors/base.ts index f7c09bb52d..46888763c1 100644 --- a/packages/memory-user-memory/src/extractors/base.ts +++ b/packages/memory-user-memory/src/extractors/base.ts @@ -11,6 +11,7 @@ import { ATTR_GEN_AI_REQUEST_MODEL, } from '@lobechat/observability-otel/gen-ai'; import { tracer } from '@lobechat/observability-otel/modules/memory-user-memory'; +import { RequestTrigger } from '@lobechat/types'; import type { z } from 'zod'; import type { @@ -155,7 +156,9 @@ export abstract class BaseMemoryExtractor< } span.addEvent('gen_ai.request.send'); - const result = await this.runtime.generateObject(payload); + const result = await this.runtime.generateObject(payload, { + metadata: { trigger: RequestTrigger.Memory }, + }); span.addEvent('gen_ai.response.receive'); span.setAttributes({ diff --git a/packages/memory-user-memory/src/extractors/gatekeeper.test.ts b/packages/memory-user-memory/src/extractors/gatekeeper.test.ts index fd42d9f204..a094a1a248 100644 --- a/packages/memory-user-memory/src/extractors/gatekeeper.test.ts +++ b/packages/memory-user-memory/src/extractors/gatekeeper.test.ts @@ -95,6 +95,7 @@ describe('UserMemoryGateKeeper', () => { model: 'gpt-mock', schema: expect.any(Object), }), + expect.objectContaining({ metadata: { trigger: 'memory' } }), ); }); }); diff --git a/packages/memory-user-memory/src/extractors/identity.test.ts b/packages/memory-user-memory/src/extractors/identity.test.ts index 908da921a6..90603627e0 100644 --- a/packages/memory-user-memory/src/extractors/identity.test.ts +++ b/packages/memory-user-memory/src/extractors/identity.test.ts @@ -76,6 +76,7 @@ describe('IdentityExtractor', () => { schema: expect.objectContaining({ name: expect.stringContaining('identity') }), tools: undefined, }), + expect.objectContaining({ metadata: { trigger: 'memory' } }), ); expect(result).toEqual(structuredResult); }); diff --git a/packages/memory-user-memory/src/extractors/persona.ts b/packages/memory-user-memory/src/extractors/persona.ts index b76f460ad5..8e45a48e9a 100644 --- a/packages/memory-user-memory/src/extractors/persona.ts +++ b/packages/memory-user-memory/src/extractors/persona.ts @@ -1,4 +1,5 @@ import { renderPlaceholderTemplate } from '@lobechat/context-engine'; +import { RequestTrigger } from '@lobechat/types'; import { z } from 'zod'; import { userPersonaPrompt } from '../prompts'; @@ -115,11 +116,14 @@ export class UserPersonaExtractor extends BaseMemoryExtractor< { content: userPrompt, role: 'user' as const }, ]; - const result = (await this.runtime.generateObject({ - messages, - model: this.model, - tools: this.getTools(options || {}), - })) as unknown; + const result = (await this.runtime.generateObject( + { + messages, + model: this.model, + tools: this.getTools(options || {}), + }, + { metadata: { trigger: RequestTrigger.Memory } }, + )) as unknown; if (Array.isArray(result)) { const firstCall = result[0]; diff --git a/packages/model-bank/src/aiModels/lobehub/embedding.ts b/packages/model-bank/src/aiModels/lobehub/embedding.ts new file mode 100644 index 0000000000..069b1dc9b9 --- /dev/null +++ b/packages/model-bank/src/aiModels/lobehub/embedding.ts @@ -0,0 +1,19 @@ +import type { AIEmbeddingModelCard } from '../../types/aiModel'; + +export const lobehubEmbeddingModels: AIEmbeddingModelCard[] = [ + { + contextWindowTokens: 8192, + description: + 'An efficient, cost-effective next-generation embedding model for retrieval and RAG scenarios.', + displayName: 'Text Embedding 3 Small', + enabled: true, + id: 'text-embedding-3-small', + maxDimension: 1536, + pricing: { + currency: 'USD', + units: [{ name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' }], + }, + releasedAt: '2024-01-25', + type: 'embedding', + }, +]; diff --git a/packages/model-bank/src/aiModels/lobehub/index.ts b/packages/model-bank/src/aiModels/lobehub/index.ts index aab9acbe18..37dc6713e3 100644 --- a/packages/model-bank/src/aiModels/lobehub/index.ts +++ b/packages/model-bank/src/aiModels/lobehub/index.ts @@ -1,12 +1,19 @@ import { lobehubChatModels } from './chat'; +import { lobehubEmbeddingModels } from './embedding'; import { lobehubImageModels } from './image'; import { lobehubVideoModels } from './video'; export { lobehubChatModels } from './chat'; +export { lobehubEmbeddingModels } from './embedding'; export { lobehubImageModels } from './image'; export * from './utils'; export { lobehubVideoModels, seedance15ProParams } from './video'; -export const allModels = [...lobehubChatModels, ...lobehubImageModels, ...lobehubVideoModels]; +export const allModels = [ + ...lobehubChatModels, + ...lobehubEmbeddingModels, + ...lobehubImageModels, + ...lobehubVideoModels, +]; export default allModels; diff --git a/packages/model-runtime/src/core/ModelRuntime.ts b/packages/model-runtime/src/core/ModelRuntime.ts index 1e6cad6274..1d9b28c3c4 100644 --- a/packages/model-runtime/src/core/ModelRuntime.ts +++ b/packages/model-runtime/src/core/ModelRuntime.ts @@ -35,6 +35,7 @@ export interface ModelRuntimeHooks { * Runs before the LLM call. Throw to abort (e.g., budget exceeded). */ beforeChat?: (payload: ChatStreamPayload, options?: ChatMethodOptions) => Promise; + beforeEmbeddings?: (payload: EmbeddingsPayload, options?: EmbeddingsOptions) => Promise; beforeGenerateObject?: ( payload: GenerateObjectPayload, options?: GenerateObjectOptions, @@ -57,6 +58,16 @@ export interface ModelRuntimeHooks { context: { options?: ChatMethodOptions; payload: ChatStreamPayload }, ) => void | Promise; + onEmbeddingsError?: ( + error: ChatCompletionErrorPayload, + context: { options?: EmbeddingsOptions; payload: EmbeddingsPayload }, + ) => void | Promise; + + onEmbeddingsFinal?: ( + data: { latencyMs?: number; usage?: ModelUsage }, + context: { options?: EmbeddingsOptions; payload: EmbeddingsPayload }, + ) => void | Promise; + onGenerateObjectError?: ( error: ChatCompletionErrorPayload, context: { options?: GenerateObjectOptions; payload: GenerateObjectPayload }, @@ -205,7 +216,36 @@ export class ModelRuntime { } async embeddings(payload: EmbeddingsPayload, options?: EmbeddingsOptions) { - return this._runtime.embeddings?.(payload, options); + await this._hooks?.beforeEmbeddings?.(payload, options); + + const startTime = Date.now(); + + const finalOptions = this._hooks?.onEmbeddingsFinal + ? { + ...options, + onUsage: async (usage: ModelUsage) => { + await options?.onUsage?.(usage); + try { + const latencyMs = Date.now() - startTime; + await this._hooks!.onEmbeddingsFinal!({ latencyMs, usage }, { options, payload }); + } catch (e) { + console.error('[ModelRuntime] onEmbeddingsFinal hook error:', e); + } + }, + } + : options; + + try { + return await this._runtime.embeddings?.(payload, finalOptions); + } catch (error) { + if (this._hooks?.onEmbeddingsError) { + await this._hooks.onEmbeddingsError(error as ChatCompletionErrorPayload, { + options, + payload, + }); + } + throw error; + } } async textToSpeech(payload: TextToSpeechPayload, options?: EmbeddingsOptions) { return this._runtime.textToSpeech?.(payload, options); @@ -243,7 +283,12 @@ export class ModelRuntime { params: Partial< ClientOptions & LobeBedrockAIParams & - LobeCloudflareParams & { apiKey?: string; apiVersion?: string; baseURL?: string } + LobeCloudflareParams & { + apiKey?: string; + apiVersion?: string; + baseURL?: string; + userId?: string; + } >, hooks?: ModelRuntimeHooks, ) { diff --git a/packages/model-runtime/src/core/RouterRuntime/createRuntime.ts b/packages/model-runtime/src/core/RouterRuntime/createRuntime.ts index fb4780c576..6daa9a01e8 100644 --- a/packages/model-runtime/src/core/RouterRuntime/createRuntime.ts +++ b/packages/model-runtime/src/core/RouterRuntime/createRuntime.ts @@ -90,6 +90,7 @@ export interface RouteAttemptResult { channelId?: string; durationMs: number; error?: unknown; + metadata?: Record; model: string; optionIndex: number; providerId: string; @@ -302,6 +303,7 @@ export const createRouterRuntime = ({ private async runWithFallback( model: string, requestHandler: (runtime: LobeRuntimeAI) => Promise, + metadata?: Record, ): Promise { const matchedRouter = await this.resolveMatchedRouter(model); const routerOptions = this.normalizeRouterOptions(matchedRouter); @@ -354,6 +356,7 @@ export const createRouterRuntime = ({ apiType: resolvedApiType, channelId, durationMs: Date.now() - startTime, + metadata, model, optionIndex: index, providerId: id, @@ -376,6 +379,7 @@ export const createRouterRuntime = ({ channelId, durationMs: Date.now() - startTime, error, + metadata, model, optionIndex: index, providerId: id, @@ -452,8 +456,10 @@ export const createRouterRuntime = ({ */ async chat(payload: ChatStreamPayload, options?: ChatMethodOptions) { try { - return await this.runWithFallback(payload.model, (runtime) => - runtime.chat!(payload, options), + return await this.runWithFallback( + payload.model, + (runtime) => runtime.chat!(payload, options), + options?.metadata, ); } catch (e) { if (params.chatCompletion?.handleError) { @@ -485,14 +491,18 @@ export const createRouterRuntime = ({ } async generateObject(payload: GenerateObjectPayload, options?: GenerateObjectOptions) { - return this.runWithFallback(payload.model, (runtime) => - runtime.generateObject!(payload, options), + return this.runWithFallback( + payload.model, + (runtime) => runtime.generateObject!(payload, options), + options?.metadata, ); } async embeddings(payload: EmbeddingsPayload, options?: EmbeddingsOptions) { - return this.runWithFallback(payload.model, (runtime) => - runtime.embeddings!(payload, options), + return this.runWithFallback( + payload.model, + (runtime) => runtime.embeddings!(payload, options), + options?.metadata, ); } diff --git a/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts b/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts index 835e279422..46d0a80dcb 100644 --- a/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts +++ b/packages/model-runtime/src/core/openaiCompatibleFactory/index.ts @@ -820,6 +820,17 @@ export const createOpenAICompatibleRuntime = = an { headers: options?.headers, signal: options?.signal }, ); + if (res.usage && options?.onUsage) { + const pricing = await getModelPricing(payload.model, this.id); + await options.onUsage( + convertOpenAIUsage(res.usage as any, { + model: payload.model, + pricing, + provider: this.id, + }), + ); + } + log('received %d embeddings', res.data.length); return res.data.map((item) => item.embedding); } catch (error) { diff --git a/packages/model-runtime/src/providers/azureOpenai/index.ts b/packages/model-runtime/src/providers/azureOpenai/index.ts index c76ea626e9..2ddf9f762f 100644 --- a/packages/model-runtime/src/providers/azureOpenai/index.ts +++ b/packages/model-runtime/src/providers/azureOpenai/index.ts @@ -120,6 +120,19 @@ export class LobeAzureOpenAI implements LobeRuntimeAI { { headers: options?.headers, signal: options?.signal }, ); + if (res.usage && options?.onUsage) { + const { convertOpenAIUsage } = await import('../../core/usageConverters/openai'); + const { getModelPricing } = await import('../../utils/getModelPricing'); + const pricing = await getModelPricing(payload.model, ModelProvider.Azure); + await options.onUsage( + convertOpenAIUsage(res.usage as any, { + model: payload.model, + pricing, + provider: ModelProvider.Azure, + }), + ); + } + return res.data.map((item) => item.embedding); } catch (error) { return this.handleError(error, payload.model); diff --git a/packages/model-runtime/src/types/embeddings.ts b/packages/model-runtime/src/types/embeddings.ts index 4117780fac..e4ee7b3dd3 100644 --- a/packages/model-runtime/src/types/embeddings.ts +++ b/packages/model-runtime/src/types/embeddings.ts @@ -1,3 +1,5 @@ +import type { ModelUsage } from '@lobechat/types'; + export interface EmbeddingsPayload { /** * The number of dimensions the resulting output embeddings should have. Only @@ -18,6 +20,9 @@ export interface EmbeddingsPayload { export interface EmbeddingsOptions { headers?: Record; + /** Metadata passed to hooks (billing, tracing, etc.) */ + metadata?: Record; + onUsage?: (usage: ModelUsage) => void | Promise; signal?: AbortSignal; /** * userId for the embeddings diff --git a/packages/model-runtime/src/types/structureOutput.ts b/packages/model-runtime/src/types/structureOutput.ts index 6de7ba397e..2eca8a1b0d 100644 --- a/packages/model-runtime/src/types/structureOutput.ts +++ b/packages/model-runtime/src/types/structureOutput.ts @@ -34,6 +34,9 @@ export interface GenerateObjectOptions { */ headers?: Record; + /** Metadata passed to hooks (billing, tracing, etc.) */ + metadata?: Record; + onUsage?: (usage: ModelUsage) => void | Promise; signal?: AbortSignal; diff --git a/packages/model-runtime/vitest.config.mts b/packages/model-runtime/vitest.config.mts index 69ad93343c..de8f490151 100644 --- a/packages/model-runtime/vitest.config.mts +++ b/packages/model-runtime/vitest.config.mts @@ -1,9 +1,12 @@ import { resolve } from 'node:path'; + import { coverageConfigDefaults, defineConfig } from 'vitest/config'; export default defineConfig({ test: { alias: { + // Resolve @cloud/database's internal @/ paths when pnpm overrides pull in cloud packages + '@/database': resolve(__dirname, '../../packages/database/src'), // TODO: 目前仍然残留 ModelRuntime.test.ts 中的部分测试依赖了主项目的内容,后续需要拆分测试 '@': resolve(__dirname, '../../src'), }, diff --git a/packages/openapi/src/services/chat.service.ts b/packages/openapi/src/services/chat.service.ts index fecb1e48ed..13f882c065 100644 --- a/packages/openapi/src/services/chat.service.ts +++ b/packages/openapi/src/services/chat.service.ts @@ -1,5 +1,6 @@ import type { ChatStreamPayload } from '@lobechat/model-runtime'; import type { LobeAgentChatConfig, LobeAgentConfig, UserSystemAgentConfig } from '@lobechat/types'; +import { RequestTrigger } from '@lobechat/types'; import { and, eq } from 'drizzle-orm'; import { getBusinessModelRuntimeHooks } from '@/business/server/model-runtime'; @@ -341,6 +342,7 @@ export class ChatService extends BaseService { // 调用聊天 API const response = await modelRuntime.chat(chatPayload, { + metadata: { trigger: RequestTrigger.Api }, user: this.userId!, }); diff --git a/packages/types/src/agentRuntime.ts b/packages/types/src/agentRuntime.ts index 3e00d91cc1..ff00da706b 100644 --- a/packages/types/src/agentRuntime.ts +++ b/packages/types/src/agentRuntime.ts @@ -1,3 +1,15 @@ +export enum RequestTrigger { + Api = 'api', + Bot = 'bot', + Chat = 'chat', + Cron = 'cron', + Eval = 'eval', + FileEmbedding = 'file_embedding', + Memory = 'memory', + SemanticSearch = 'semantic_search', + Topic = 'topic', +} + // ******* Runtime Biz Error ******* // export const AgentRuntimeErrorType = { AgentRuntimeError: 'AgentRuntimeError', // Agent Runtime module runtime error diff --git a/src/business/server/trpc-middlewares/async.ts b/src/business/server/trpc-middlewares/async.ts index 4a4c865c99..3951c92506 100644 --- a/src/business/server/trpc-middlewares/async.ts +++ b/src/business/server/trpc-middlewares/async.ts @@ -4,10 +4,6 @@ export const checkEmbeddingUsage = asyncTrpc.middleware(async (opts) => { return opts.next(); }); -export const checkBudgetsUsage = asyncTrpc.middleware(async (opts) => { - return opts.next(); -}); - export const createImageBusinessMiddleware = asyncTrpc.middleware(async (opts) => { return opts.next(); }); diff --git a/src/business/server/trpc-middlewares/lambda.ts b/src/business/server/trpc-middlewares/lambda.ts index d399a4c2aa..ac2b2405fb 100644 --- a/src/business/server/trpc-middlewares/lambda.ts +++ b/src/business/server/trpc-middlewares/lambda.ts @@ -3,7 +3,3 @@ import { trpc } from '@/libs/trpc/lambda/init'; export const checkFileStorageUsage = trpc.middleware(async (opts) => { return opts.next(); }); - -export const checkBudgetsUsage = trpc.middleware(async (opts) => { - return opts.next(); -}); diff --git a/src/locales/default/spend.ts b/src/locales/default/spend.ts index 6d105adfc1..e532d3216d 100644 --- a/src/locales/default/spend.ts +++ b/src/locales/default/spend.ts @@ -12,7 +12,18 @@ export default { 'table.columns.spend': 'Credits', 'table.columns.startTime': 'Created At', 'table.columns.totalTokens': 'Token Usage', - 'table.columns.type.enums.chat': 'Chat Message', + 'table.columns.trigger.enums.api': 'API Call', + 'table.columns.trigger.enums.bot': 'Bot Message', + 'table.columns.trigger.enums.chat': 'Chat Message', + 'table.columns.trigger.enums.cron': 'Scheduled Task', + 'table.columns.trigger.enums.eval': 'Benchmark Eval', + 'table.columns.trigger.enums.file_embedding': 'File Embedding', + 'table.columns.trigger.enums.memory': 'Memory Extraction', + 'table.columns.trigger.enums.semantic_search': 'Knowledge Search', + 'table.columns.trigger.enums.topic': 'Topic Summary', + 'table.columns.trigger.title': 'Trigger', + 'table.columns.type.enums.chat': 'Text Generation', + 'table.columns.type.enums.embedding': 'Embedding', 'table.columns.type.enums.imageGeneration': 'Image Generation', 'table.columns.type.enums.videoGeneration': 'Video Generation', 'table.columns.type.title': 'Type', diff --git a/src/routes/(main)/settings/provider/features/ModelList/EnabledModelList/index.tsx b/src/routes/(main)/settings/provider/features/ModelList/EnabledModelList/index.tsx index 2e3d877b3f..2924d8013e 100644 --- a/src/routes/(main)/settings/provider/features/ModelList/EnabledModelList/index.tsx +++ b/src/routes/(main)/settings/provider/features/ModelList/EnabledModelList/index.tsx @@ -1,13 +1,14 @@ import { ActionIcon, Center, Flexbox, Text, TooltipGroup } from '@lobehub/ui'; import isEqual from 'fast-deep-equal'; import { ArrowDownUpIcon, ToggleLeft } from 'lucide-react'; -import { useMemo, useState } from 'react'; +import { use, useMemo, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { useAiInfraStore } from '@/store/aiInfra'; import { aiModelSelectors } from '@/store/aiInfra/selectors'; import ModelItem from '../ModelItem'; +import { ProviderSettingsContext } from '../ProviderSettingsContext'; import SortModelModal from '../SortModelModal'; interface EnabledModelListProps { @@ -16,6 +17,7 @@ interface EnabledModelListProps { const EnabledModelList = ({ activeTab }: EnabledModelListProps) => { const { t } = useTranslation('modelProvider'); + const { modelEditable } = use(ProviderSettingsContext); const enabledModels = useAiInfraStore(aiModelSelectors.enabledAiProviderModelList, isEqual); const batchToggleAiModels = useAiInfraStore((s) => s.batchToggleAiModels); @@ -30,6 +32,13 @@ const EnabledModelList = ({ activeTab }: EnabledModelListProps) => { return enabledModels.filter((model) => model.type === activeTab); }, [enabledModels, activeTab]); + // Models that can be toggled (exclude embedding models when not editable) + const togglableModels = useMemo( + () => + modelEditable ? filteredModels : filteredModels.filter((model) => model.type !== 'embedding'), + [filteredModels, modelEditable], + ); + const isCurrentTabEmpty = filteredModels.length === 0; return ( <> @@ -40,20 +49,22 @@ const EnabledModelList = ({ activeTab }: EnabledModelListProps) => { {!isEmpty && ( - { - setBatchLoading(true); - await batchToggleAiModels( - enabledModels.map((i) => i.id), - false, - ); - setBatchLoading(false); - }} - /> + {togglableModels.length > 0 && ( + { + setBatchLoading(true); + await batchToggleAiModels( + togglableModels.map((i) => i.id), + false, + ); + setBatchLoading(false); + }} + /> + )} ( ); - const EnableSwitch = ( + const canToggle = modelEditable || type !== 'embedding'; + + const EnableSwitch = canToggle ? ( ( await toggleModelEnabled({ enabled: e, id, source, type }); }} /> - ); + ) : null; const Actions = modelEditable && diff --git a/src/server/modules/AgentRuntime/RuntimeExecutors.ts b/src/server/modules/AgentRuntime/RuntimeExecutors.ts index 1f543fb9d9..18ac87358f 100644 --- a/src/server/modules/AgentRuntime/RuntimeExecutors.ts +++ b/src/server/modules/AgentRuntime/RuntimeExecutors.ts @@ -410,6 +410,7 @@ export const createRuntimeExecutors = ( metadata: { operationId, topicId: state.metadata?.topicId, + trigger: state.metadata?.trigger, }, user: ctx.userId, }); diff --git a/src/server/routers/async/file.ts b/src/server/routers/async/file.ts index 01e34ecf38..f5ef84306c 100644 --- a/src/server/routers/async/file.ts +++ b/src/server/routers/async/file.ts @@ -1,10 +1,11 @@ import { ASYNC_TASK_TIMEOUT } from '@lobechat/business-config/server'; +import { RequestTrigger } from '@lobechat/types'; import { TRPCError } from '@trpc/server'; import { chunk } from 'es-toolkit/compat'; import pMap from 'p-map'; import { z } from 'zod'; -import { checkBudgetsUsage, checkEmbeddingUsage } from '@/business/server/trpc-middlewares/async'; +import { checkEmbeddingUsage } from '@/business/server/trpc-middlewares/async'; import { serverDBEnv } from '@/config/db'; import { DEFAULT_FILE_EMBEDDING_MODEL_ITEM } from '@/const/settings/knowledge'; import { AsyncTaskModel } from '@/database/models/asyncTask'; @@ -41,7 +42,6 @@ const fileProcedure = asyncAuthedProcedure.use(async (opts) => { export const fileRouter = router({ embeddingChunks: fileProcedure .use(checkEmbeddingUsage) - .use(checkBudgetsUsage) .input( z.object({ fileId: z.string(), @@ -98,11 +98,14 @@ export const fileRouter = router({ provider, ); - const embeddings = await modelRuntime.embeddings({ - dimensions: 1024, - input: chunks.map((c) => c.text), - model, - }); + const embeddings = await modelRuntime.embeddings( + { + dimensions: 1024, + input: chunks.map((c) => c.text), + model, + }, + { metadata: { trigger: RequestTrigger.FileEmbedding } }, + ); const items: NewEmbeddingsItem[] = embeddings?.map((e, idx) => ({ diff --git a/src/server/routers/async/ragEval.ts b/src/server/routers/async/ragEval.ts index b8c01f12ce..55c61969be 100644 --- a/src/server/routers/async/ragEval.ts +++ b/src/server/routers/async/ragEval.ts @@ -1,5 +1,5 @@ import { chainAnswerWithContext } from '@lobechat/prompts'; -import { EvalEvaluationStatus } from '@lobechat/types'; +import { EvalEvaluationStatus, RequestTrigger } from '@lobechat/types'; import { TRPCError } from '@trpc/server'; import { ModelProvider } from 'model-bank'; import type OpenAI from 'openai'; @@ -65,11 +65,14 @@ export const ragEvalRouter = router({ // If questionEmbeddingId does not exist, perform an embedding if (!questionEmbeddingId) { - const embeddings = await modelRuntime.embeddings({ - dimensions: 1024, - input: question, - model: !!embeddingModel ? embeddingModel : DEFAULT_EMBEDDING_MODEL, - }); + const embeddings = await modelRuntime.embeddings( + { + dimensions: 1024, + input: question, + model: !!embeddingModel ? embeddingModel : DEFAULT_EMBEDDING_MODEL, + }, + { metadata: { trigger: RequestTrigger.Eval } }, + ); const embeddingId = await ctx.embeddingModel.create({ embeddings: embeddings?.[0], @@ -102,13 +105,16 @@ export const ragEvalRouter = router({ // Generate LLM answer const { messages } = chainAnswerWithContext({ context, knowledge: [], question }); - const response = await modelRuntime.chat({ - messages: messages!, - model: !!languageModel ? languageModel : DEFAULT_MODEL, - responseMode: 'json', - stream: false, - temperature: 1, - }); + const response = await modelRuntime.chat( + { + messages: messages!, + model: !!languageModel ? languageModel : DEFAULT_MODEL, + responseMode: 'json', + stream: false, + temperature: 1, + }, + { metadata: { trigger: RequestTrigger.Eval } }, + ); const data = (await response.json()) as OpenAI.ChatCompletion; diff --git a/src/server/routers/lambda/__tests__/aiChat.test.ts b/src/server/routers/lambda/__tests__/aiChat.test.ts index bc33d079ba..eb73cf69fa 100644 --- a/src/server/routers/lambda/__tests__/aiChat.test.ts +++ b/src/server/routers/lambda/__tests__/aiChat.test.ts @@ -821,12 +821,15 @@ describe('aiChatRouter', () => { const result = await caller.outputJSON(input); expect(initModelRuntimeFromDB).toHaveBeenCalledWith({}, 'u1', 'openai'); - expect(mockGenerateObject).toHaveBeenCalledWith({ - messages: input.messages, - model: 'gpt-4o', - schema: input.schema, - tools: undefined, - }); + expect(mockGenerateObject).toHaveBeenCalledWith( + { + messages: input.messages, + model: 'gpt-4o', + schema: input.schema, + tools: undefined, + }, + { metadata: { trigger: 'chat' } }, + ); expect(result).toEqual(mockResult); }); @@ -862,12 +865,15 @@ describe('aiChatRouter', () => { await caller.outputJSON(input); - expect(mockGenerateObject).toHaveBeenCalledWith({ - messages: [], - model: 'gpt-4o', - schema: undefined, - tools: mockTools, - }); + expect(mockGenerateObject).toHaveBeenCalledWith( + { + messages: [], + model: 'gpt-4o', + schema: undefined, + tools: mockTools, + }, + { metadata: { trigger: 'chat' } }, + ); }); }); }); diff --git a/src/server/routers/lambda/aiChat.ts b/src/server/routers/lambda/aiChat.ts index 496d380c4a..cde2323e40 100644 --- a/src/server/routers/lambda/aiChat.ts +++ b/src/server/routers/lambda/aiChat.ts @@ -1,5 +1,5 @@ import { type CreateMessageParams, type SendMessageServerResponse } from '@lobechat/types'; -import { AiSendMessageServerSchema, StructureOutputSchema } from '@lobechat/types'; +import { AiSendMessageServerSchema, RequestTrigger, StructureOutputSchema } from '@lobechat/types'; import debug from 'debug'; import { LOADING_FLAT } from '@/const/message'; @@ -42,12 +42,15 @@ export const aiChatRouter = router({ const modelRuntime = await initModelRuntimeFromDB(ctx.serverDB, ctx.userId, input.provider); log('calling generateObject'); - const result = await modelRuntime.generateObject({ - messages: input.messages, - model: input.model, - schema: input.schema, - tools: input.tools, - }); + const result = await modelRuntime.generateObject( + { + messages: input.messages, + model: input.model, + schema: input.schema, + tools: input.tools, + }, + { metadata: { trigger: RequestTrigger.Chat } }, + ); log('generateObject completed, result: %O', result); return result; diff --git a/src/server/routers/lambda/chunk.ts b/src/server/routers/lambda/chunk.ts index 9ab9ef38af..9a7911469a 100644 --- a/src/server/routers/lambda/chunk.ts +++ b/src/server/routers/lambda/chunk.ts @@ -1,12 +1,11 @@ import { DEFAULT_FILE_EMBEDDING_MODEL_ITEM } from '@lobechat/const'; import { type ChatSemanticSearchChunk, type FileSearchResult } from '@lobechat/types'; -import { SemanticSearchSchema } from '@lobechat/types'; +import { RequestTrigger, SemanticSearchSchema } from '@lobechat/types'; import { TRPCError } from '@trpc/server'; import { inArray } from 'drizzle-orm'; import pMap from 'p-map'; import { z } from 'zod'; -import { checkBudgetsUsage } from '@/business/server/trpc-middlewares/lambda'; import { AsyncTaskModel } from '@/database/models/asyncTask'; import { ChunkModel } from '@/database/models/chunk'; import { DocumentModel } from '@/database/models/document'; @@ -219,18 +218,20 @@ export const chunkRouter = router({ query: z.string(), }), ) - .use(checkBudgetsUsage) .mutation(async ({ ctx, input }) => { const { model, provider } = getServerDefaultFilesConfig().embeddingModel || DEFAULT_FILE_EMBEDDING_MODEL_ITEM; // Read user's provider config from database const agentRuntime = await initModelRuntimeFromDB(ctx.serverDB, ctx.userId, provider); - const embeddings = await agentRuntime.embeddings({ - dimensions: 1024, - input: input.query, - model, - }); + const embeddings = await agentRuntime.embeddings( + { + dimensions: 1024, + input: input.query, + model, + }, + { metadata: { trigger: RequestTrigger.SemanticSearch } }, + ); return ctx.chunkModel.semanticSearch({ embedding: embeddings![0], @@ -251,11 +252,14 @@ export const chunkRouter = router({ // slice content to make sure in the context window limit const query = input.query.length > 8000 ? input.query.slice(0, 8000) : input.query; - const embeddings = await modelRuntime.embeddings({ - dimensions: 1024, - input: query, - model, - }); + const embeddings = await modelRuntime.embeddings( + { + dimensions: 1024, + input: query, + model, + }, + { metadata: { trigger: RequestTrigger.SemanticSearch } }, + ); const embedding = embeddings![0]; diff --git a/src/server/routers/lambda/userMemories.ts b/src/server/routers/lambda/userMemories.ts index 87ab433efe..1e50356139 100644 --- a/src/server/routers/lambda/userMemories.ts +++ b/src/server/routers/lambda/userMemories.ts @@ -16,7 +16,7 @@ import { UpdateIdentityActionSchema, } from '@lobechat/memory-user-memory'; import { type SearchMemoryResult } from '@lobechat/types'; -import { LayersEnum, searchMemorySchema } from '@lobechat/types'; +import { LayersEnum, RequestTrigger, searchMemorySchema } from '@lobechat/types'; import { type SQL } from 'drizzle-orm'; import { and, asc, eq, gte, lte } from 'drizzle-orm'; import pMap from 'p-map'; @@ -167,11 +167,14 @@ const searchUserMemories = async ( // Read user's provider config from database const modelRuntime = await initModelRuntimeFromDB(ctx.serverDB, ctx.userId, provider); - const queryEmbeddings = await modelRuntime.embeddings({ - dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, - input: input.query, - model: embeddingModel, - }); + const queryEmbeddings = await modelRuntime.embeddings( + { + dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, + input: input.query, + model: embeddingModel, + }, + { metadata: { trigger: RequestTrigger.Memory } }, + ); const effectiveEffort = normalizeMemoryEffort(input.effort ?? ctx.memoryEffort); const effortDefaults = MEMORY_SEARCH_TOP_K_LIMITS[effectiveEffort]; @@ -210,11 +213,14 @@ const createEmbedder = (agentRuntime: any, embeddingModel: string) => { return async (value?: string | null): Promise => { if (!value || value.trim().length === 0) return undefined; - const embeddings = await agentRuntime.embeddings({ - dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, - input: value, - model: embeddingModel, - }); + const embeddings = await agentRuntime.embeddings( + { + dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, + input: value, + model: embeddingModel, + }, + { metadata: { trigger: RequestTrigger.Memory } }, + ); return embeddings?.[0]; }; @@ -485,11 +491,14 @@ export const userMemoriesRouter = router({ const embedTexts = async (texts: string[]): Promise => { if (texts.length === 0) return []; - const response = await agentRuntime.embeddings({ - dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, - input: texts, - model: embeddingModel, - }); + const response = await agentRuntime.embeddings( + { + dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, + input: texts, + model: embeddingModel, + }, + { metadata: { trigger: RequestTrigger.Memory } }, + ); if (!response || response.length !== texts.length) { throw new Error('Embedding response length mismatch'); @@ -957,17 +966,14 @@ export const userMemoriesRouter = router({ } }), - searchMemory: memoryProcedure - .input(searchMemorySchema) - .query(async ({ input, ctx }) => { - try { - return await searchUserMemories(ctx, input); - } catch (error) { - console.error('Failed to retrieve memories:', error); - return EMPTY_SEARCH_RESULT; - } + searchMemory: memoryProcedure.input(searchMemorySchema).query(async ({ input, ctx }) => { + try { + return await searchUserMemories(ctx, input); + } catch (error) { + console.error('Failed to retrieve memories:', error); + return EMPTY_SEARCH_RESULT; } - ), + }), toolAddActivityMemory: memoryProcedure .input(ActivityMemoryItemSchema) diff --git a/src/server/services/agentEvalRun/index.ts b/src/server/services/agentEvalRun/index.ts index 3e80b4a072..ef5942381a 100644 --- a/src/server/services/agentEvalRun/index.ts +++ b/src/server/services/agentEvalRun/index.ts @@ -9,6 +9,7 @@ import type { EvalRunTopicResult, RubricType, } from '@lobechat/types'; +import { RequestTrigger } from '@lobechat/types'; import { AgentEvalBenchmarkModel, @@ -78,7 +79,7 @@ export class AgentEvalRunService { testCases.map((tc) => ({ agentId: params.targetAgentId ?? undefined, title: `[Eval Case #${(tc.sortOrder ?? 0) + 1}] ${tc.content?.input?.slice(0, 50) || 'Test Case'}...`, - trigger: 'eval', + trigger: RequestTrigger.Eval, })), ); @@ -165,7 +166,7 @@ export class AgentEvalRunService { errorTestCases.map((tc) => ({ agentId: run.targetAgentId ?? undefined, title: `[Eval Case #${(tc.sortOrder ?? 0) + 1}] ${tc.input?.slice(0, 50) || 'Test Case'}...`, - trigger: 'eval', + trigger: RequestTrigger.Eval, })), ); @@ -204,7 +205,7 @@ export class AgentEvalRunService { { agentId: run.targetAgentId ?? undefined, title: `[Eval Case #${(runTopic.testCase?.sortOrder ?? 0) + 1}] ${runTopic.testCase?.content?.input?.slice(0, 50) || 'Test Case'}...`, - trigger: 'eval', + trigger: RequestTrigger.Eval, }, ]); diff --git a/src/server/services/agentRuntime/types.ts b/src/server/services/agentRuntime/types.ts index ea5532d1ab..ee5e8b2119 100644 --- a/src/server/services/agentRuntime/types.ts +++ b/src/server/services/agentRuntime/types.ts @@ -135,6 +135,7 @@ export interface OperationCreationParams { groupId?: string | null; threadId?: string | null; topicId?: string | null; + trigger?: string; }; autoStart?: boolean; /** diff --git a/src/server/services/aiAgent/index.ts b/src/server/services/aiAgent/index.ts index 80a0b58aeb..8d24e476fa 100644 --- a/src/server/services/aiAgent/index.ts +++ b/src/server/services/aiAgent/index.ts @@ -778,6 +778,7 @@ export class AiAgentService { groupId: appContext?.groupId, threadId: appContext?.threadId, topicId, + trigger, }, autoStart, completionWebhook, diff --git a/src/server/services/bot/AgentBridgeService.ts b/src/server/services/bot/AgentBridgeService.ts index 39ce2da64f..4993148580 100644 --- a/src/server/services/bot/AgentBridgeService.ts +++ b/src/server/services/bot/AgentBridgeService.ts @@ -1,4 +1,5 @@ import type { ChatTopicBotContext } from '@lobechat/types'; +import { RequestTrigger } from '@lobechat/types'; import type { Message, SentMessage, Thread } from 'chat'; import { emoji } from 'chat'; import debug from 'debug'; @@ -174,7 +175,7 @@ export class AgentBridgeService { botContext, channelContext, reactionThreadId: parentChannelThreadId(thread.id), - trigger: 'bot', + trigger: RequestTrigger.Bot, }); // Persist topic mapping and channel context in thread state for follow-up messages @@ -241,7 +242,7 @@ export class AgentBridgeService { botContext, channelContext, topicId, - trigger: 'bot', + trigger: RequestTrigger.Bot, }); } catch (error) { // If the cached topicId references a deleted topic (FK violation), diff --git a/src/server/services/memory/userMemory/extract.ts b/src/server/services/memory/userMemory/extract.ts index ff0dabc05c..731dee3e32 100644 --- a/src/server/services/memory/userMemory/extract.ts +++ b/src/server/services/memory/userMemory/extract.ts @@ -22,6 +22,7 @@ import { type Embeddings, type GenerateObjectPayload, type LLMRoleType, + type ModelRuntimeHooks, type OpenAIChatMessage, } from '@lobechat/model-runtime'; import { ModelRuntime } from '@lobechat/model-runtime'; @@ -37,14 +38,15 @@ import { tracer, } from '@lobechat/observability-otel/modules/memory-user-memory'; import { attributesCommon } from '@lobechat/observability-otel/node'; -import { - type AiProviderRuntimeState, - type ChatTopicMetadata, - type IdentityMemoryDetail, - type MemoryExtractionAgentCallTrace, - type MemoryExtractionTraceError, - type MemoryExtractionTracePayload, +import type { + AiProviderRuntimeState, + ChatTopicMetadata, + IdentityMemoryDetail, + MemoryExtractionAgentCallTrace, + MemoryExtractionTraceError, + MemoryExtractionTracePayload, } from '@lobechat/types'; +import { RequestTrigger } from '@lobechat/types'; import { type FlowControl } from '@upstash/qstash'; import { Client } from '@upstash/workflow'; import debug from 'debug'; @@ -52,6 +54,7 @@ import { and, asc, eq, inArray } from 'drizzle-orm'; import { join } from 'pathe'; import { z } from 'zod'; +import { getBusinessModelRuntimeHooks } from '@/business/server/model-runtime'; import { AsyncTaskModel } from '@/database/models/asyncTask'; import { type ListTopicsForMemoryExtractorCursor } from '@/database/models/topic'; import { TopicModel } from '@/database/models/topic'; @@ -303,12 +306,14 @@ export type RuntimeResolveOptions = { preferred?: { providerIds?: string[]; }; + userId?: string; }; export const resolveRuntimeAgentConfig = ( agent: MemoryAgentConfig, keyVaults?: ProviderKeyVaultMap, options?: RuntimeResolveOptions, + hooks?: ModelRuntimeHooks, ) => { const normalizedPreferredProviders = (options?.preferred?.providerIds || []) .map(normalizeProvider) @@ -329,7 +334,7 @@ export const resolveRuntimeAgentConfig = ( source: 'user-vault' as const, }); - return ModelRuntime.initializeWithProvider(provider, {}); + return ModelRuntime.initializeWithProvider(provider, { userId: options?.userId }, hooks); } const { apiKey: userApiKey, baseURL: userBaseURL } = extractCredentialsFromVault( @@ -354,6 +359,7 @@ export const resolveRuntimeAgentConfig = ( return ModelRuntime.initializeWithProvider(provider, { apiKey: userApiKey, baseURL: userBaseURL, + userId: options?.userId, }); } @@ -367,6 +373,7 @@ export const resolveRuntimeAgentConfig = ( return ModelRuntime.initializeWithProvider(agent.provider || 'openai', { apiKey: agent.apiKey || options?.fallback?.apiKey, baseURL: agent.baseURL || options?.fallback?.baseURL, + userId: options?.userId, }); }; @@ -603,7 +610,7 @@ export class MemoryExtractionExecutor { input: requests.map((item) => item.text), model, }, - { user: 'memory-extraction' }, + { metadata: { trigger: RequestTrigger.Memory }, user: 'memory-extraction' }, ); const vectors = texts.map(() => null); @@ -1050,11 +1057,14 @@ export class MemoryExtractionExecutor { tokenLimit, ); - const embeddings = await runtime.embeddings({ - dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, - input: [aggregatedContent], - model: embeddingModel, - }); + const embeddings = await runtime.embeddings( + { + dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, + input: [aggregatedContent], + model: embeddingModel, + }, + { metadata: { trigger: RequestTrigger.Memory } }, + ); const vector = embeddings?.[0]; if (vector) { @@ -1978,6 +1988,7 @@ export class MemoryExtractionExecutor { baseURL: this.privateConfig.embedding.baseURL, }, preferred: { providerIds: this.embeddingPreferredProviders }, + userId, }; const gatekeeperOptions: RuntimeResolveOptions = { @@ -1986,6 +1997,7 @@ export class MemoryExtractionExecutor { baseURL: this.privateConfig.agentGateKeeper.baseURL, }, preferred: { providerIds: this.gatekeeperPreferredProviders }, + userId, }; const layerExtractorOptions: RuntimeResolveOptions = { @@ -1994,23 +2006,29 @@ export class MemoryExtractionExecutor { baseURL: this.privateConfig.agentLayerExtractor.baseURL, }, preferred: { providerIds: this.layerPreferredProviders }, + userId, }; + const hooks = getBusinessModelRuntimeHooks(userId, 'lobehub'); + const runtimes: RuntimeBundle = { embeddings: await resolveRuntimeAgentConfig( { ...this.privateConfig.embedding }, keyVaults, embeddingOptions, + hooks, ), gatekeeper: await resolveRuntimeAgentConfig( { ...this.privateConfig.agentGateKeeper }, keyVaults, gatekeeperOptions, + hooks, ), layerExtractor: await resolveRuntimeAgentConfig( { ...this.privateConfig.agentLayerExtractor }, keyVaults, layerExtractorOptions, + hooks, ), }; diff --git a/src/server/services/memory/userMemory/persona/service.ts b/src/server/services/memory/userMemory/persona/service.ts index fc59222659..3d4ee3108d 100644 --- a/src/server/services/memory/userMemory/persona/service.ts +++ b/src/server/services/memory/userMemory/persona/service.ts @@ -11,6 +11,7 @@ import { } from '@lobechat/memory-user-memory'; import { desc, eq } from 'drizzle-orm'; +import { getBusinessModelRuntimeHooks } from '@/business/server/model-runtime'; import { UserMemoryModel } from '@/database/models/userMemory'; import { UserPersonaModel } from '@/database/models/userMemory/persona'; import { AiInfraRepos } from '@/database/repositories/aiInfra'; @@ -78,13 +79,21 @@ export class UserPersonaService { {} as ProviderKeyVaultMap, ); - const runtime = await resolveRuntimeAgentConfig({ ...this.agentConfig }, keyVaults, { - fallback: { - apiKey: this.agentConfig.apiKey, - baseURL: this.agentConfig.baseURL, - }, - preferred: { providerIds: [providerId] }, - } satisfies RuntimeResolveOptions); + const hooks = getBusinessModelRuntimeHooks(payload.userId, 'lobehub'); + + const runtime = await resolveRuntimeAgentConfig( + { ...this.agentConfig }, + keyVaults, + { + fallback: { + apiKey: this.agentConfig.apiKey, + baseURL: this.agentConfig.baseURL, + }, + preferred: { providerIds: [providerId] }, + userId: payload.userId, + } satisfies RuntimeResolveOptions, + hooks, + ); const personaModel = new UserPersonaModel(this.db, payload.userId); const lastDocument = await personaModel.getLatestPersonaDocument(); diff --git a/src/server/services/systemAgent/index.ts b/src/server/services/systemAgent/index.ts index 5c5a1661b7..e1c3b15a32 100644 --- a/src/server/services/systemAgent/index.ts +++ b/src/server/services/systemAgent/index.ts @@ -1,6 +1,7 @@ import { DEFAULT_SYSTEM_AGENT_CONFIG } from '@lobechat/const'; import { chainSummaryTitle } from '@lobechat/prompts'; -import { type UserSystemAgentConfig, type UserSystemAgentConfigKey } from '@lobechat/types'; +import type { UserSystemAgentConfig, UserSystemAgentConfigKey } from '@lobechat/types'; +import { RequestTrigger } from '@lobechat/types'; import debug from 'debug'; import { UserModel } from '@/database/models/user'; @@ -65,11 +66,14 @@ export class SystemAgentService { const payload = chainSummaryTitle(messages, locale); const modelRuntime = await initModelRuntimeFromDB(this.db, this.userId, provider); - const result = await modelRuntime.generateObject({ - messages: payload.messages as any[], - model, - schema: TOPIC_TITLE_SCHEMA, - }); + const result = await modelRuntime.generateObject( + { + messages: payload.messages as any[], + model, + schema: TOPIC_TITLE_SCHEMA, + }, + { metadata: { trigger: RequestTrigger.Topic } }, + ); const title = (result as { title?: string })?.title?.trim(); if (!title) { diff --git a/src/server/services/toolExecution/serverRuntimes/memory.ts b/src/server/services/toolExecution/serverRuntimes/memory.ts index e1e2c9923f..2a65fdab92 100644 --- a/src/server/services/toolExecution/serverRuntimes/memory.ts +++ b/src/server/services/toolExecution/serverRuntimes/memory.ts @@ -30,7 +30,7 @@ import type { SearchMemoryResult, UpdateIdentityMemoryResult, } from '@lobechat/types'; -import { LayersEnum } from '@lobechat/types'; +import { LayersEnum, RequestTrigger } from '@lobechat/types'; import { eq } from 'drizzle-orm'; import type { z } from 'zod'; @@ -159,11 +159,14 @@ const createEmbedder = (agentRuntime: any, embeddingModel: string) => { return async (value?: string | null): Promise => { if (!value || value.trim().length === 0) return undefined; - const embeddings = await agentRuntime.embeddings({ - dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, - input: value, - model: embeddingModel, - }); + const embeddings = await agentRuntime.embeddings( + { + dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, + input: value, + model: embeddingModel, + }, + { metadata: { trigger: RequestTrigger.Memory } }, + ); return embeddings?.[0]; }; @@ -193,11 +196,14 @@ class MemoryServerRuntimeService implements MemoryRuntimeService { const modelRuntime = await initModelRuntimeFromDB(this.serverDB, this.userId, provider); - const queryEmbeddings = await modelRuntime.embeddings({ - dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, - input: params.query, - model: embeddingModel, - }); + const queryEmbeddings = await modelRuntime.embeddings( + { + dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS, + input: params.query, + model: embeddingModel, + }, + { metadata: { trigger: RequestTrigger.Memory } }, + ); const effectiveEffort = normalizeMemoryEffort(params.effort ?? this.memoryEffort); const effortDefaults = MEMORY_SEARCH_TOP_K_LIMITS[effectiveEffort];