feat: add request trigger tracking, embeddings billing hooks, and memory extraction userId fix (#13061)

This commit is contained in:
YuTengjing 2026-03-17 20:54:28 +08:00 committed by GitHub
parent 62a6c3da1d
commit 97f4a370ab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
39 changed files with 425 additions and 167 deletions

View file

@ -1,6 +1,6 @@
---
name: typescript
description: TypeScript code style and optimization guidelines. Use when writing TypeScript code (.ts, .tsx, .mts files), reviewing code quality, or implementing type-safe patterns. Triggers on TypeScript development, type safety questions, or code style discussions.
description: TypeScript code style and optimization guidelines. MUST READ before writing or modifying any TypeScript code (.ts, .tsx, .mts files). Also use when reviewing code quality or implementing type-safe patterns. Triggers on any TypeScript file edit, code style discussions, or type safety questions.
---
# TypeScript Code Style Guide
@ -25,6 +25,17 @@ description: TypeScript code style and optimization guidelines. Use when writing
- Use promise-based variants: `import { readFile } from 'fs/promises'`
- Use `Promise.all`, `Promise.race` for concurrent operations where safe
## Imports
- This project uses `simple-import-sort/imports` and `consistent-type-imports` (`fixStyle: 'separate-type-imports'`)
- **Separate type imports**: always use `import type { ... }` for type-only imports, NOT `import { type ... }` inline syntax
- When a file already has `import type { ... }` from a package and you need to add a value import, keep them as **two separate statements**:
```ts
import type { ChatTopicBotContext } from '@lobechat/types';
import { RequestTrigger } from '@lobechat/types';
```
- Within each import statement, specifiers are sorted **alphabetically by name**
## Code Structure
- Prefer object destructuring

View file

@ -12,7 +12,18 @@
"table.columns.spend": "Credits",
"table.columns.startTime": "Created At",
"table.columns.totalTokens": "Token Usage",
"table.columns.type.enums.chat": "Chat Message",
"table.columns.trigger.enums.api": "API Call",
"table.columns.trigger.enums.bot": "Bot Message",
"table.columns.trigger.enums.chat": "Chat Message",
"table.columns.trigger.enums.cron": "Scheduled Task",
"table.columns.trigger.enums.eval": "Benchmark Eval",
"table.columns.trigger.enums.file_embedding": "File Embedding",
"table.columns.trigger.enums.memory": "Memory Extraction",
"table.columns.trigger.enums.semantic_search": "Knowledge Search",
"table.columns.trigger.enums.topic": "Topic Summary",
"table.columns.trigger.title": "Trigger",
"table.columns.type.enums.chat": "Text Generation",
"table.columns.type.enums.embedding": "Embedding",
"table.columns.type.enums.imageGeneration": "Image Generation",
"table.columns.type.enums.videoGeneration": "Video Generation",
"table.columns.type.title": "Type",

View file

@ -12,7 +12,18 @@
"table.columns.spend": "消耗积分",
"table.columns.startTime": "创建时间",
"table.columns.totalTokens": "令牌使用量",
"table.columns.type.enums.chat": "聊天消息",
"table.columns.trigger.enums.api": "API 调用",
"table.columns.trigger.enums.bot": "Bot 消息",
"table.columns.trigger.enums.chat": "聊天消息",
"table.columns.trigger.enums.cron": "定时任务",
"table.columns.trigger.enums.eval": "基准评测",
"table.columns.trigger.enums.file_embedding": "文件嵌入",
"table.columns.trigger.enums.memory": "记忆提取",
"table.columns.trigger.enums.semantic_search": "知识搜索",
"table.columns.trigger.enums.topic": "话题总结",
"table.columns.trigger.title": "触发方式",
"table.columns.type.enums.chat": "文本生成",
"table.columns.type.enums.embedding": "嵌入",
"table.columns.type.enums.imageGeneration": "图像生成",
"table.columns.type.enums.videoGeneration": "视频生成",
"table.columns.type.title": "类型",

View file

@ -11,15 +11,16 @@
},
"main": "src/index.ts",
"scripts": {
"build:gen-response-formats": "tsx scripts/generate-response-formats.ts",
"test": "vitest --run",
"test:coverage": "vitest --coverage --silent='passed-only'",
"build:gen-response-formats": "tsx scripts/generate-response-formats.ts",
"type-check": "tsgo --noEmit -p tsconfig.json"
},
"dependencies": {
"@lobechat/context-engine": "workspace:*",
"@lobechat/model-runtime": "workspace:*",
"@lobechat/prompts": "workspace:*",
"@lobechat/types": "workspace:*",
"dayjs": "^1.11.11",
"dotenv": "^17.2.3",
"ora": "^9.0.0",
@ -29,7 +30,6 @@
"zod-to-json-schema": "^3.24.6"
},
"devDependencies": {
"@lobechat/types": "workspace:*",
"@types/json-schema": "^7.0.15",
"@types/xast": "^2.0.4",
"promptfoo": "^0.120.17",

View file

@ -11,6 +11,7 @@ import {
ATTR_GEN_AI_REQUEST_MODEL,
} from '@lobechat/observability-otel/gen-ai';
import { tracer } from '@lobechat/observability-otel/modules/memory-user-memory';
import { RequestTrigger } from '@lobechat/types';
import type { z } from 'zod';
import type {
@ -155,7 +156,9 @@ export abstract class BaseMemoryExtractor<
}
span.addEvent('gen_ai.request.send');
const result = await this.runtime.generateObject(payload);
const result = await this.runtime.generateObject(payload, {
metadata: { trigger: RequestTrigger.Memory },
});
span.addEvent('gen_ai.response.receive');
span.setAttributes({

View file

@ -95,6 +95,7 @@ describe('UserMemoryGateKeeper', () => {
model: 'gpt-mock',
schema: expect.any(Object),
}),
expect.objectContaining({ metadata: { trigger: 'memory' } }),
);
});
});

View file

@ -76,6 +76,7 @@ describe('IdentityExtractor', () => {
schema: expect.objectContaining({ name: expect.stringContaining('identity') }),
tools: undefined,
}),
expect.objectContaining({ metadata: { trigger: 'memory' } }),
);
expect(result).toEqual(structuredResult);
});

View file

@ -1,4 +1,5 @@
import { renderPlaceholderTemplate } from '@lobechat/context-engine';
import { RequestTrigger } from '@lobechat/types';
import { z } from 'zod';
import { userPersonaPrompt } from '../prompts';
@ -115,11 +116,14 @@ export class UserPersonaExtractor extends BaseMemoryExtractor<
{ content: userPrompt, role: 'user' as const },
];
const result = (await this.runtime.generateObject({
messages,
model: this.model,
tools: this.getTools(options || {}),
})) as unknown;
const result = (await this.runtime.generateObject(
{
messages,
model: this.model,
tools: this.getTools(options || {}),
},
{ metadata: { trigger: RequestTrigger.Memory } },
)) as unknown;
if (Array.isArray(result)) {
const firstCall = result[0];

View file

@ -0,0 +1,19 @@
import type { AIEmbeddingModelCard } from '../../types/aiModel';
/**
 * Embedding models offered under the LobeHub provider.
 *
 * Each entry follows the `AIEmbeddingModelCard` shape: identity, context
 * window, maximum output dimension, release date, and fixed
 * per-million-token pricing.
 */
export const lobehubEmbeddingModels: AIEmbeddingModelCard[] = [
  {
    contextWindowTokens: 8192,
    description:
      'An efficient, cost-effective next-generation embedding model for retrieval and RAG scenarios.',
    displayName: 'Text Embedding 3 Small',
    // Enabled by default so the model is available without manual activation.
    enabled: true,
    id: 'text-embedding-3-small',
    // Largest embedding vector dimension this model can produce.
    maxDimension: 1536,
    pricing: {
      currency: 'USD',
      // Flat rate: $0.02 per million input tokens.
      units: [{ name: 'textInput', rate: 0.02, strategy: 'fixed', unit: 'millionTokens' }],
    },
    releasedAt: '2024-01-25',
    type: 'embedding',
  },
];

View file

@ -1,12 +1,19 @@
import { lobehubChatModels } from './chat';
import { lobehubEmbeddingModels } from './embedding';
import { lobehubImageModels } from './image';
import { lobehubVideoModels } from './video';
export { lobehubChatModels } from './chat';
export { lobehubEmbeddingModels } from './embedding';
export { lobehubImageModels } from './image';
export * from './utils';
export { lobehubVideoModels, seedance15ProParams } from './video';
export const allModels = [...lobehubChatModels, ...lobehubImageModels, ...lobehubVideoModels];
export const allModels = [
...lobehubChatModels,
...lobehubEmbeddingModels,
...lobehubImageModels,
...lobehubVideoModels,
];
export default allModels;

View file

@ -35,6 +35,7 @@ export interface ModelRuntimeHooks {
* Runs before the LLM call. Throw to abort (e.g., budget exceeded).
*/
beforeChat?: (payload: ChatStreamPayload, options?: ChatMethodOptions) => Promise<void>;
beforeEmbeddings?: (payload: EmbeddingsPayload, options?: EmbeddingsOptions) => Promise<void>;
beforeGenerateObject?: (
payload: GenerateObjectPayload,
options?: GenerateObjectOptions,
@ -57,6 +58,16 @@ export interface ModelRuntimeHooks {
context: { options?: ChatMethodOptions; payload: ChatStreamPayload },
) => void | Promise<void>;
onEmbeddingsError?: (
error: ChatCompletionErrorPayload,
context: { options?: EmbeddingsOptions; payload: EmbeddingsPayload },
) => void | Promise<void>;
onEmbeddingsFinal?: (
data: { latencyMs?: number; usage?: ModelUsage },
context: { options?: EmbeddingsOptions; payload: EmbeddingsPayload },
) => void | Promise<void>;
onGenerateObjectError?: (
error: ChatCompletionErrorPayload,
context: { options?: GenerateObjectOptions; payload: GenerateObjectPayload },
@ -205,7 +216,36 @@ export class ModelRuntime {
}
async embeddings(payload: EmbeddingsPayload, options?: EmbeddingsOptions) {
return this._runtime.embeddings?.(payload, options);
await this._hooks?.beforeEmbeddings?.(payload, options);
const startTime = Date.now();
const finalOptions = this._hooks?.onEmbeddingsFinal
? {
...options,
onUsage: async (usage: ModelUsage) => {
await options?.onUsage?.(usage);
try {
const latencyMs = Date.now() - startTime;
await this._hooks!.onEmbeddingsFinal!({ latencyMs, usage }, { options, payload });
} catch (e) {
console.error('[ModelRuntime] onEmbeddingsFinal hook error:', e);
}
},
}
: options;
try {
return await this._runtime.embeddings?.(payload, finalOptions);
} catch (error) {
if (this._hooks?.onEmbeddingsError) {
await this._hooks.onEmbeddingsError(error as ChatCompletionErrorPayload, {
options,
payload,
});
}
throw error;
}
}
async textToSpeech(payload: TextToSpeechPayload, options?: EmbeddingsOptions) {
return this._runtime.textToSpeech?.(payload, options);
@ -243,7 +283,12 @@ export class ModelRuntime {
params: Partial<
ClientOptions &
LobeBedrockAIParams &
LobeCloudflareParams & { apiKey?: string; apiVersion?: string; baseURL?: string }
LobeCloudflareParams & {
apiKey?: string;
apiVersion?: string;
baseURL?: string;
userId?: string;
}
>,
hooks?: ModelRuntimeHooks,
) {

View file

@ -90,6 +90,7 @@ export interface RouteAttemptResult {
channelId?: string;
durationMs: number;
error?: unknown;
metadata?: Record<string, unknown>;
model: string;
optionIndex: number;
providerId: string;
@ -302,6 +303,7 @@ export const createRouterRuntime = ({
private async runWithFallback<T>(
model: string,
requestHandler: (runtime: LobeRuntimeAI) => Promise<T>,
metadata?: Record<string, unknown>,
): Promise<T> {
const matchedRouter = await this.resolveMatchedRouter(model);
const routerOptions = this.normalizeRouterOptions(matchedRouter);
@ -354,6 +356,7 @@ export const createRouterRuntime = ({
apiType: resolvedApiType,
channelId,
durationMs: Date.now() - startTime,
metadata,
model,
optionIndex: index,
providerId: id,
@ -376,6 +379,7 @@ export const createRouterRuntime = ({
channelId,
durationMs: Date.now() - startTime,
error,
metadata,
model,
optionIndex: index,
providerId: id,
@ -452,8 +456,10 @@ export const createRouterRuntime = ({
*/
async chat(payload: ChatStreamPayload, options?: ChatMethodOptions) {
try {
return await this.runWithFallback(payload.model, (runtime) =>
runtime.chat!(payload, options),
return await this.runWithFallback(
payload.model,
(runtime) => runtime.chat!(payload, options),
options?.metadata,
);
} catch (e) {
if (params.chatCompletion?.handleError) {
@ -485,14 +491,18 @@ export const createRouterRuntime = ({
}
async generateObject(payload: GenerateObjectPayload, options?: GenerateObjectOptions) {
return this.runWithFallback(payload.model, (runtime) =>
runtime.generateObject!(payload, options),
return this.runWithFallback(
payload.model,
(runtime) => runtime.generateObject!(payload, options),
options?.metadata,
);
}
async embeddings(payload: EmbeddingsPayload, options?: EmbeddingsOptions) {
return this.runWithFallback(payload.model, (runtime) =>
runtime.embeddings!(payload, options),
return this.runWithFallback(
payload.model,
(runtime) => runtime.embeddings!(payload, options),
options?.metadata,
);
}

View file

@ -820,6 +820,17 @@ export const createOpenAICompatibleRuntime = <T extends Record<string, any> = an
{ headers: options?.headers, signal: options?.signal },
);
if (res.usage && options?.onUsage) {
const pricing = await getModelPricing(payload.model, this.id);
await options.onUsage(
convertOpenAIUsage(res.usage as any, {
model: payload.model,
pricing,
provider: this.id,
}),
);
}
log('received %d embeddings', res.data.length);
return res.data.map((item) => item.embedding);
} catch (error) {

View file

@ -120,6 +120,19 @@ export class LobeAzureOpenAI implements LobeRuntimeAI {
{ headers: options?.headers, signal: options?.signal },
);
if (res.usage && options?.onUsage) {
const { convertOpenAIUsage } = await import('../../core/usageConverters/openai');
const { getModelPricing } = await import('../../utils/getModelPricing');
const pricing = await getModelPricing(payload.model, ModelProvider.Azure);
await options.onUsage(
convertOpenAIUsage(res.usage as any, {
model: payload.model,
pricing,
provider: ModelProvider.Azure,
}),
);
}
return res.data.map((item) => item.embedding);
} catch (error) {
return this.handleError(error, payload.model);

View file

@ -1,3 +1,5 @@
import type { ModelUsage } from '@lobechat/types';
export interface EmbeddingsPayload {
/**
* The number of dimensions the resulting output embeddings should have. Only
@ -18,6 +20,9 @@ export interface EmbeddingsPayload {
export interface EmbeddingsOptions {
headers?: Record<string, any>;
/** Metadata passed to hooks (billing, tracing, etc.) */
metadata?: Record<string, unknown>;
onUsage?: (usage: ModelUsage) => void | Promise<void>;
signal?: AbortSignal;
/**
* userId for the embeddings

View file

@ -34,6 +34,9 @@ export interface GenerateObjectOptions {
*/
headers?: Record<string, any>;
/** Metadata passed to hooks (billing, tracing, etc.) */
metadata?: Record<string, unknown>;
onUsage?: (usage: ModelUsage) => void | Promise<void>;
signal?: AbortSignal;

View file

@ -1,9 +1,12 @@
import { resolve } from 'node:path';
import { coverageConfigDefaults, defineConfig } from 'vitest/config';
export default defineConfig({
test: {
alias: {
// Resolve @cloud/database's internal @/ paths when pnpm overrides pull in cloud packages
'@/database': resolve(__dirname, '../../packages/database/src'),
// TODO: 目前仍然残留 ModelRuntime.test.ts 中的部分测试依赖了主项目的内容,后续需要拆分测试
'@': resolve(__dirname, '../../src'),
},

View file

@ -1,5 +1,6 @@
import type { ChatStreamPayload } from '@lobechat/model-runtime';
import type { LobeAgentChatConfig, LobeAgentConfig, UserSystemAgentConfig } from '@lobechat/types';
import { RequestTrigger } from '@lobechat/types';
import { and, eq } from 'drizzle-orm';
import { getBusinessModelRuntimeHooks } from '@/business/server/model-runtime';
@ -341,6 +342,7 @@ export class ChatService extends BaseService {
// 调用聊天 API
const response = await modelRuntime.chat(chatPayload, {
metadata: { trigger: RequestTrigger.Api },
user: this.userId!,
});

View file

@ -1,3 +1,15 @@
/**
 * Identifies what initiated a model request so downstream hooks (usage
 * tracking, billing) can attribute spend to a trigger source.
 *
 * The string values are surfaced in the usage table UI via the
 * `table.columns.trigger.enums.*` locale keys, so they must stay stable.
 */
export enum RequestTrigger {
  /** Direct API invocation (server-side ChatService calls). */
  Api = 'api',
  /** Message produced through a bot/channel bridge. */
  Bot = 'bot',
  /** Interactive chat request from a user session. */
  Chat = 'chat',
  /** Scheduled task run. */
  Cron = 'cron',
  /** Benchmark / evaluation run. */
  Eval = 'eval',
  /** Embedding chunks of an uploaded file. */
  FileEmbedding = 'file_embedding',
  /** Memory extraction or memory search. */
  Memory = 'memory',
  /** Semantic (knowledge-base) search query. */
  SemanticSearch = 'semantic_search',
  /** Topic summary generation. */
  Topic = 'topic',
}
// ******* Runtime Biz Error ******* //
export const AgentRuntimeErrorType = {
AgentRuntimeError: 'AgentRuntimeError', // Agent Runtime module runtime error

View file

@ -4,10 +4,6 @@ export const checkEmbeddingUsage = asyncTrpc.middleware(async (opts) => {
return opts.next();
});
export const checkBudgetsUsage = asyncTrpc.middleware(async (opts) => {
return opts.next();
});
export const createImageBusinessMiddleware = asyncTrpc.middleware(async (opts) => {
return opts.next();
});

View file

@ -3,7 +3,3 @@ import { trpc } from '@/libs/trpc/lambda/init';
export const checkFileStorageUsage = trpc.middleware(async (opts) => {
return opts.next();
});
export const checkBudgetsUsage = trpc.middleware(async (opts) => {
return opts.next();
});

View file

@ -12,7 +12,18 @@ export default {
'table.columns.spend': 'Credits',
'table.columns.startTime': 'Created At',
'table.columns.totalTokens': 'Token Usage',
'table.columns.type.enums.chat': 'Chat Message',
'table.columns.trigger.enums.api': 'API Call',
'table.columns.trigger.enums.bot': 'Bot Message',
'table.columns.trigger.enums.chat': 'Chat Message',
'table.columns.trigger.enums.cron': 'Scheduled Task',
'table.columns.trigger.enums.eval': 'Benchmark Eval',
'table.columns.trigger.enums.file_embedding': 'File Embedding',
'table.columns.trigger.enums.memory': 'Memory Extraction',
'table.columns.trigger.enums.semantic_search': 'Knowledge Search',
'table.columns.trigger.enums.topic': 'Topic Summary',
'table.columns.trigger.title': 'Trigger',
'table.columns.type.enums.chat': 'Text Generation',
'table.columns.type.enums.embedding': 'Embedding',
'table.columns.type.enums.imageGeneration': 'Image Generation',
'table.columns.type.enums.videoGeneration': 'Video Generation',
'table.columns.type.title': 'Type',

View file

@ -1,13 +1,14 @@
import { ActionIcon, Center, Flexbox, Text, TooltipGroup } from '@lobehub/ui';
import isEqual from 'fast-deep-equal';
import { ArrowDownUpIcon, ToggleLeft } from 'lucide-react';
import { useMemo, useState } from 'react';
import { use, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { useAiInfraStore } from '@/store/aiInfra';
import { aiModelSelectors } from '@/store/aiInfra/selectors';
import ModelItem from '../ModelItem';
import { ProviderSettingsContext } from '../ProviderSettingsContext';
import SortModelModal from '../SortModelModal';
interface EnabledModelListProps {
@ -16,6 +17,7 @@ interface EnabledModelListProps {
const EnabledModelList = ({ activeTab }: EnabledModelListProps) => {
const { t } = useTranslation('modelProvider');
const { modelEditable } = use(ProviderSettingsContext);
const enabledModels = useAiInfraStore(aiModelSelectors.enabledAiProviderModelList, isEqual);
const batchToggleAiModels = useAiInfraStore((s) => s.batchToggleAiModels);
@ -30,6 +32,13 @@ const EnabledModelList = ({ activeTab }: EnabledModelListProps) => {
return enabledModels.filter((model) => model.type === activeTab);
}, [enabledModels, activeTab]);
// Models that can be toggled (exclude embedding models when not editable)
const togglableModels = useMemo(
() =>
modelEditable ? filteredModels : filteredModels.filter((model) => model.type !== 'embedding'),
[filteredModels, modelEditable],
);
const isCurrentTabEmpty = filteredModels.length === 0;
return (
<>
@ -40,20 +49,22 @@ const EnabledModelList = ({ activeTab }: EnabledModelListProps) => {
{!isEmpty && (
<TooltipGroup>
<Flexbox horizontal>
<ActionIcon
icon={ToggleLeft}
loading={batchLoading}
size={'small'}
title={t('providerModels.list.enabledActions.disableAll')}
onClick={async () => {
setBatchLoading(true);
await batchToggleAiModels(
enabledModels.map((i) => i.id),
false,
);
setBatchLoading(false);
}}
/>
{togglableModels.length > 0 && (
<ActionIcon
icon={ToggleLeft}
loading={batchLoading}
size={'small'}
title={t('providerModels.list.enabledActions.disableAll')}
onClick={async () => {
setBatchLoading(true);
await batchToggleAiModels(
togglableModels.map((i) => i.id),
false,
);
setBatchLoading(false);
}}
/>
)}
<ActionIcon
icon={ArrowDownUpIcon}

View file

@ -170,7 +170,9 @@ const ModelItem = memo<ModelItemProps>(
</Tag>
);
const EnableSwitch = (
const canToggle = modelEditable || type !== 'embedding';
const EnableSwitch = canToggle ? (
<Switch
checked={checked}
loading={isModelLoading}
@ -180,7 +182,7 @@ const ModelItem = memo<ModelItemProps>(
await toggleModelEnabled({ enabled: e, id, source, type });
}}
/>
);
) : null;
const Actions =
modelEditable &&

View file

@ -410,6 +410,7 @@ export const createRuntimeExecutors = (
metadata: {
operationId,
topicId: state.metadata?.topicId,
trigger: state.metadata?.trigger,
},
user: ctx.userId,
});

View file

@ -1,10 +1,11 @@
import { ASYNC_TASK_TIMEOUT } from '@lobechat/business-config/server';
import { RequestTrigger } from '@lobechat/types';
import { TRPCError } from '@trpc/server';
import { chunk } from 'es-toolkit/compat';
import pMap from 'p-map';
import { z } from 'zod';
import { checkBudgetsUsage, checkEmbeddingUsage } from '@/business/server/trpc-middlewares/async';
import { checkEmbeddingUsage } from '@/business/server/trpc-middlewares/async';
import { serverDBEnv } from '@/config/db';
import { DEFAULT_FILE_EMBEDDING_MODEL_ITEM } from '@/const/settings/knowledge';
import { AsyncTaskModel } from '@/database/models/asyncTask';
@ -41,7 +42,6 @@ const fileProcedure = asyncAuthedProcedure.use(async (opts) => {
export const fileRouter = router({
embeddingChunks: fileProcedure
.use(checkEmbeddingUsage)
.use(checkBudgetsUsage)
.input(
z.object({
fileId: z.string(),
@ -98,11 +98,14 @@ export const fileRouter = router({
provider,
);
const embeddings = await modelRuntime.embeddings({
dimensions: 1024,
input: chunks.map((c) => c.text),
model,
});
const embeddings = await modelRuntime.embeddings(
{
dimensions: 1024,
input: chunks.map((c) => c.text),
model,
},
{ metadata: { trigger: RequestTrigger.FileEmbedding } },
);
const items: NewEmbeddingsItem[] =
embeddings?.map((e, idx) => ({

View file

@ -1,5 +1,5 @@
import { chainAnswerWithContext } from '@lobechat/prompts';
import { EvalEvaluationStatus } from '@lobechat/types';
import { EvalEvaluationStatus, RequestTrigger } from '@lobechat/types';
import { TRPCError } from '@trpc/server';
import { ModelProvider } from 'model-bank';
import type OpenAI from 'openai';
@ -65,11 +65,14 @@ export const ragEvalRouter = router({
// If questionEmbeddingId does not exist, perform an embedding
if (!questionEmbeddingId) {
const embeddings = await modelRuntime.embeddings({
dimensions: 1024,
input: question,
model: !!embeddingModel ? embeddingModel : DEFAULT_EMBEDDING_MODEL,
});
const embeddings = await modelRuntime.embeddings(
{
dimensions: 1024,
input: question,
model: !!embeddingModel ? embeddingModel : DEFAULT_EMBEDDING_MODEL,
},
{ metadata: { trigger: RequestTrigger.Eval } },
);
const embeddingId = await ctx.embeddingModel.create({
embeddings: embeddings?.[0],
@ -102,13 +105,16 @@ export const ragEvalRouter = router({
// Generate LLM answer
const { messages } = chainAnswerWithContext({ context, knowledge: [], question });
const response = await modelRuntime.chat({
messages: messages!,
model: !!languageModel ? languageModel : DEFAULT_MODEL,
responseMode: 'json',
stream: false,
temperature: 1,
});
const response = await modelRuntime.chat(
{
messages: messages!,
model: !!languageModel ? languageModel : DEFAULT_MODEL,
responseMode: 'json',
stream: false,
temperature: 1,
},
{ metadata: { trigger: RequestTrigger.Eval } },
);
const data = (await response.json()) as OpenAI.ChatCompletion;

View file

@ -821,12 +821,15 @@ describe('aiChatRouter', () => {
const result = await caller.outputJSON(input);
expect(initModelRuntimeFromDB).toHaveBeenCalledWith({}, 'u1', 'openai');
expect(mockGenerateObject).toHaveBeenCalledWith({
messages: input.messages,
model: 'gpt-4o',
schema: input.schema,
tools: undefined,
});
expect(mockGenerateObject).toHaveBeenCalledWith(
{
messages: input.messages,
model: 'gpt-4o',
schema: input.schema,
tools: undefined,
},
{ metadata: { trigger: 'chat' } },
);
expect(result).toEqual(mockResult);
});
@ -862,12 +865,15 @@ describe('aiChatRouter', () => {
await caller.outputJSON(input);
expect(mockGenerateObject).toHaveBeenCalledWith({
messages: [],
model: 'gpt-4o',
schema: undefined,
tools: mockTools,
});
expect(mockGenerateObject).toHaveBeenCalledWith(
{
messages: [],
model: 'gpt-4o',
schema: undefined,
tools: mockTools,
},
{ metadata: { trigger: 'chat' } },
);
});
});
});

View file

@ -1,5 +1,5 @@
import { type CreateMessageParams, type SendMessageServerResponse } from '@lobechat/types';
import { AiSendMessageServerSchema, StructureOutputSchema } from '@lobechat/types';
import { AiSendMessageServerSchema, RequestTrigger, StructureOutputSchema } from '@lobechat/types';
import debug from 'debug';
import { LOADING_FLAT } from '@/const/message';
@ -42,12 +42,15 @@ export const aiChatRouter = router({
const modelRuntime = await initModelRuntimeFromDB(ctx.serverDB, ctx.userId, input.provider);
log('calling generateObject');
const result = await modelRuntime.generateObject({
messages: input.messages,
model: input.model,
schema: input.schema,
tools: input.tools,
});
const result = await modelRuntime.generateObject(
{
messages: input.messages,
model: input.model,
schema: input.schema,
tools: input.tools,
},
{ metadata: { trigger: RequestTrigger.Chat } },
);
log('generateObject completed, result: %O', result);
return result;

View file

@ -1,12 +1,11 @@
import { DEFAULT_FILE_EMBEDDING_MODEL_ITEM } from '@lobechat/const';
import { type ChatSemanticSearchChunk, type FileSearchResult } from '@lobechat/types';
import { SemanticSearchSchema } from '@lobechat/types';
import { RequestTrigger, SemanticSearchSchema } from '@lobechat/types';
import { TRPCError } from '@trpc/server';
import { inArray } from 'drizzle-orm';
import pMap from 'p-map';
import { z } from 'zod';
import { checkBudgetsUsage } from '@/business/server/trpc-middlewares/lambda';
import { AsyncTaskModel } from '@/database/models/asyncTask';
import { ChunkModel } from '@/database/models/chunk';
import { DocumentModel } from '@/database/models/document';
@ -219,18 +218,20 @@ export const chunkRouter = router({
query: z.string(),
}),
)
.use(checkBudgetsUsage)
.mutation(async ({ ctx, input }) => {
const { model, provider } =
getServerDefaultFilesConfig().embeddingModel || DEFAULT_FILE_EMBEDDING_MODEL_ITEM;
// Read user's provider config from database
const agentRuntime = await initModelRuntimeFromDB(ctx.serverDB, ctx.userId, provider);
const embeddings = await agentRuntime.embeddings({
dimensions: 1024,
input: input.query,
model,
});
const embeddings = await agentRuntime.embeddings(
{
dimensions: 1024,
input: input.query,
model,
},
{ metadata: { trigger: RequestTrigger.SemanticSearch } },
);
return ctx.chunkModel.semanticSearch({
embedding: embeddings![0],
@ -251,11 +252,14 @@ export const chunkRouter = router({
// slice content to make sure in the context window limit
const query = input.query.length > 8000 ? input.query.slice(0, 8000) : input.query;
const embeddings = await modelRuntime.embeddings({
dimensions: 1024,
input: query,
model,
});
const embeddings = await modelRuntime.embeddings(
{
dimensions: 1024,
input: query,
model,
},
{ metadata: { trigger: RequestTrigger.SemanticSearch } },
);
const embedding = embeddings![0];

View file

@ -16,7 +16,7 @@ import {
UpdateIdentityActionSchema,
} from '@lobechat/memory-user-memory';
import { type SearchMemoryResult } from '@lobechat/types';
import { LayersEnum, searchMemorySchema } from '@lobechat/types';
import { LayersEnum, RequestTrigger, searchMemorySchema } from '@lobechat/types';
import { type SQL } from 'drizzle-orm';
import { and, asc, eq, gte, lte } from 'drizzle-orm';
import pMap from 'p-map';
@ -167,11 +167,14 @@ const searchUserMemories = async (
// Read user's provider config from database
const modelRuntime = await initModelRuntimeFromDB(ctx.serverDB, ctx.userId, provider);
const queryEmbeddings = await modelRuntime.embeddings({
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: input.query,
model: embeddingModel,
});
const queryEmbeddings = await modelRuntime.embeddings(
{
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: input.query,
model: embeddingModel,
},
{ metadata: { trigger: RequestTrigger.Memory } },
);
const effectiveEffort = normalizeMemoryEffort(input.effort ?? ctx.memoryEffort);
const effortDefaults = MEMORY_SEARCH_TOP_K_LIMITS[effectiveEffort];
@ -210,11 +213,14 @@ const createEmbedder = (agentRuntime: any, embeddingModel: string) => {
return async (value?: string | null): Promise<number[] | undefined> => {
if (!value || value.trim().length === 0) return undefined;
const embeddings = await agentRuntime.embeddings({
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: value,
model: embeddingModel,
});
const embeddings = await agentRuntime.embeddings(
{
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: value,
model: embeddingModel,
},
{ metadata: { trigger: RequestTrigger.Memory } },
);
return embeddings?.[0];
};
@ -485,11 +491,14 @@ export const userMemoriesRouter = router({
const embedTexts = async (texts: string[]): Promise<number[][]> => {
if (texts.length === 0) return [];
const response = await agentRuntime.embeddings({
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: texts,
model: embeddingModel,
});
const response = await agentRuntime.embeddings(
{
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: texts,
model: embeddingModel,
},
{ metadata: { trigger: RequestTrigger.Memory } },
);
if (!response || response.length !== texts.length) {
throw new Error('Embedding response length mismatch');
@ -957,17 +966,14 @@ export const userMemoriesRouter = router({
}
}),
searchMemory: memoryProcedure
.input(searchMemorySchema)
.query(async ({ input, ctx }) => {
try {
return await searchUserMemories(ctx, input);
} catch (error) {
console.error('Failed to retrieve memories:', error);
return EMPTY_SEARCH_RESULT;
}
searchMemory: memoryProcedure.input(searchMemorySchema).query(async ({ input, ctx }) => {
try {
return await searchUserMemories(ctx, input);
} catch (error) {
console.error('Failed to retrieve memories:', error);
return EMPTY_SEARCH_RESULT;
}
),
}),
toolAddActivityMemory: memoryProcedure
.input(ActivityMemoryItemSchema)

View file

@ -9,6 +9,7 @@ import type {
EvalRunTopicResult,
RubricType,
} from '@lobechat/types';
import { RequestTrigger } from '@lobechat/types';
import {
AgentEvalBenchmarkModel,
@ -78,7 +79,7 @@ export class AgentEvalRunService {
testCases.map((tc) => ({
agentId: params.targetAgentId ?? undefined,
title: `[Eval Case #${(tc.sortOrder ?? 0) + 1}] ${tc.content?.input?.slice(0, 50) || 'Test Case'}...`,
trigger: 'eval',
trigger: RequestTrigger.Eval,
})),
);
@ -165,7 +166,7 @@ export class AgentEvalRunService {
errorTestCases.map((tc) => ({
agentId: run.targetAgentId ?? undefined,
title: `[Eval Case #${(tc.sortOrder ?? 0) + 1}] ${tc.input?.slice(0, 50) || 'Test Case'}...`,
trigger: 'eval',
trigger: RequestTrigger.Eval,
})),
);
@ -204,7 +205,7 @@ export class AgentEvalRunService {
{
agentId: run.targetAgentId ?? undefined,
title: `[Eval Case #${(runTopic.testCase?.sortOrder ?? 0) + 1}] ${runTopic.testCase?.content?.input?.slice(0, 50) || 'Test Case'}...`,
trigger: 'eval',
trigger: RequestTrigger.Eval,
},
]);

View file

@ -135,6 +135,7 @@ export interface OperationCreationParams {
groupId?: string | null;
threadId?: string | null;
topicId?: string | null;
trigger?: string;
};
autoStart?: boolean;
/**

View file

@ -778,6 +778,7 @@ export class AiAgentService {
groupId: appContext?.groupId,
threadId: appContext?.threadId,
topicId,
trigger,
},
autoStart,
completionWebhook,

View file

@ -1,4 +1,5 @@
import type { ChatTopicBotContext } from '@lobechat/types';
import { RequestTrigger } from '@lobechat/types';
import type { Message, SentMessage, Thread } from 'chat';
import { emoji } from 'chat';
import debug from 'debug';
@ -174,7 +175,7 @@ export class AgentBridgeService {
botContext,
channelContext,
reactionThreadId: parentChannelThreadId(thread.id),
trigger: 'bot',
trigger: RequestTrigger.Bot,
});
// Persist topic mapping and channel context in thread state for follow-up messages
@ -241,7 +242,7 @@ export class AgentBridgeService {
botContext,
channelContext,
topicId,
trigger: 'bot',
trigger: RequestTrigger.Bot,
});
} catch (error) {
// If the cached topicId references a deleted topic (FK violation),

View file

@ -22,6 +22,7 @@ import {
type Embeddings,
type GenerateObjectPayload,
type LLMRoleType,
type ModelRuntimeHooks,
type OpenAIChatMessage,
} from '@lobechat/model-runtime';
import { ModelRuntime } from '@lobechat/model-runtime';
@ -37,14 +38,15 @@ import {
tracer,
} from '@lobechat/observability-otel/modules/memory-user-memory';
import { attributesCommon } from '@lobechat/observability-otel/node';
import {
type AiProviderRuntimeState,
type ChatTopicMetadata,
type IdentityMemoryDetail,
type MemoryExtractionAgentCallTrace,
type MemoryExtractionTraceError,
type MemoryExtractionTracePayload,
import type {
AiProviderRuntimeState,
ChatTopicMetadata,
IdentityMemoryDetail,
MemoryExtractionAgentCallTrace,
MemoryExtractionTraceError,
MemoryExtractionTracePayload,
} from '@lobechat/types';
import { RequestTrigger } from '@lobechat/types';
import { type FlowControl } from '@upstash/qstash';
import { Client } from '@upstash/workflow';
import debug from 'debug';
@ -52,6 +54,7 @@ import { and, asc, eq, inArray } from 'drizzle-orm';
import { join } from 'pathe';
import { z } from 'zod';
import { getBusinessModelRuntimeHooks } from '@/business/server/model-runtime';
import { AsyncTaskModel } from '@/database/models/asyncTask';
import { type ListTopicsForMemoryExtractorCursor } from '@/database/models/topic';
import { TopicModel } from '@/database/models/topic';
@ -303,12 +306,14 @@ export type RuntimeResolveOptions = {
preferred?: {
providerIds?: string[];
};
userId?: string;
};
export const resolveRuntimeAgentConfig = (
agent: MemoryAgentConfig,
keyVaults?: ProviderKeyVaultMap,
options?: RuntimeResolveOptions,
hooks?: ModelRuntimeHooks,
) => {
const normalizedPreferredProviders = (options?.preferred?.providerIds || [])
.map(normalizeProvider)
@ -329,7 +334,7 @@ export const resolveRuntimeAgentConfig = (
source: 'user-vault' as const,
});
return ModelRuntime.initializeWithProvider(provider, {});
return ModelRuntime.initializeWithProvider(provider, { userId: options?.userId }, hooks);
}
const { apiKey: userApiKey, baseURL: userBaseURL } = extractCredentialsFromVault(
@ -354,6 +359,7 @@ export const resolveRuntimeAgentConfig = (
return ModelRuntime.initializeWithProvider(provider, {
apiKey: userApiKey,
baseURL: userBaseURL,
userId: options?.userId,
});
}
@ -367,6 +373,7 @@ export const resolveRuntimeAgentConfig = (
return ModelRuntime.initializeWithProvider(agent.provider || 'openai', {
apiKey: agent.apiKey || options?.fallback?.apiKey,
baseURL: agent.baseURL || options?.fallback?.baseURL,
userId: options?.userId,
});
};
@ -603,7 +610,7 @@ export class MemoryExtractionExecutor {
input: requests.map((item) => item.text),
model,
},
{ user: 'memory-extraction' },
{ metadata: { trigger: RequestTrigger.Memory }, user: 'memory-extraction' },
);
const vectors = texts.map<Embeddings | null>(() => null);
@ -1050,11 +1057,14 @@ export class MemoryExtractionExecutor {
tokenLimit,
);
const embeddings = await runtime.embeddings({
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: [aggregatedContent],
model: embeddingModel,
});
const embeddings = await runtime.embeddings(
{
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: [aggregatedContent],
model: embeddingModel,
},
{ metadata: { trigger: RequestTrigger.Memory } },
);
const vector = embeddings?.[0];
if (vector) {
@ -1978,6 +1988,7 @@ export class MemoryExtractionExecutor {
baseURL: this.privateConfig.embedding.baseURL,
},
preferred: { providerIds: this.embeddingPreferredProviders },
userId,
};
const gatekeeperOptions: RuntimeResolveOptions = {
@ -1986,6 +1997,7 @@ export class MemoryExtractionExecutor {
baseURL: this.privateConfig.agentGateKeeper.baseURL,
},
preferred: { providerIds: this.gatekeeperPreferredProviders },
userId,
};
const layerExtractorOptions: RuntimeResolveOptions = {
@ -1994,23 +2006,29 @@ export class MemoryExtractionExecutor {
baseURL: this.privateConfig.agentLayerExtractor.baseURL,
},
preferred: { providerIds: this.layerPreferredProviders },
userId,
};
const hooks = getBusinessModelRuntimeHooks(userId, 'lobehub');
const runtimes: RuntimeBundle = {
embeddings: await resolveRuntimeAgentConfig(
{ ...this.privateConfig.embedding },
keyVaults,
embeddingOptions,
hooks,
),
gatekeeper: await resolveRuntimeAgentConfig(
{ ...this.privateConfig.agentGateKeeper },
keyVaults,
gatekeeperOptions,
hooks,
),
layerExtractor: await resolveRuntimeAgentConfig(
{ ...this.privateConfig.agentLayerExtractor },
keyVaults,
layerExtractorOptions,
hooks,
),
};

View file

@ -11,6 +11,7 @@ import {
} from '@lobechat/memory-user-memory';
import { desc, eq } from 'drizzle-orm';
import { getBusinessModelRuntimeHooks } from '@/business/server/model-runtime';
import { UserMemoryModel } from '@/database/models/userMemory';
import { UserPersonaModel } from '@/database/models/userMemory/persona';
import { AiInfraRepos } from '@/database/repositories/aiInfra';
@ -78,13 +79,21 @@ export class UserPersonaService {
{} as ProviderKeyVaultMap,
);
const runtime = await resolveRuntimeAgentConfig({ ...this.agentConfig }, keyVaults, {
fallback: {
apiKey: this.agentConfig.apiKey,
baseURL: this.agentConfig.baseURL,
},
preferred: { providerIds: [providerId] },
} satisfies RuntimeResolveOptions);
const hooks = getBusinessModelRuntimeHooks(payload.userId, 'lobehub');
const runtime = await resolveRuntimeAgentConfig(
{ ...this.agentConfig },
keyVaults,
{
fallback: {
apiKey: this.agentConfig.apiKey,
baseURL: this.agentConfig.baseURL,
},
preferred: { providerIds: [providerId] },
userId: payload.userId,
} satisfies RuntimeResolveOptions,
hooks,
);
const personaModel = new UserPersonaModel(this.db, payload.userId);
const lastDocument = await personaModel.getLatestPersonaDocument();

View file

@ -1,6 +1,7 @@
import { DEFAULT_SYSTEM_AGENT_CONFIG } from '@lobechat/const';
import { chainSummaryTitle } from '@lobechat/prompts';
import { type UserSystemAgentConfig, type UserSystemAgentConfigKey } from '@lobechat/types';
import type { UserSystemAgentConfig, UserSystemAgentConfigKey } from '@lobechat/types';
import { RequestTrigger } from '@lobechat/types';
import debug from 'debug';
import { UserModel } from '@/database/models/user';
@ -65,11 +66,14 @@ export class SystemAgentService {
const payload = chainSummaryTitle(messages, locale);
const modelRuntime = await initModelRuntimeFromDB(this.db, this.userId, provider);
const result = await modelRuntime.generateObject({
messages: payload.messages as any[],
model,
schema: TOPIC_TITLE_SCHEMA,
});
const result = await modelRuntime.generateObject(
{
messages: payload.messages as any[],
model,
schema: TOPIC_TITLE_SCHEMA,
},
{ metadata: { trigger: RequestTrigger.Topic } },
);
const title = (result as { title?: string })?.title?.trim();
if (!title) {

View file

@ -30,7 +30,7 @@ import type {
SearchMemoryResult,
UpdateIdentityMemoryResult,
} from '@lobechat/types';
import { LayersEnum } from '@lobechat/types';
import { LayersEnum, RequestTrigger } from '@lobechat/types';
import { eq } from 'drizzle-orm';
import type { z } from 'zod';
@ -159,11 +159,14 @@ const createEmbedder = (agentRuntime: any, embeddingModel: string) => {
return async (value?: string | null): Promise<number[] | undefined> => {
if (!value || value.trim().length === 0) return undefined;
const embeddings = await agentRuntime.embeddings({
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: value,
model: embeddingModel,
});
const embeddings = await agentRuntime.embeddings(
{
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: value,
model: embeddingModel,
},
{ metadata: { trigger: RequestTrigger.Memory } },
);
return embeddings?.[0];
};
@ -193,11 +196,14 @@ class MemoryServerRuntimeService implements MemoryRuntimeService {
const modelRuntime = await initModelRuntimeFromDB(this.serverDB, this.userId, provider);
const queryEmbeddings = await modelRuntime.embeddings({
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: params.query,
model: embeddingModel,
});
const queryEmbeddings = await modelRuntime.embeddings(
{
dimensions: DEFAULT_USER_MEMORY_EMBEDDING_DIMENSIONS,
input: params.query,
model: embeddingModel,
},
{ metadata: { trigger: RequestTrigger.Memory } },
);
const effectiveEffort = normalizeMemoryEffort(params.effort ?? this.memoryEffort);
const effortDefaults = MEMORY_SEARCH_TOP_K_LIMITS[effectiveEffort];