diff --git a/packages/builtin-tool-web-browsing/src/ExecutionRuntime/index.ts b/packages/builtin-tool-web-browsing/src/ExecutionRuntime/index.ts index c814e1c3d3..d6a073d637 100644 --- a/packages/builtin-tool-web-browsing/src/ExecutionRuntime/index.ts +++ b/packages/builtin-tool-web-browsing/src/ExecutionRuntime/index.ts @@ -7,14 +7,38 @@ import type { SearchQuery, SearchServiceImpl, } from '@lobechat/types'; +import type { CrawlSuccessResult } from '@lobechat/web-crawler'; import { CRAWL_CONTENT_LIMITED_COUNT, SEARCH_ITEM_LIMITED_COUNT } from '../const'; -export class WebBrowsingExecutionRuntime { - private searchService: SearchServiceImpl; +export interface WebBrowsingDocumentService { + associateDocument: (documentId: string) => Promise; + createDocument: (params: { + content: string; + description?: string; + title: string; + url: string; + }) => Promise<{ id: string }>; +} - constructor(options: { searchService: SearchServiceImpl }) { +export interface WebBrowsingRuntimeOptions { + agentId?: string; + documentService?: WebBrowsingDocumentService; + searchService: SearchServiceImpl; + topicId?: string; +} + +export class WebBrowsingExecutionRuntime { + private agentId?: string; + private documentService?: WebBrowsingDocumentService; + private searchService: SearchServiceImpl; + private topicId?: string; + + constructor(options: WebBrowsingRuntimeOptions) { this.searchService = options.searchService; + this.documentService = options.documentService; + this.agentId = options.agentId; + this.topicId = options.topicId; } async search( @@ -66,6 +90,31 @@ export class WebBrowsingExecutionRuntime { const { results } = response; + // Save crawled pages as documents and associate with agent + if (this.documentService) { + await Promise.all( + results.map(async (item) => { + if ('errorMessage' in item.data) return; + + const pageData = item.data as CrawlSuccessResult; + if (!pageData.content) return; + + try { + const doc = await this.documentService!.createDocument({ + content: pageData.content, + description: pageData.description || `Crawled from ${pageData.url}`, + title: pageData.title || pageData.url, + url: pageData.url, + }); + + await this.documentService!.associateDocument(doc.id); + } catch (error) { + console.error('[WebBrowsing] Failed to save crawl result to agent document:', error); + } + }), + ); + } + const content = results.map((item) => 'errorMessage' in item ? item diff --git a/packages/database/src/models/__tests__/document.test.ts b/packages/database/src/models/__tests__/document.test.ts index 6603ae46c0..322f327381 100644 --- a/packages/database/src/models/__tests__/document.test.ts +++ b/packages/database/src/models/__tests__/document.test.ts @@ -55,6 +55,51 @@ const createTestDocument = async (model: DocumentModel, fModel: FileModel, conte }; describe('DocumentModel', () => { + describe('findOrCreateFolder', () => { + it('should create a new folder when none exists', async () => { + const folder = await documentModel.findOrCreateFolder('bookmark'); + + expect(folder).toBeDefined(); + expect(folder.fileType).toBe('custom/folder'); + expect(folder.filename).toBe('bookmark'); + expect(folder.title).toBe('bookmark'); + expect(folder.source).toBe(''); + expect(folder.sourceType).toBe('api'); + expect(folder.totalCharCount).toBe(0); + expect(folder.content).toBe(''); + }); + + it('should return existing folder on second call', async () => { + const first = await documentModel.findOrCreateFolder('bookmark'); + const second = await documentModel.findOrCreateFolder('bookmark'); + + expect(second.id).toBe(first.id); + }); + + it('should isolate folders by user', async () => { + const folder1 = await documentModel.findOrCreateFolder('bookmark'); + const folder2 = await documentModel2.findOrCreateFolder('bookmark'); + + expect(folder1.id).not.toBe(folder2.id); + }); + + it('should support parentId for nested folders', async () => { + const parent = await documentModel.findOrCreateFolder('root'); + const child = await documentModel.findOrCreateFolder('sub', parent.id); + + expect(child.parentId).toBe(parent.id); + expect(child.id).not.toBe(parent.id); + }); + + it('should distinguish folders with same name but different parentId', async () => { + const topLevel = await documentModel.findOrCreateFolder('notes'); + const parent = await documentModel.findOrCreateFolder('root'); + const nested = await documentModel.findOrCreateFolder('notes', parent.id); + + expect(topLevel.id).not.toBe(nested.id); + }); + }); + describe('create', () => { it('should create a new document', async () => { const { id: fileId } = await fileModel.create({ diff --git a/packages/database/src/models/agentDocuments/__tests__/agentDocument.test.ts b/packages/database/src/models/agentDocuments/__tests__/agentDocument.test.ts index 62a23028ae..eb3ad2459b 100644 --- a/packages/database/src/models/agentDocuments/__tests__/agentDocument.test.ts +++ b/packages/database/src/models/agentDocuments/__tests__/agentDocument.test.ts @@ -39,6 +39,96 @@ beforeEach(async () => { }); describe('AgentDocumentModel', () => { + describe('associate', () => { + it('should link an existing document to an agent and return the new id', async () => { + // Create a document in the documents table directly + const [doc] = await serverDB + .insert(documents) + .values({ + content: 'crawled content', + fileType: 'article', + filename: 'page.html', + source: 'https://example.com', + sourceType: 'web', + title: 'Example Page', + totalCharCount: 15, + totalLineCount: 1, + userId, + }) + .returning(); + + const result = await agentDocumentModel.associate({ agentId, documentId: doc!.id }); + + expect(result.id).toBeDefined(); + expect(result.id).not.toBe(''); + + // Verify the agentDocuments row was created + const [row] = await serverDB + .select() + .from(agentDocuments) + .where(eq(agentDocuments.id, result.id)); + + expect(row).toBeDefined(); + expect(row?.agentId).toBe(agentId); + expect(row?.documentId).toBe(doc!.id); + expect(row?.userId).toBe(userId); + expect(row?.policyLoad).toBe(PolicyLoad.PROGRESSIVE); + }); + + it('should be idempotent (onConflictDoNothing)', async () => { + const [doc] = await serverDB + .insert(documents) + .values({ + content: 'content', + fileType: 'article', + filename: 'dup.html', + source: 'https://example.com/dup', + sourceType: 'web', + title: 'Dup Page', + totalCharCount: 7, + totalLineCount: 1, + userId, + }) + .returning(); + + const first = await agentDocumentModel.associate({ agentId, documentId: doc!.id }); + const second = await agentDocumentModel.associate({ agentId, documentId: doc!.id }); + + expect(first.id).toBeDefined(); + // Second call should not throw, id may be undefined due to onConflictDoNothing + expect(second).toBeDefined(); + }); + + it('should not create documents row — only the link', async () => { + const [doc] = await serverDB + .insert(documents) + .values({ + content: 'existing', + fileType: 'article', + filename: 'existing.html', + source: 'https://example.com/existing', + sourceType: 'web', + title: 'Existing', + totalCharCount: 8, + totalLineCount: 1, + userId, + }) + .returning(); + + const countBefore = await serverDB + .select() + .from(documents) + .where(eq(documents.userId, userId)); + await agentDocumentModel.associate({ agentId, documentId: doc!.id }); + const countAfter = await serverDB + .select() + .from(documents) + .where(eq(documents.userId, userId)); + + expect(countAfter.length).toBe(countBefore.length); + }); + }); + describe('create', () => { it('should create an agent document with normalized policy and linked document row', async () => { const result = await agentDocumentModel.create(agentId, 'identity.md', 'line1\nline2', { diff --git a/packages/database/src/models/agentDocuments/agentDocument.ts b/packages/database/src/models/agentDocuments/agentDocument.ts index dc5f31c9a3..81604cc5ab 100644 --- a/packages/database/src/models/agentDocuments/agentDocument.ts +++ b/packages/database/src/models/agentDocuments/agentDocument.ts @@ -85,6 +85,45 @@ export class AgentDocumentModel { }; } + async associate(params: { + agentId: string; + documentId: string; + policyLoad?: PolicyLoad; + }): Promise<{ id: string }> { + const { agentId, documentId, policyLoad } = params; + + // Verify the document belongs to the current user + const doc = await this.db.query.documents.findFirst({ + where: and(eq(documents.id, documentId), eq(documents.userId, this.userId)), + }); + + if (!doc) return { id: '' }; + + const [result] = await this.db + .insert(agentDocuments) + .values({ + accessPublic: 0, + accessSelf: + AgentAccess.EXECUTE | + AgentAccess.LIST | + AgentAccess.READ | + AgentAccess.WRITE | + AgentAccess.DELETE, + accessShared: 0, + agentId, + documentId, + policyLoad: policyLoad ?? PolicyLoad.PROGRESSIVE, + policyLoadFormat: DocumentLoadFormat.RAW, + policyLoadPosition: DocumentLoadPosition.BEFORE_FIRST_USER, + policyLoadRule: DocumentLoadRule.ALWAYS, + userId: this.userId, + }) + .onConflictDoNothing() + .returning({ id: agentDocuments.id }); + + return { id: result?.id }; + } + async create( agentId: string, filename: string, diff --git a/packages/database/src/models/document.ts b/packages/database/src/models/document.ts index 7eb886450f..5330510e6a 100644 --- a/packages/database/src/models/document.ts +++ b/packages/database/src/models/document.ts @@ -1,7 +1,7 @@ -import { and, count, desc, eq, inArray } from 'drizzle-orm'; +import { and, count, desc, eq, inArray, isNull } from 'drizzle-orm'; import type { DocumentItem, NewDocument } from '../schemas'; -import { documents } from '../schemas'; +import { DOCUMENT_FOLDER_TYPE, documents } from '../schemas'; import type { LobeChatDatabase } from '../type'; export interface QueryDocumentParams { @@ -20,6 +20,31 @@ export class DocumentModel { this.db = db; } + findOrCreateFolder = async (name: string, parentId?: string): Promise => { + const existing = await this.db.query.documents.findFirst({ + where: and( + eq(documents.userId, this.userId), + eq(documents.fileType, DOCUMENT_FOLDER_TYPE), + eq(documents.filename, name), + parentId ? eq(documents.parentId, parentId) : isNull(documents.parentId), + ), + }); + + if (existing) return existing; + + return this.create({ + content: '', + fileType: DOCUMENT_FOLDER_TYPE, + filename: name, + parentId, + source: '', + sourceType: 'api', + title: name, + totalCharCount: 0, + totalLineCount: 0, + }); + }; + create = async (params: Omit): Promise => { const result = (await this.db .insert(documents) diff --git a/packages/database/src/models/recent.ts b/packages/database/src/models/recent.ts index 05c4b64c32..9e79b6ff61 100644 --- a/packages/database/src/models/recent.ts +++ b/packages/database/src/models/recent.ts @@ -1,6 +1,6 @@ import { sql } from 'drizzle-orm'; -import { agents, documents, tasks, topics } from '../schemas'; +import { agents, DOCUMENT_FOLDER_TYPE, documents, tasks, topics } from '../schemas'; import type { LobeChatDatabase } from '../type'; export interface RecentDbItem { @@ -56,7 +56,7 @@ export class RecentModel { WHERE ${documents.userId} = ${this.userId} AND ${documents.sourceType} != 'file' AND ${documents.knowledgeBaseId} IS NULL - AND ${documents.fileType} != 'custom/folder' + AND ${documents.fileType} != ${DOCUMENT_FOLDER_TYPE} UNION ALL diff --git a/packages/database/src/repositories/knowledge/index.ts b/packages/database/src/repositories/knowledge/index.ts index a39b472688..13e0603be1 100644 --- a/packages/database/src/repositories/knowledge/index.ts +++ b/packages/database/src/repositories/knowledge/index.ts @@ -4,7 +4,7 @@ import { and, eq, sql } from 'drizzle-orm'; import { DocumentModel } from '../../models/document'; import { FileModel } from '../../models/file'; -import { documents, files, knowledgeBaseFiles } from '../../schemas'; +import { DOCUMENT_FOLDER_TYPE, documents, files, knowledgeBaseFiles } from '../../schemas'; import type { LobeChatDatabase } from '../../type'; export interface KnowledgeItem { @@ -313,7 +313,7 @@ export class KnowledgeRepo { const document = await this.documentModel.findById(id); if (!document) return; - if (document.fileType === 'custom/folder') { + if (document.fileType === DOCUMENT_FOLDER_TYPE) { const children = await this.db.query.documents.findMany({ where: and(eq(documents.parentId, id), eq(documents.userId, this.userId)), }); diff --git a/packages/database/src/repositories/search/index.ts b/packages/database/src/repositories/search/index.ts index cd538ab94c..e7c7008b75 100644 --- a/packages/database/src/repositories/search/index.ts +++ b/packages/database/src/repositories/search/index.ts @@ -3,6 +3,7 @@ import { and, eq, ne, sql } from 'drizzle-orm'; import { agents, chatGroups, + DOCUMENT_FOLDER_TYPE, documents, files, knowledgeBaseFiles, @@ -554,7 +555,7 @@ export class SearchRepo { } /** - * Search folders (documents with file_type='custom/folder') (BM25) + * Search folders (documents with file_type=DOCUMENT_FOLDER_TYPE) (BM25) */ private async searchFolders(query: string, limit: number): Promise { const bm25Query = sanitizeBm25Query(query); @@ -575,7 +576,7 @@ export class SearchRepo { .where( and( eq(documents.userId, this.userId), - eq(documents.fileType, 'custom/folder'), + eq(documents.fileType, DOCUMENT_FOLDER_TYPE), sql`(${documents.title} @@@ ${bm25Query} OR ${documents.slug} @@@ ${bm25Query} OR ${documents.description} @@@ ${bm25Query})`, ), ) diff --git a/packages/database/src/schemas/file.ts b/packages/database/src/schemas/file.ts index eeddce70ac..e3369c5669 100644 --- a/packages/database/src/schemas/file.ts +++ b/packages/database/src/schemas/file.ts @@ -22,6 +22,8 @@ import { accessedAt, createdAt, timestamps } from './_helpers'; import { asyncTasks } from './asyncTask'; import { users } from './user'; +export const DOCUMENT_FOLDER_TYPE = 'custom/folder'; + export const globalFiles = pgTable( 'global_files', { diff --git a/src/server/routers/lambda/agentDocument.ts b/src/server/routers/lambda/agentDocument.ts index f253867521..29c1647545 100644 --- a/src/server/routers/lambda/agentDocument.ts +++ b/src/server/routers/lambda/agentDocument.ts @@ -230,6 +230,20 @@ export const agentDocumentRouter = router({ }); }), + /** + * Tool-oriented: associate an existing document with an agent + */ + associateDocument: agentDocumentProcedure + .input( + z.object({ + agentId: z.string(), + documentId: z.string(), + }), + ) + .mutation(async ({ ctx, input }) => { + return ctx.agentDocumentService.associateDocument(input.agentId, input.documentId); + }), + /** * Tool-oriented: create document */ diff --git a/src/server/services/agentDocuments.test.ts b/src/server/services/agentDocuments.test.ts index ef50e17c56..3fbd19db7c 100644 --- a/src/server/services/agentDocuments.test.ts +++ b/src/server/services/agentDocuments.test.ts @@ -19,6 +19,7 @@ describe('AgentDocumentsService', () => { const userId = 'user-1'; const mockModel = { + associate: vi.fn(), create: vi.fn(), findByAgent: vi.fn(), findByFilename: vi.fn(), @@ -136,4 +137,16 @@ describe('AgentDocumentsService', () => { expect(result).toBe(true); }); }); + + describe('associateDocument', () => { + it('should delegate to agentDocumentModel.associate', async () => { + mockModel.associate.mockResolvedValue({ id: 'ad-1' }); + + const service = new AgentDocumentsService(db, userId); + const result = await service.associateDocument('agent-1', 'doc-1'); + + expect(mockModel.associate).toHaveBeenCalledWith({ agentId: 'agent-1', documentId: 'doc-1' }); + expect(result).toEqual({ id: 'ad-1' }); + }); + }); }); diff --git a/src/server/services/agentDocuments.ts b/src/server/services/agentDocuments.ts index 33c7446731..b8d07d2a2b 100644 --- a/src/server/services/agentDocuments.ts +++ b/src/server/services/agentDocuments.ts @@ -208,6 +208,10 @@ export class AgentDocumentsService { }); } + async associateDocument(agentId: string, documentId: string): Promise<{ id: string }> { + return this.agentDocumentModel.associate({ agentId, documentId }); + } + async createDocument(agentId: string, title: string, content: string) { return this.createWithUniqueFilename(agentId, title, content); } diff --git a/src/server/services/toolExecution/serverRuntimes/webBrowsing.ts b/src/server/services/toolExecution/serverRuntimes/webBrowsing.ts index dbb384ae88..af37869cd0 100644 --- a/src/server/services/toolExecution/serverRuntimes/webBrowsing.ts +++ b/src/server/services/toolExecution/serverRuntimes/webBrowsing.ts @@ -1,20 +1,42 @@ import { WebBrowsingManifest } from '@lobechat/builtin-tool-web-browsing'; import { WebBrowsingExecutionRuntime } from '@lobechat/builtin-tool-web-browsing/executionRuntime'; +import { DocumentModel } from '@/database/models/document'; +import { AgentDocumentsService } from '@/server/services/agentDocuments'; import { SearchService } from '@/server/services/search'; import { type ServerRuntimeRegistration } from './types'; -// Pre-instantiated (no per-request context needed) -const runtime = new WebBrowsingExecutionRuntime({ - searchService: new SearchService(), -}); - -/** - * WebBrowsing Server Runtime - * Pre-instantiated runtime (no per-request context needed) - */ export const webBrowsingRuntime: ServerRuntimeRegistration = { - factory: () => runtime, + factory: (context) => { + const { userId, serverDB, agentId } = context; + const canSaveDocuments = userId && serverDB && agentId; + + return new WebBrowsingExecutionRuntime({ + documentService: canSaveDocuments + ? { + associateDocument: async (documentId) => { + const service = new AgentDocumentsService(serverDB, userId); + await service.associateDocument(agentId, documentId); + }, + createDocument: async ({ content, description, title, url }) => { + const model = new DocumentModel(serverDB, userId); + return model.create({ + content, + description, + fileType: 'article', + filename: title, + source: url, + sourceType: 'web', + title, + totalCharCount: content.length, + totalLineCount: content.split('\n').length, + }); + }, + } + : undefined, + searchService: new SearchService(), + }); + }, identifier: WebBrowsingManifest.identifier, }; diff --git a/src/services/agentDocument.ts b/src/services/agentDocument.ts index 4e83b746f8..201861f6f4 100644 --- a/src/services/agentDocument.ts +++ b/src/services/agentDocument.ts @@ -70,6 +70,13 @@ class AgentDocumentService { return result; }; + associateDocument = async (params: { agentId: string; documentId: string }) => { + const result = await lambdaClient.agentDocument.associateDocument.mutate(params); + await revalidateAgentDocuments(params.agentId); + + return result; + }; + createDocument = async (params: { agentId: string; content: string; title: string }) => { const result = await lambdaClient.agentDocument.createDocument.mutate(params); await revalidateAgentDocuments(params.agentId); diff --git a/src/store/tool/slices/builtin/executors/lobe-web-browsing.ts b/src/store/tool/slices/builtin/executors/lobe-web-browsing.ts index 90cfbea594..f0847d57d2 100644 --- a/src/store/tool/slices/builtin/executors/lobe-web-browsing.ts +++ b/src/store/tool/slices/builtin/executors/lobe-web-browsing.ts @@ -4,21 +4,42 @@ * Handles web search and page crawling tool calls. */ import { WebBrowsingApiName, WebBrowsingManifest } from '@lobechat/builtin-tool-web-browsing'; -import { WebBrowsingExecutionRuntime } from '@lobechat/builtin-tool-web-browsing/executionRuntime'; +import { + type WebBrowsingDocumentService, + WebBrowsingExecutionRuntime, +} from '@lobechat/builtin-tool-web-browsing/executionRuntime'; import { type BuiltinToolContext, type BuiltinToolResult, type CrawlMultiPagesQuery, - type CrawlPluginState, type SearchQuery, } from '@lobechat/types'; import { BaseExecutor, SEARCH_SEARXNG_NOT_CONFIG } from '@lobechat/types'; -import { type CrawlSuccessResult } from '@lobechat/web-crawler'; +import { agentDocumentService } from '@/services/agentDocument'; import { notebookService } from '@/services/notebook'; import { searchService } from '@/services/search'; -const runtime = new WebBrowsingExecutionRuntime({ searchService }); +const searchRuntime = new WebBrowsingExecutionRuntime({ searchService }); + +const createDocumentService = (ctx: BuiltinToolContext): WebBrowsingDocumentService => ({ + associateDocument: async (documentId) => { + if (!ctx.agentId) return; + await agentDocumentService.associateDocument({ agentId: ctx.agentId, documentId }); + }, + createDocument: async ({ content, description, title, url }) => { + if (!ctx.topicId) throw new Error('topicId is required to save document'); + return notebookService.createDocument({ + content, + description: description || `Crawled from ${url}`, + source: url, + sourceType: 'web', + title, + topicId: ctx.topicId, + type: 'article', + }); + }, +}); class WebBrowsingExecutor extends BaseExecutor { readonly identifier = WebBrowsingManifest.identifier; @@ -29,22 +50,16 @@ class WebBrowsingExecutor extends BaseExecutor { */ search = async (params: SearchQuery, ctx: BuiltinToolContext): Promise => { try { - // Check if aborted if (ctx.signal?.aborted) { return { stop: true, success: false }; } - const result = await runtime.search(params, { signal: ctx.signal }); + const result = await searchRuntime.search(params, { signal: ctx.signal }); if (result.success) { - return { - content: result.content, - state: result.state, - success: true, - }; + return { content: result.content, state: result.state, success: true }; } - // Handle specific error cases const error = result.error as Error; if (error?.message === SEARCH_SEARXNG_NOT_CONFIG) { return { @@ -67,18 +82,11 @@ class WebBrowsingExecutor extends BaseExecutor { }; } catch (e) { const err = e as Error; - - // Handle abort error if (err.name === 'AbortError' || err.message.includes('The user aborted a request.')) { return { stop: true, success: false }; } - return { - error: { - body: e, - message: err.message, - type: 'PluginServerError', - }, + error: { body: e, message: err.message, type: 'PluginServerError' }, success: false, }; } @@ -102,69 +110,21 @@ class WebBrowsingExecutor extends BaseExecutor { ctx: BuiltinToolContext, ): Promise => { try { - // Check if aborted if (ctx.signal?.aborted) { return { stop: true, success: false }; } + const runtime = new WebBrowsingExecutionRuntime({ + agentId: ctx.agentId, + documentService: ctx.topicId ? createDocumentService(ctx) : undefined, + searchService, + topicId: ctx.topicId ?? undefined, + }); + const result = await runtime.crawlMultiPages(params); if (result.success) { - // Save crawled pages as documents if topicId is available - const savedDocuments: Array<{ id: string; title: string; url: string }> = []; - - if (ctx.topicId) { - const crawlState = result.state as CrawlPluginState; - - // Create documents for each successfully crawled page - await Promise.all( - crawlState.results.map(async (crawlResult) => { - // Skip if there's an error - if ('errorMessage' in crawlResult.data) return; - - const pageData = crawlResult.data as CrawlSuccessResult; - if (!pageData.content) return; - - try { - const document = await notebookService.createDocument({ - content: pageData.content, - description: pageData.description || `Crawled from ${pageData.url}`, - source: pageData.url, - sourceType: 'web', - title: pageData.title || pageData.url, - topicId: ctx.topicId!, - type: 'article', - }); - - savedDocuments.push({ - id: document.id, - title: document.title || pageData.url, - url: pageData.url, - }); - } catch { - // Silently ignore document creation errors to not block the main flow - } - }), - ); - } - - // Append saved documents info to content - let content = result.content; - if (savedDocuments.length > 0) { - const savedDocsInfo = savedDocuments - .map((doc) => `- "${doc.title}" (ID: ${doc.id})`) - .join('\n'); - content += `\n\n\nThe crawled content has been saved as documents for future reference:\n${savedDocsInfo}\n`; - } - - return { - content, - state: { - ...result.state, - savedDocuments, - }, - success: true, - }; + return { content: result.content, state: result.state, success: true }; } return { @@ -178,18 +138,11 @@ class WebBrowsingExecutor extends BaseExecutor { }; } catch (e) { const err = e as Error; - - // Handle abort error if (err.name === 'AbortError' || err.message.includes('The user aborted a request.')) { return { stop: true, success: false }; } - return { - error: { - body: e, - message: err.message, - type: 'PluginServerError', - }, + error: { body: e, message: err.message, type: 'PluginServerError' }, success: false, }; }