mirror of
https://github.com/lobehub/lobehub
synced 2026-04-21 09:37:28 +00:00
✨ feat: associate web crawl documents with agent documents (#13893)
* ✨ feat: associate web crawl documents with agent documents - Add `associate` method to AgentDocumentModel for linking existing documents - Add `associateDocument` to AgentDocumentsService, TRPC router, and client service - Update web browsing executor to associate crawled pages with agent after notebook save - Add server-side crawl-to-agent-document persistence in webBrowsing runtime - Add `findOrCreateFolder` to DocumentModel for folder hierarchy support - Extract `DOCUMENT_FOLDER_TYPE` constant from hardcoded 'custom/folder' strings - Add tests for associate, findOrCreateFolder, and service layer Fixes LOBE-7242 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * 🐛 fix: log errors in web crawl agent document association Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * ♻️ refactor: add onCrawlComplete callback to WebBrowsingExecutionRuntime Replace monkey-patching of crawlMultiPages with a proper onCrawlComplete callback in the runtime constructor options. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * ♻️ refactor: move document save logic into WebBrowsingExecutionRuntime Replace onCrawlComplete callback with documentService dependency injection. The runtime now directly handles createDocument + associateDocument internally. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * ♻️ refactor: pass per-call context to documentService via crawlMultiPages Add WebBrowsingDocumentContext (topicId, agentId) as a parameter to crawlMultiPages, which flows through to documentService methods. This allows a singleton runtime with per-call context on the client side. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * 🐛 fix: enforce document ownership in associate and match root folders by null parentId - associate: verify documentId belongs to current user before creating link - findOrCreateFolder: add parentId IS NULL condition for root-level lookup Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
13d1b011b7
commit
c046d042f5
15 changed files with 369 additions and 105 deletions
|
|
@ -7,14 +7,38 @@ import type {
|
|||
SearchQuery,
|
||||
SearchServiceImpl,
|
||||
} from '@lobechat/types';
|
||||
import type { CrawlSuccessResult } from '@lobechat/web-crawler';
|
||||
|
||||
import { CRAWL_CONTENT_LIMITED_COUNT, SEARCH_ITEM_LIMITED_COUNT } from '../const';
|
||||
|
||||
export class WebBrowsingExecutionRuntime {
|
||||
private searchService: SearchServiceImpl;
|
||||
export interface WebBrowsingDocumentService {
|
||||
associateDocument: (documentId: string) => Promise<void>;
|
||||
createDocument: (params: {
|
||||
content: string;
|
||||
description?: string;
|
||||
title: string;
|
||||
url: string;
|
||||
}) => Promise<{ id: string }>;
|
||||
}
|
||||
|
||||
constructor(options: { searchService: SearchServiceImpl }) {
|
||||
export interface WebBrowsingRuntimeOptions {
|
||||
agentId?: string;
|
||||
documentService?: WebBrowsingDocumentService;
|
||||
searchService: SearchServiceImpl;
|
||||
topicId?: string;
|
||||
}
|
||||
|
||||
export class WebBrowsingExecutionRuntime {
|
||||
private agentId?: string;
|
||||
private documentService?: WebBrowsingDocumentService;
|
||||
private searchService: SearchServiceImpl;
|
||||
private topicId?: string;
|
||||
|
||||
constructor(options: WebBrowsingRuntimeOptions) {
|
||||
this.searchService = options.searchService;
|
||||
this.documentService = options.documentService;
|
||||
this.agentId = options.agentId;
|
||||
this.topicId = options.topicId;
|
||||
}
|
||||
|
||||
async search(
|
||||
|
|
@ -66,6 +90,31 @@ export class WebBrowsingExecutionRuntime {
|
|||
|
||||
const { results } = response;
|
||||
|
||||
// Save crawled pages as documents and associate with agent
|
||||
if (this.documentService) {
|
||||
await Promise.all(
|
||||
results.map(async (item) => {
|
||||
if ('errorMessage' in item.data) return;
|
||||
|
||||
const pageData = item.data as CrawlSuccessResult;
|
||||
if (!pageData.content) return;
|
||||
|
||||
try {
|
||||
const doc = await this.documentService!.createDocument({
|
||||
content: pageData.content,
|
||||
description: pageData.description || `Crawled from ${pageData.url}`,
|
||||
title: pageData.title || pageData.url,
|
||||
url: pageData.url,
|
||||
});
|
||||
|
||||
await this.documentService!.associateDocument(doc.id);
|
||||
} catch (error) {
|
||||
console.error('[WebBrowsing] Failed to save crawl result to agent document:', error);
|
||||
}
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
const content = results.map((item) =>
|
||||
'errorMessage' in item
|
||||
? item
|
||||
|
|
|
|||
|
|
@ -55,6 +55,51 @@ const createTestDocument = async (model: DocumentModel, fModel: FileModel, conte
|
|||
};
|
||||
|
||||
describe('DocumentModel', () => {
|
||||
describe('findOrCreateFolder', () => {
|
||||
it('should create a new folder when none exists', async () => {
|
||||
const folder = await documentModel.findOrCreateFolder('bookmark');
|
||||
|
||||
expect(folder).toBeDefined();
|
||||
expect(folder.fileType).toBe('custom/folder');
|
||||
expect(folder.filename).toBe('bookmark');
|
||||
expect(folder.title).toBe('bookmark');
|
||||
expect(folder.source).toBe('');
|
||||
expect(folder.sourceType).toBe('api');
|
||||
expect(folder.totalCharCount).toBe(0);
|
||||
expect(folder.content).toBe('');
|
||||
});
|
||||
|
||||
it('should return existing folder on second call', async () => {
|
||||
const first = await documentModel.findOrCreateFolder('bookmark');
|
||||
const second = await documentModel.findOrCreateFolder('bookmark');
|
||||
|
||||
expect(second.id).toBe(first.id);
|
||||
});
|
||||
|
||||
it('should isolate folders by user', async () => {
|
||||
const folder1 = await documentModel.findOrCreateFolder('bookmark');
|
||||
const folder2 = await documentModel2.findOrCreateFolder('bookmark');
|
||||
|
||||
expect(folder1.id).not.toBe(folder2.id);
|
||||
});
|
||||
|
||||
it('should support parentId for nested folders', async () => {
|
||||
const parent = await documentModel.findOrCreateFolder('root');
|
||||
const child = await documentModel.findOrCreateFolder('sub', parent.id);
|
||||
|
||||
expect(child.parentId).toBe(parent.id);
|
||||
expect(child.id).not.toBe(parent.id);
|
||||
});
|
||||
|
||||
it('should distinguish folders with same name but different parentId', async () => {
|
||||
const topLevel = await documentModel.findOrCreateFolder('notes');
|
||||
const parent = await documentModel.findOrCreateFolder('root');
|
||||
const nested = await documentModel.findOrCreateFolder('notes', parent.id);
|
||||
|
||||
expect(topLevel.id).not.toBe(nested.id);
|
||||
});
|
||||
});
|
||||
|
||||
describe('create', () => {
|
||||
it('should create a new document', async () => {
|
||||
const { id: fileId } = await fileModel.create({
|
||||
|
|
|
|||
|
|
@ -39,6 +39,96 @@ beforeEach(async () => {
|
|||
});
|
||||
|
||||
describe('AgentDocumentModel', () => {
|
||||
describe('associate', () => {
|
||||
it('should link an existing document to an agent and return the new id', async () => {
|
||||
// Create a document in the documents table directly
|
||||
const [doc] = await serverDB
|
||||
.insert(documents)
|
||||
.values({
|
||||
content: 'crawled content',
|
||||
fileType: 'article',
|
||||
filename: 'page.html',
|
||||
source: 'https://example.com',
|
||||
sourceType: 'web',
|
||||
title: 'Example Page',
|
||||
totalCharCount: 15,
|
||||
totalLineCount: 1,
|
||||
userId,
|
||||
})
|
||||
.returning();
|
||||
|
||||
const result = await agentDocumentModel.associate({ agentId, documentId: doc!.id });
|
||||
|
||||
expect(result.id).toBeDefined();
|
||||
expect(result.id).not.toBe('');
|
||||
|
||||
// Verify the agentDocuments row was created
|
||||
const [row] = await serverDB
|
||||
.select()
|
||||
.from(agentDocuments)
|
||||
.where(eq(agentDocuments.id, result.id));
|
||||
|
||||
expect(row).toBeDefined();
|
||||
expect(row?.agentId).toBe(agentId);
|
||||
expect(row?.documentId).toBe(doc!.id);
|
||||
expect(row?.userId).toBe(userId);
|
||||
expect(row?.policyLoad).toBe(PolicyLoad.PROGRESSIVE);
|
||||
});
|
||||
|
||||
it('should be idempotent (onConflictDoNothing)', async () => {
|
||||
const [doc] = await serverDB
|
||||
.insert(documents)
|
||||
.values({
|
||||
content: 'content',
|
||||
fileType: 'article',
|
||||
filename: 'dup.html',
|
||||
source: 'https://example.com/dup',
|
||||
sourceType: 'web',
|
||||
title: 'Dup Page',
|
||||
totalCharCount: 7,
|
||||
totalLineCount: 1,
|
||||
userId,
|
||||
})
|
||||
.returning();
|
||||
|
||||
const first = await agentDocumentModel.associate({ agentId, documentId: doc!.id });
|
||||
const second = await agentDocumentModel.associate({ agentId, documentId: doc!.id });
|
||||
|
||||
expect(first.id).toBeDefined();
|
||||
// Second call should not throw, id may be undefined due to onConflictDoNothing
|
||||
expect(second).toBeDefined();
|
||||
});
|
||||
|
||||
it('should not create documents row — only the link', async () => {
|
||||
const [doc] = await serverDB
|
||||
.insert(documents)
|
||||
.values({
|
||||
content: 'existing',
|
||||
fileType: 'article',
|
||||
filename: 'existing.html',
|
||||
source: 'https://example.com/existing',
|
||||
sourceType: 'web',
|
||||
title: 'Existing',
|
||||
totalCharCount: 8,
|
||||
totalLineCount: 1,
|
||||
userId,
|
||||
})
|
||||
.returning();
|
||||
|
||||
const countBefore = await serverDB
|
||||
.select()
|
||||
.from(documents)
|
||||
.where(eq(documents.userId, userId));
|
||||
await agentDocumentModel.associate({ agentId, documentId: doc!.id });
|
||||
const countAfter = await serverDB
|
||||
.select()
|
||||
.from(documents)
|
||||
.where(eq(documents.userId, userId));
|
||||
|
||||
expect(countAfter.length).toBe(countBefore.length);
|
||||
});
|
||||
});
|
||||
|
||||
describe('create', () => {
|
||||
it('should create an agent document with normalized policy and linked document row', async () => {
|
||||
const result = await agentDocumentModel.create(agentId, 'identity.md', 'line1\nline2', {
|
||||
|
|
|
|||
|
|
@ -85,6 +85,45 @@ export class AgentDocumentModel {
|
|||
};
|
||||
}
|
||||
|
||||
async associate(params: {
|
||||
agentId: string;
|
||||
documentId: string;
|
||||
policyLoad?: PolicyLoad;
|
||||
}): Promise<{ id: string }> {
|
||||
const { agentId, documentId, policyLoad } = params;
|
||||
|
||||
// Verify the document belongs to the current user
|
||||
const doc = await this.db.query.documents.findFirst({
|
||||
where: and(eq(documents.id, documentId), eq(documents.userId, this.userId)),
|
||||
});
|
||||
|
||||
if (!doc) return { id: '' };
|
||||
|
||||
const [result] = await this.db
|
||||
.insert(agentDocuments)
|
||||
.values({
|
||||
accessPublic: 0,
|
||||
accessSelf:
|
||||
AgentAccess.EXECUTE |
|
||||
AgentAccess.LIST |
|
||||
AgentAccess.READ |
|
||||
AgentAccess.WRITE |
|
||||
AgentAccess.DELETE,
|
||||
accessShared: 0,
|
||||
agentId,
|
||||
documentId,
|
||||
policyLoad: policyLoad ?? PolicyLoad.PROGRESSIVE,
|
||||
policyLoadFormat: DocumentLoadFormat.RAW,
|
||||
policyLoadPosition: DocumentLoadPosition.BEFORE_FIRST_USER,
|
||||
policyLoadRule: DocumentLoadRule.ALWAYS,
|
||||
userId: this.userId,
|
||||
})
|
||||
.onConflictDoNothing()
|
||||
.returning({ id: agentDocuments.id });
|
||||
|
||||
return { id: result?.id };
|
||||
}
|
||||
|
||||
async create(
|
||||
agentId: string,
|
||||
filename: string,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { and, count, desc, eq, inArray } from 'drizzle-orm';
|
||||
import { and, count, desc, eq, inArray, isNull } from 'drizzle-orm';
|
||||
|
||||
import type { DocumentItem, NewDocument } from '../schemas';
|
||||
import { documents } from '../schemas';
|
||||
import { DOCUMENT_FOLDER_TYPE, documents } from '../schemas';
|
||||
import type { LobeChatDatabase } from '../type';
|
||||
|
||||
export interface QueryDocumentParams {
|
||||
|
|
@ -20,6 +20,31 @@ export class DocumentModel {
|
|||
this.db = db;
|
||||
}
|
||||
|
||||
findOrCreateFolder = async (name: string, parentId?: string): Promise<DocumentItem> => {
|
||||
const existing = await this.db.query.documents.findFirst({
|
||||
where: and(
|
||||
eq(documents.userId, this.userId),
|
||||
eq(documents.fileType, DOCUMENT_FOLDER_TYPE),
|
||||
eq(documents.filename, name),
|
||||
parentId ? eq(documents.parentId, parentId) : isNull(documents.parentId),
|
||||
),
|
||||
});
|
||||
|
||||
if (existing) return existing;
|
||||
|
||||
return this.create({
|
||||
content: '',
|
||||
fileType: DOCUMENT_FOLDER_TYPE,
|
||||
filename: name,
|
||||
parentId,
|
||||
source: '',
|
||||
sourceType: 'api',
|
||||
title: name,
|
||||
totalCharCount: 0,
|
||||
totalLineCount: 0,
|
||||
});
|
||||
};
|
||||
|
||||
create = async (params: Omit<NewDocument, 'userId'>): Promise<DocumentItem> => {
|
||||
const result = (await this.db
|
||||
.insert(documents)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { sql } from 'drizzle-orm';
|
||||
|
||||
import { agents, documents, tasks, topics } from '../schemas';
|
||||
import { agents, DOCUMENT_FOLDER_TYPE, documents, tasks, topics } from '../schemas';
|
||||
import type { LobeChatDatabase } from '../type';
|
||||
|
||||
export interface RecentDbItem {
|
||||
|
|
@ -56,7 +56,7 @@ export class RecentModel {
|
|||
WHERE ${documents.userId} = ${this.userId}
|
||||
AND ${documents.sourceType} != 'file'
|
||||
AND ${documents.knowledgeBaseId} IS NULL
|
||||
AND ${documents.fileType} != 'custom/folder'
|
||||
AND ${documents.fileType} != ${DOCUMENT_FOLDER_TYPE}
|
||||
|
||||
UNION ALL
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import { and, eq, sql } from 'drizzle-orm';
|
|||
|
||||
import { DocumentModel } from '../../models/document';
|
||||
import { FileModel } from '../../models/file';
|
||||
import { documents, files, knowledgeBaseFiles } from '../../schemas';
|
||||
import { DOCUMENT_FOLDER_TYPE, documents, files, knowledgeBaseFiles } from '../../schemas';
|
||||
import type { LobeChatDatabase } from '../../type';
|
||||
|
||||
export interface KnowledgeItem {
|
||||
|
|
@ -313,7 +313,7 @@ export class KnowledgeRepo {
|
|||
const document = await this.documentModel.findById(id);
|
||||
if (!document) return;
|
||||
|
||||
if (document.fileType === 'custom/folder') {
|
||||
if (document.fileType === DOCUMENT_FOLDER_TYPE) {
|
||||
const children = await this.db.query.documents.findMany({
|
||||
where: and(eq(documents.parentId, id), eq(documents.userId, this.userId)),
|
||||
});
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import { and, eq, ne, sql } from 'drizzle-orm';
|
|||
import {
|
||||
agents,
|
||||
chatGroups,
|
||||
DOCUMENT_FOLDER_TYPE,
|
||||
documents,
|
||||
files,
|
||||
knowledgeBaseFiles,
|
||||
|
|
@ -554,7 +555,7 @@ export class SearchRepo {
|
|||
}
|
||||
|
||||
/**
|
||||
* Search folders (documents with file_type='custom/folder') (BM25)
|
||||
* Search folders (documents with file_type=DOCUMENT_FOLDER_TYPE) (BM25)
|
||||
*/
|
||||
private async searchFolders(query: string, limit: number): Promise<FolderSearchResult[]> {
|
||||
const bm25Query = sanitizeBm25Query(query);
|
||||
|
|
@ -575,7 +576,7 @@ export class SearchRepo {
|
|||
.where(
|
||||
and(
|
||||
eq(documents.userId, this.userId),
|
||||
eq(documents.fileType, 'custom/folder'),
|
||||
eq(documents.fileType, DOCUMENT_FOLDER_TYPE),
|
||||
sql`(${documents.title} @@@ ${bm25Query} OR ${documents.slug} @@@ ${bm25Query} OR ${documents.description} @@@ ${bm25Query})`,
|
||||
),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ import { accessedAt, createdAt, timestamps } from './_helpers';
|
|||
import { asyncTasks } from './asyncTask';
|
||||
import { users } from './user';
|
||||
|
||||
export const DOCUMENT_FOLDER_TYPE = 'custom/folder';
|
||||
|
||||
export const globalFiles = pgTable(
|
||||
'global_files',
|
||||
{
|
||||
|
|
|
|||
|
|
@ -230,6 +230,20 @@ export const agentDocumentRouter = router({
|
|||
});
|
||||
}),
|
||||
|
||||
/**
|
||||
* Tool-oriented: associate an existing document with an agent
|
||||
*/
|
||||
associateDocument: agentDocumentProcedure
|
||||
.input(
|
||||
z.object({
|
||||
agentId: z.string(),
|
||||
documentId: z.string(),
|
||||
}),
|
||||
)
|
||||
.mutation(async ({ ctx, input }) => {
|
||||
return ctx.agentDocumentService.associateDocument(input.agentId, input.documentId);
|
||||
}),
|
||||
|
||||
/**
|
||||
* Tool-oriented: create document
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ describe('AgentDocumentsService', () => {
|
|||
const userId = 'user-1';
|
||||
|
||||
const mockModel = {
|
||||
associate: vi.fn(),
|
||||
create: vi.fn(),
|
||||
findByAgent: vi.fn(),
|
||||
findByFilename: vi.fn(),
|
||||
|
|
@ -136,4 +137,16 @@ describe('AgentDocumentsService', () => {
|
|||
expect(result).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('associateDocument', () => {
|
||||
it('should delegate to agentDocumentModel.associate', async () => {
|
||||
mockModel.associate.mockResolvedValue({ id: 'ad-1' });
|
||||
|
||||
const service = new AgentDocumentsService(db, userId);
|
||||
const result = await service.associateDocument('agent-1', 'doc-1');
|
||||
|
||||
expect(mockModel.associate).toHaveBeenCalledWith({ agentId: 'agent-1', documentId: 'doc-1' });
|
||||
expect(result).toEqual({ id: 'ad-1' });
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -208,6 +208,10 @@ export class AgentDocumentsService {
|
|||
});
|
||||
}
|
||||
|
||||
async associateDocument(agentId: string, documentId: string): Promise<{ id: string }> {
|
||||
return this.agentDocumentModel.associate({ agentId, documentId });
|
||||
}
|
||||
|
||||
async createDocument(agentId: string, title: string, content: string) {
|
||||
return this.createWithUniqueFilename(agentId, title, content);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,20 +1,42 @@
|
|||
import { WebBrowsingManifest } from '@lobechat/builtin-tool-web-browsing';
|
||||
import { WebBrowsingExecutionRuntime } from '@lobechat/builtin-tool-web-browsing/executionRuntime';
|
||||
|
||||
import { DocumentModel } from '@/database/models/document';
|
||||
import { AgentDocumentsService } from '@/server/services/agentDocuments';
|
||||
import { SearchService } from '@/server/services/search';
|
||||
|
||||
import { type ServerRuntimeRegistration } from './types';
|
||||
|
||||
// Pre-instantiated (no per-request context needed)
|
||||
const runtime = new WebBrowsingExecutionRuntime({
|
||||
searchService: new SearchService(),
|
||||
});
|
||||
|
||||
/**
|
||||
* WebBrowsing Server Runtime
|
||||
* Pre-instantiated runtime (no per-request context needed)
|
||||
*/
|
||||
export const webBrowsingRuntime: ServerRuntimeRegistration = {
|
||||
factory: () => runtime,
|
||||
factory: (context) => {
|
||||
const { userId, serverDB, agentId } = context;
|
||||
const canSaveDocuments = userId && serverDB && agentId;
|
||||
|
||||
return new WebBrowsingExecutionRuntime({
|
||||
documentService: canSaveDocuments
|
||||
? {
|
||||
associateDocument: async (documentId) => {
|
||||
const service = new AgentDocumentsService(serverDB, userId);
|
||||
await service.associateDocument(agentId, documentId);
|
||||
},
|
||||
createDocument: async ({ content, description, title, url }) => {
|
||||
const model = new DocumentModel(serverDB, userId);
|
||||
return model.create({
|
||||
content,
|
||||
description,
|
||||
fileType: 'article',
|
||||
filename: title,
|
||||
source: url,
|
||||
sourceType: 'web',
|
||||
title,
|
||||
totalCharCount: content.length,
|
||||
totalLineCount: content.split('\n').length,
|
||||
});
|
||||
},
|
||||
}
|
||||
: undefined,
|
||||
searchService: new SearchService(),
|
||||
});
|
||||
},
|
||||
identifier: WebBrowsingManifest.identifier,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -70,6 +70,13 @@ class AgentDocumentService {
|
|||
return result;
|
||||
};
|
||||
|
||||
associateDocument = async (params: { agentId: string; documentId: string }) => {
|
||||
const result = await lambdaClient.agentDocument.associateDocument.mutate(params);
|
||||
await revalidateAgentDocuments(params.agentId);
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
createDocument = async (params: { agentId: string; content: string; title: string }) => {
|
||||
const result = await lambdaClient.agentDocument.createDocument.mutate(params);
|
||||
await revalidateAgentDocuments(params.agentId);
|
||||
|
|
|
|||
|
|
@ -4,21 +4,42 @@
|
|||
* Handles web search and page crawling tool calls.
|
||||
*/
|
||||
import { WebBrowsingApiName, WebBrowsingManifest } from '@lobechat/builtin-tool-web-browsing';
|
||||
import { WebBrowsingExecutionRuntime } from '@lobechat/builtin-tool-web-browsing/executionRuntime';
|
||||
import {
|
||||
type WebBrowsingDocumentService,
|
||||
WebBrowsingExecutionRuntime,
|
||||
} from '@lobechat/builtin-tool-web-browsing/executionRuntime';
|
||||
import {
|
||||
type BuiltinToolContext,
|
||||
type BuiltinToolResult,
|
||||
type CrawlMultiPagesQuery,
|
||||
type CrawlPluginState,
|
||||
type SearchQuery,
|
||||
} from '@lobechat/types';
|
||||
import { BaseExecutor, SEARCH_SEARXNG_NOT_CONFIG } from '@lobechat/types';
|
||||
import { type CrawlSuccessResult } from '@lobechat/web-crawler';
|
||||
|
||||
import { agentDocumentService } from '@/services/agentDocument';
|
||||
import { notebookService } from '@/services/notebook';
|
||||
import { searchService } from '@/services/search';
|
||||
|
||||
const runtime = new WebBrowsingExecutionRuntime({ searchService });
|
||||
const searchRuntime = new WebBrowsingExecutionRuntime({ searchService });
|
||||
|
||||
const createDocumentService = (ctx: BuiltinToolContext): WebBrowsingDocumentService => ({
|
||||
associateDocument: async (documentId) => {
|
||||
if (!ctx.agentId) return;
|
||||
await agentDocumentService.associateDocument({ agentId: ctx.agentId, documentId });
|
||||
},
|
||||
createDocument: async ({ content, description, title, url }) => {
|
||||
if (!ctx.topicId) throw new Error('topicId is required to save document');
|
||||
return notebookService.createDocument({
|
||||
content,
|
||||
description: description || `Crawled from ${url}`,
|
||||
source: url,
|
||||
sourceType: 'web',
|
||||
title,
|
||||
topicId: ctx.topicId,
|
||||
type: 'article',
|
||||
});
|
||||
},
|
||||
});
|
||||
|
||||
class WebBrowsingExecutor extends BaseExecutor<typeof WebBrowsingApiName> {
|
||||
readonly identifier = WebBrowsingManifest.identifier;
|
||||
|
|
@ -29,22 +50,16 @@ class WebBrowsingExecutor extends BaseExecutor<typeof WebBrowsingApiName> {
|
|||
*/
|
||||
search = async (params: SearchQuery, ctx: BuiltinToolContext): Promise<BuiltinToolResult> => {
|
||||
try {
|
||||
// Check if aborted
|
||||
if (ctx.signal?.aborted) {
|
||||
return { stop: true, success: false };
|
||||
}
|
||||
|
||||
const result = await runtime.search(params, { signal: ctx.signal });
|
||||
const result = await searchRuntime.search(params, { signal: ctx.signal });
|
||||
|
||||
if (result.success) {
|
||||
return {
|
||||
content: result.content,
|
||||
state: result.state,
|
||||
success: true,
|
||||
};
|
||||
return { content: result.content, state: result.state, success: true };
|
||||
}
|
||||
|
||||
// Handle specific error cases
|
||||
const error = result.error as Error;
|
||||
if (error?.message === SEARCH_SEARXNG_NOT_CONFIG) {
|
||||
return {
|
||||
|
|
@ -67,18 +82,11 @@ class WebBrowsingExecutor extends BaseExecutor<typeof WebBrowsingApiName> {
|
|||
};
|
||||
} catch (e) {
|
||||
const err = e as Error;
|
||||
|
||||
// Handle abort error
|
||||
if (err.name === 'AbortError' || err.message.includes('The user aborted a request.')) {
|
||||
return { stop: true, success: false };
|
||||
}
|
||||
|
||||
return {
|
||||
error: {
|
||||
body: e,
|
||||
message: err.message,
|
||||
type: 'PluginServerError',
|
||||
},
|
||||
error: { body: e, message: err.message, type: 'PluginServerError' },
|
||||
success: false,
|
||||
};
|
||||
}
|
||||
|
|
@ -102,69 +110,21 @@ class WebBrowsingExecutor extends BaseExecutor<typeof WebBrowsingApiName> {
|
|||
ctx: BuiltinToolContext,
|
||||
): Promise<BuiltinToolResult> => {
|
||||
try {
|
||||
// Check if aborted
|
||||
if (ctx.signal?.aborted) {
|
||||
return { stop: true, success: false };
|
||||
}
|
||||
|
||||
const runtime = new WebBrowsingExecutionRuntime({
|
||||
agentId: ctx.agentId,
|
||||
documentService: ctx.topicId ? createDocumentService(ctx) : undefined,
|
||||
searchService,
|
||||
topicId: ctx.topicId ?? undefined,
|
||||
});
|
||||
|
||||
const result = await runtime.crawlMultiPages(params);
|
||||
|
||||
if (result.success) {
|
||||
// Save crawled pages as documents if topicId is available
|
||||
const savedDocuments: Array<{ id: string; title: string; url: string }> = [];
|
||||
|
||||
if (ctx.topicId) {
|
||||
const crawlState = result.state as CrawlPluginState;
|
||||
|
||||
// Create documents for each successfully crawled page
|
||||
await Promise.all(
|
||||
crawlState.results.map(async (crawlResult) => {
|
||||
// Skip if there's an error
|
||||
if ('errorMessage' in crawlResult.data) return;
|
||||
|
||||
const pageData = crawlResult.data as CrawlSuccessResult;
|
||||
if (!pageData.content) return;
|
||||
|
||||
try {
|
||||
const document = await notebookService.createDocument({
|
||||
content: pageData.content,
|
||||
description: pageData.description || `Crawled from ${pageData.url}`,
|
||||
source: pageData.url,
|
||||
sourceType: 'web',
|
||||
title: pageData.title || pageData.url,
|
||||
topicId: ctx.topicId!,
|
||||
type: 'article',
|
||||
});
|
||||
|
||||
savedDocuments.push({
|
||||
id: document.id,
|
||||
title: document.title || pageData.url,
|
||||
url: pageData.url,
|
||||
});
|
||||
} catch {
|
||||
// Silently ignore document creation errors to not block the main flow
|
||||
}
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
// Append saved documents info to content
|
||||
let content = result.content;
|
||||
if (savedDocuments.length > 0) {
|
||||
const savedDocsInfo = savedDocuments
|
||||
.map((doc) => `- "${doc.title}" (ID: ${doc.id})`)
|
||||
.join('\n');
|
||||
content += `\n\n<saved_documents>\nThe crawled content has been saved as documents for future reference:\n${savedDocsInfo}\n</saved_documents>`;
|
||||
}
|
||||
|
||||
return {
|
||||
content,
|
||||
state: {
|
||||
...result.state,
|
||||
savedDocuments,
|
||||
},
|
||||
success: true,
|
||||
};
|
||||
return { content: result.content, state: result.state, success: true };
|
||||
}
|
||||
|
||||
return {
|
||||
|
|
@ -178,18 +138,11 @@ class WebBrowsingExecutor extends BaseExecutor<typeof WebBrowsingApiName> {
|
|||
};
|
||||
} catch (e) {
|
||||
const err = e as Error;
|
||||
|
||||
// Handle abort error
|
||||
if (err.name === 'AbortError' || err.message.includes('The user aborted a request.')) {
|
||||
return { stop: true, success: false };
|
||||
}
|
||||
|
||||
return {
|
||||
error: {
|
||||
body: e,
|
||||
message: err.message,
|
||||
type: 'PluginServerError',
|
||||
},
|
||||
error: { body: e, message: err.message, type: 'PluginServerError' },
|
||||
success: false,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue