From 3b81a94d76d5c08103089bb76191cc0fca1d7b3c Mon Sep 17 00:00:00 2001 From: Innei Date: Fri, 10 Apr 2026 01:56:05 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix(kb):=20clean=20up=20vector?= =?UTF-8?q?=20storage=20when=20deleting=20knowledge=20bases=20(#13254)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🐛 feat(db): add findExclusiveFileIds, deleteWithFiles, deleteAllWithFiles to KnowledgeBaseModel Add methods to safely clean up vector storage when deleting knowledge bases: - findExclusiveFileIds: identifies files belonging only to a specific KB - deleteWithFiles: deletes KB and its exclusive files with chunks/embeddings - deleteAllWithFiles: bulk version for deleting all user KBs * 🐛 fix(kb): wire vector cleanup in TRPC router, OpenAPI service, and client - TRPC removeKnowledgeBase: use deleteWithFiles when removeFiles=true + S3 cleanup - TRPC removeAllKnowledgeBases: use deleteAllWithFiles + S3 cleanup - OpenAPI deleteKnowledgeBase: use deleteWithFiles + S3 cleanup - Client service: default removeFiles=true when deleting knowledge base * 🐛 fix(knowledgeBase): change default behavior of deleteKnowledgeBase to not remove files and update related tests Signed-off-by: Innei * ✨ feat(knowledgeBase): add optional query parameter to deleteKnowledgeBase for file removal - Introduced `removeFiles` query parameter to control the deletion of exclusive files and derived data when deleting a knowledge base. - Updated `KnowledgeBaseController`, `KnowledgeBaseService`, and related schemas to support this new functionality. This change enhances the flexibility of the delete operation, allowing users to choose whether to remove associated files. Signed-off-by: Innei * 🐛 fix: cascade knowledge base deletion and add orphan cleanup runbook * ✨ feat(knowledgeRepo): implement cascading deletion for file-backed documents - Enhanced the `KnowledgeRepo` to ensure that when a document with an associated file is deleted, all related data (files, chunks, embeddings) are also removed. - Introduced a new method `deleteDocumentWithRelations` to handle the cascading deletion logic. - Updated tests to verify that all related entities are deleted when a file-backed document is removed. This change improves data integrity by ensuring that no orphaned records remain after deletions. Signed-off-by: Innei * Defer DocumentService file initialization * Fix flaky database tests and knowledge repo fixtures * Add deletion regression tests for folders and external files * :rewind: chore: remove kb orphan cleanup files from pr --------- Signed-off-by: Innei --- .../models/__tests__/knowledgeBase.test.ts | 340 ++++++++++++++++++ .../agentEval/__tests__/benchmark.test.ts | 1 - .../agentEval/__tests__/dataset.test.ts | 1 - .../models/agentEval/__tests__/run.test.ts | 1 - .../agentEval/__tests__/testCase.test.ts | 1 - packages/database/src/models/file.ts | 35 +- packages/database/src/models/knowledgeBase.ts | 70 +++- .../knowledge/__tests__/index.test.ts | 286 ++++++++++++++- .../src/repositories/knowledge/index.test.ts | 299 ++++++++++++++- .../src/repositories/knowledge/index.ts | 35 +- .../src/services/knowledge-base.service.ts | 15 +- src/server/routers/lambda/knowledgeBase.ts | 27 +- src/server/routers/lambda/notebook.ts | 7 +- .../services/document/__tests__/index.test.ts | 6 + src/server/services/document/index.ts | 9 +- .../services/notebook/__tests__/index.test.ts | 12 +- src/server/services/notebook/index.ts | 5 +- tests/openapi/knowledge-base-delete.test.ts | 95 +++++ 18 files changed, 1183 insertions(+), 62 deletions(-) create mode 100644 tests/openapi/knowledge-base-delete.test.ts diff --git a/packages/database/src/models/__tests__/knowledgeBase.test.ts b/packages/database/src/models/__tests__/knowledgeBase.test.ts index 769543e010..92f48352bb 100644 --- a/packages/database/src/models/__tests__/knowledgeBase.test.ts +++ b/packages/database/src/models/__tests__/knowledgeBase.test.ts @@ -636,6 +636,346 @@ describe('KnowledgeBaseModel', () => { }); }); + describe('findExclusiveFileIds', () => { + it('should return file IDs that belong only to this knowledge base', async () => { + await serverDB.insert(globalFiles).values([ + { + hashId: 'hash1', + url: 'https://example.com/a.pdf', + size: 100, + fileType: 'application/pdf', + creator: userId, + }, + { + hashId: 'hash2', + url: 'https://example.com/b.pdf', + size: 200, + fileType: 'application/pdf', + creator: userId, + }, + ]); + await serverDB.insert(files).values([ + { + id: 'file1', + name: 'a.pdf', + url: 'https://example.com/a.pdf', + fileHash: 'hash1', + size: 100, + fileType: 'application/pdf', + userId, + }, + { + id: 'file2', + name: 'b.pdf', + url: 'https://example.com/b.pdf', + fileHash: 'hash2', + size: 200, + fileType: 'application/pdf', + userId, + }, + ]); + const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' }); + const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' }); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1', 'file2']); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file1']); + const exclusiveIds = await knowledgeBaseModel.findExclusiveFileIds(kb1); + expect(exclusiveIds).toEqual(['file2']); + }); + + it('should return empty array when all files are shared', async () => { + await serverDB + .insert(globalFiles) + .values([ + { + hashId: 'hash1', + url: 'https://example.com/a.pdf', + size: 100, + fileType: 'application/pdf', + creator: userId, + }, + ]); + await serverDB + .insert(files) + .values([ + { + id: 'file1', + name: 'a.pdf', + url: 'https://example.com/a.pdf', + fileHash: 'hash1', + size: 100, + fileType: 'application/pdf', + userId, + }, + ]); + const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' }); + const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' }); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1']); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file1']); + const exclusiveIds = await knowledgeBaseModel.findExclusiveFileIds(kb1); + expect(exclusiveIds).toEqual([]); + }); + + it('should return all files when none are shared', async () => { + await serverDB.insert(globalFiles).values([ + { + hashId: 'hash1', + url: 'https://example.com/a.pdf', + size: 100, + fileType: 'application/pdf', + creator: userId, + }, + { + hashId: 'hash2', + url: 'https://example.com/b.pdf', + size: 200, + fileType: 'application/pdf', + creator: userId, + }, + ]); + await serverDB.insert(files).values([ + { + id: 'file1', + name: 'a.pdf', + url: 'https://example.com/a.pdf', + fileHash: 'hash1', + size: 100, + fileType: 'application/pdf', + userId, + }, + { + id: 'file2', + name: 'b.pdf', + url: 'https://example.com/b.pdf', + fileHash: 'hash2', + size: 200, + fileType: 'application/pdf', + userId, + }, + ]); + const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' }); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1', 'file2']); + const exclusiveIds = await knowledgeBaseModel.findExclusiveFileIds(kb1); + expect(exclusiveIds.sort()).toEqual(['file1', 'file2']); + }); + + it('should return empty array when KB has no files', async () => { + const { id: kb1 } = await knowledgeBaseModel.create({ name: 'Empty KB' }); + const exclusiveIds = await knowledgeBaseModel.findExclusiveFileIds(kb1); + expect(exclusiveIds).toEqual([]); + }); + }); + + describe('deleteWithFiles', () => { + it('should delete KB and its exclusive files', async () => { + await serverDB + .insert(globalFiles) + .values([ + { + hashId: 'hash1', + url: 'https://example.com/a.pdf', + size: 100, + fileType: 'application/pdf', + creator: userId, + }, + ]); + await serverDB + .insert(files) + .values([ + { + id: 'file1', + name: 'a.pdf', + url: 'https://example.com/a.pdf', + fileHash: 'hash1', + size: 100, + fileType: 'application/pdf', + userId, + }, + ]); + const { id: kbId } = await knowledgeBaseModel.create({ name: 'KB1' }); + await knowledgeBaseModel.addFilesToKnowledgeBase(kbId, ['file1']); + const result = await knowledgeBaseModel.deleteWithFiles(kbId); + expect( + await serverDB.query.knowledgeBases.findFirst({ where: eq(knowledgeBases.id, kbId) }), + ).toBeUndefined(); + expect( + await serverDB.query.files.findFirst({ where: eq(files.id, 'file1') }), + ).toBeUndefined(); + const kbFiles = await serverDB.query.knowledgeBaseFiles.findMany({ + where: eq(knowledgeBaseFiles.knowledgeBaseId, kbId), + }); + expect(kbFiles).toHaveLength(0); + expect(result.deletedFiles).toHaveLength(1); + }); + + it('should NOT delete files shared with another KB', async () => { + await serverDB.insert(globalFiles).values([ + { + hashId: 'hash1', + url: 'https://example.com/a.pdf', + size: 100, + fileType: 'application/pdf', + creator: userId, + }, + { + hashId: 'hash2', + url: 'https://example.com/b.pdf', + size: 200, + fileType: 'application/pdf', + creator: userId, + }, + ]); + await serverDB.insert(files).values([ + { + id: 'file1', + name: 'a.pdf', + url: 'https://example.com/a.pdf', + fileHash: 'hash1', + size: 100, + fileType: 'application/pdf', + userId, + }, + { + id: 'file2', + name: 'b.pdf', + url: 'https://example.com/b.pdf', + fileHash: 'hash2', + size: 200, + fileType: 'application/pdf', + userId, + }, + ]); + const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' }); + const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' }); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1', 'file2']); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file1']); + const result = await knowledgeBaseModel.deleteWithFiles(kb1); + expect( + await serverDB.query.knowledgeBases.findFirst({ where: eq(knowledgeBases.id, kb1) }), + ).toBeUndefined(); + expect(await serverDB.query.files.findFirst({ where: eq(files.id, 'file1') })).toBeDefined(); + expect( + await serverDB.query.files.findFirst({ where: eq(files.id, 'file2') }), + ).toBeUndefined(); + const kb2Files = await serverDB.query.knowledgeBaseFiles.findMany({ + where: eq(knowledgeBaseFiles.knowledgeBaseId, kb2), + }); + expect(kb2Files).toHaveLength(1); + expect(kb2Files[0].fileId).toBe('file1'); + expect(result.deletedFiles).toHaveLength(1); + }); + + it('should handle KB with no files', async () => { + const { id: kbId } = await knowledgeBaseModel.create({ name: 'Empty KB' }); + const result = await knowledgeBaseModel.deleteWithFiles(kbId); + expect( + await serverDB.query.knowledgeBases.findFirst({ where: eq(knowledgeBases.id, kbId) }), + ).toBeUndefined(); + expect(result.deletedFiles).toHaveLength(0); + }); + }); + + describe('deleteAllWithFiles', () => { + it('should delete all KBs and their exclusive files', async () => { + await serverDB.insert(globalFiles).values([ + { + hashId: 'hash1', + url: 'https://example.com/a.pdf', + size: 100, + fileType: 'application/pdf', + creator: userId, + }, + { + hashId: 'hash2', + url: 'https://example.com/b.pdf', + size: 200, + fileType: 'application/pdf', + creator: userId, + }, + ]); + await serverDB.insert(files).values([ + { + id: 'file1', + name: 'a.pdf', + url: 'https://example.com/a.pdf', + fileHash: 'hash1', + size: 100, + fileType: 'application/pdf', + userId, + }, + { + id: 'file2', + name: 'b.pdf', + url: 'https://example.com/b.pdf', + fileHash: 'hash2', + size: 200, + fileType: 'application/pdf', + userId, + }, + ]); + const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' }); + const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' }); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1']); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file2']); + const result = await knowledgeBaseModel.deleteAllWithFiles(); + const remaining = await serverDB.query.knowledgeBases.findMany({ + where: eq(knowledgeBases.userId, userId), + }); + expect(remaining).toHaveLength(0); + expect( + await serverDB.query.files.findFirst({ where: eq(files.id, 'file1') }), + ).toBeUndefined(); + expect( + await serverDB.query.files.findFirst({ where: eq(files.id, 'file2') }), + ).toBeUndefined(); + expect(result.deletedFiles.length).toBe(2); + }); + + it('should delete shared file when both KBs sharing it are deleted', async () => { + await serverDB + .insert(globalFiles) + .values([ + { + hashId: 'hash1', + url: 'https://example.com/a.pdf', + size: 100, + fileType: 'application/pdf', + creator: userId, + }, + ]); + await serverDB + .insert(files) + .values([ + { + id: 'file1', + name: 'a.pdf', + url: 'https://example.com/a.pdf', + fileHash: 'hash1', + size: 100, + fileType: 'application/pdf', + userId, + }, + ]); + const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' }); + const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' }); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1']); + await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file1']); + const result = await knowledgeBaseModel.deleteAllWithFiles(); + expect( + await serverDB.query.files.findFirst({ where: eq(files.id, 'file1') }), + ).toBeUndefined(); + expect(result.deletedFiles.length).toBe(1); + }); + + it('should not delete other users KBs or files', async () => { + const anotherModel = new KnowledgeBaseModel(serverDB, 'user2'); + const { id: otherKb } = await anotherModel.create({ name: 'Other KB' }); + await knowledgeBaseModel.deleteAllWithFiles(); + expect( + await serverDB.query.knowledgeBases.findFirst({ where: eq(knowledgeBases.id, otherKb) }), + ).toBeDefined(); + }); + }); + describe('static findById', () => { it('should find a knowledge base by id without user restriction', async () => { const { id } = await knowledgeBaseModel.create({ name: 'Test Group' }); diff --git a/packages/database/src/models/agentEval/__tests__/benchmark.test.ts b/packages/database/src/models/agentEval/__tests__/benchmark.test.ts index 86f0b17c94..5664524787 100644 --- a/packages/database/src/models/agentEval/__tests__/benchmark.test.ts +++ b/packages/database/src/models/agentEval/__tests__/benchmark.test.ts @@ -519,7 +519,6 @@ describe('AgentEvalBenchmarkModel', () => { expect(result?.name).toBe('Updated Name'); expect(result?.description).toBe('New description'); expect(result?.updatedAt).toBeDefined(); - expect(result?.updatedAt.getTime()).toBeGreaterThanOrEqual(result!.createdAt.getTime()); }); it('should not update a system benchmark', async () => { diff --git a/packages/database/src/models/agentEval/__tests__/dataset.test.ts b/packages/database/src/models/agentEval/__tests__/dataset.test.ts index ec9e7e2ae7..e34c534cb2 100644 --- a/packages/database/src/models/agentEval/__tests__/dataset.test.ts +++ b/packages/database/src/models/agentEval/__tests__/dataset.test.ts @@ -323,7 +323,6 @@ describe('AgentEvalDatasetModel', () => { expect(result?.name).toBe('Updated Name'); expect(result?.description).toBe('New description'); expect(result?.updatedAt).toBeDefined(); - expect(result?.updatedAt.getTime()).toBeGreaterThanOrEqual(result!.createdAt.getTime()); }); it('should not update a dataset owned by another user', async () => { diff --git a/packages/database/src/models/agentEval/__tests__/run.test.ts b/packages/database/src/models/agentEval/__tests__/run.test.ts index 1171833e5b..7038aad88d 100644 --- a/packages/database/src/models/agentEval/__tests__/run.test.ts +++ b/packages/database/src/models/agentEval/__tests__/run.test.ts @@ -303,7 +303,6 @@ describe('AgentEvalRunModel', () => { passRate: 0.5, }); expect(result?.updatedAt).toBeDefined(); - expect(result?.updatedAt.getTime()).toBeGreaterThanOrEqual(result!.createdAt.getTime()); }); it('should not update a run owned by another user', async () => { diff --git a/packages/database/src/models/agentEval/__tests__/testCase.test.ts b/packages/database/src/models/agentEval/__tests__/testCase.test.ts index e41b16ab96..ca8f280e5e 100644 --- a/packages/database/src/models/agentEval/__tests__/testCase.test.ts +++ b/packages/database/src/models/agentEval/__tests__/testCase.test.ts @@ -477,7 +477,6 @@ describe('AgentEvalTestCaseModel', () => { expect(result?.content.expected).toBe('New answer'); expect(result?.metadata).toEqual({ reviewed: true }); expect(result?.updatedAt).toBeDefined(); - expect(result?.updatedAt.getTime()).toBeGreaterThanOrEqual(result!.createdAt.getTime()); }); it('should update only sortOrder', async () => { diff --git a/packages/database/src/models/file.ts b/packages/database/src/models/file.ts index 6eb37046c9..2dc848001d 100644 --- a/packages/database/src/models/file.ts +++ b/packages/database/src/models/file.ts @@ -390,31 +390,11 @@ export class FileModel { const batchChunkIds = chunkIds.slice(startIdx, startIdx + BATCH_SIZE); if (batchChunkIds.length === 0) continue; - // Process each batch in the correct deletion order, failures do not block the flow + // Process each batch in the correct deletion order. const batchPromise = (async () => { - // 1. Delete embeddings (top-level, has foreign key dependencies) - try { - await trx.delete(embeddings).where(inArray(embeddings.chunkId, batchChunkIds)); - } catch (e) { - // Silent handling, does not block deletion process - console.warn('Failed to delete embeddings:', e); - } - - // 2. Delete documentChunks association (if exists) - try { - await trx.delete(documentChunks).where(inArray(documentChunks.chunkId, batchChunkIds)); - } catch (e) { - // Silent handling, does not block deletion process - console.warn('Failed to delete documentChunks:', e); - } - - // 3. Delete chunks (core data) - try { - await trx.delete(chunks).where(inArray(chunks.id, batchChunkIds)); - } catch (e) { - // Silent handling, does not block deletion process - console.warn('Failed to delete chunks:', e); - } + await trx.delete(embeddings).where(inArray(embeddings.chunkId, batchChunkIds)); + await trx.delete(documentChunks).where(inArray(documentChunks.chunkId, batchChunkIds)); + await trx.delete(chunks).where(inArray(chunks.id, batchChunkIds)); })(); batchPromises.push(batchPromise); @@ -425,12 +405,7 @@ export class FileModel { } // 4. Finally delete fileChunks association table records - try { - await trx.delete(fileChunks).where(inArray(fileChunks.fileId, fileIds)); - } catch (e) { - // Silent handling, does not block deletion process - console.warn('Failed to delete fileChunks:', e); - } + await trx.delete(fileChunks).where(inArray(fileChunks.fileId, fileIds)); return chunkIds; }; diff --git a/packages/database/src/models/knowledgeBase.ts b/packages/database/src/models/knowledgeBase.ts index b57f148f63..1d267983c0 100644 --- a/packages/database/src/models/knowledgeBase.ts +++ b/packages/database/src/models/knowledgeBase.ts @@ -1,9 +1,10 @@ import type { KnowledgeBaseItem } from '@lobechat/types'; -import { and, desc, eq, inArray } from 'drizzle-orm'; +import { and, count, desc, eq, inArray } from 'drizzle-orm'; import type { NewKnowledgeBase } from '../schemas'; import { documents, knowledgeBaseFiles, knowledgeBases } from '../schemas'; import type { LobeChatDatabase } from '../type'; +import { FileModel } from './file'; export class KnowledgeBaseModel { private userId: string; @@ -160,6 +161,73 @@ export class KnowledgeBaseModel { .set({ ...value, updatedAt: new Date() }) .where(and(eq(knowledgeBases.id, id), eq(knowledgeBases.userId, this.userId))); + findExclusiveFileIds = async (knowledgeBaseId: string): Promise => { + const kbFiles = await this.db + .select({ fileId: knowledgeBaseFiles.fileId }) + .from(knowledgeBaseFiles) + .where( + and( + eq(knowledgeBaseFiles.knowledgeBaseId, knowledgeBaseId), + eq(knowledgeBaseFiles.userId, this.userId), + ), + ); + const fileIds = kbFiles.map((f) => f.fileId); + if (fileIds.length === 0) return []; + + const sharedFiles = await this.db + .select({ + fileId: knowledgeBaseFiles.fileId, + kbCount: count(knowledgeBaseFiles.knowledgeBaseId), + }) + .from(knowledgeBaseFiles) + .where( + and( + inArray(knowledgeBaseFiles.fileId, fileIds), + eq(knowledgeBaseFiles.userId, this.userId), + ), + ) + .groupBy(knowledgeBaseFiles.fileId); + + return sharedFiles.filter((f) => Number(f.kbCount) === 1).map((f) => f.fileId); + }; + + deleteWithFiles = async (id: string, removeGlobalFile: boolean = true) => { + const exclusiveFileIds = await this.findExclusiveFileIds(id); + + let deletedFiles: Array<{ id: string; url: string | null }> = []; + if (exclusiveFileIds.length > 0) { + const fileModel = new FileModel(this.db, this.userId); + const result = await fileModel.deleteMany(exclusiveFileIds, removeGlobalFile); + deletedFiles = (result || []).map((f) => ({ id: f.id, url: f.url })); + } + + await this.db + .delete(knowledgeBases) + .where(and(eq(knowledgeBases.id, id), eq(knowledgeBases.userId, this.userId))); + + return { deletedFiles }; + }; + + deleteAllWithFiles = async (removeGlobalFile: boolean = true) => { + const allKbFileIds = await this.db + .select({ fileId: knowledgeBaseFiles.fileId }) + .from(knowledgeBaseFiles) + .where(eq(knowledgeBaseFiles.userId, this.userId)); + + const fileIds = [...new Set(allKbFileIds.map((f) => f.fileId))]; + + let deletedFiles: Array<{ id: string; url: string | null }> = []; + if (fileIds.length > 0) { + const fileModel = new FileModel(this.db, this.userId); + const result = await fileModel.deleteMany(fileIds, removeGlobalFile); + deletedFiles = (result || []).map((f) => ({ id: f.id, url: f.url })); + } + + await this.db.delete(knowledgeBases).where(eq(knowledgeBases.userId, this.userId)); + + return { deletedFiles }; + }; + static findById = async (db: LobeChatDatabase, id: string) => db.query.knowledgeBases.findFirst({ where: eq(knowledgeBases.id, id), diff --git a/packages/database/src/repositories/knowledge/__tests__/index.test.ts b/packages/database/src/repositories/knowledge/__tests__/index.test.ts index e1442fc40e..f142e24346 100644 --- a/packages/database/src/repositories/knowledge/__tests__/index.test.ts +++ b/packages/database/src/repositories/knowledge/__tests__/index.test.ts @@ -4,7 +4,16 @@ import { eq } from 'drizzle-orm'; import { beforeEach, describe, expect, it } from 'vitest'; import { getTestDB } from '../../../core/getTestDB'; -import { documents, files, knowledgeBaseFiles, knowledgeBases, users } from '../../../schemas'; +import { + chunks, + documents, + embeddings, + fileChunks, + files, + knowledgeBaseFiles, + knowledgeBases, + users, +} from '../../../schemas'; import type { LobeChatDatabase } from '../../../type'; import { KnowledgeRepo } from '../index'; @@ -12,8 +21,14 @@ const serverDB: LobeChatDatabase = await getTestDB(); const userId = 'knowledge-repo-test-user'; const otherUserId = 'other-knowledge-user'; +const deleteDocChunkId = '33333333-3333-4333-8333-333333333333'; +const deleteManyDocChunkId = '44444444-4444-4444-8444-444444444444'; +const deleteFolderFileChunkId = '55555555-5555-4555-8555-555555555555'; +const deleteFolderDocChunkId = '66666666-6666-4666-8666-666666666666'; +const deleteNestedFolderFileChunkId = '77777777-7777-4777-8777-777777777777'; let knowledgeRepo: KnowledgeRepo; +const testEmbedding = Array.from({ length: 1024 }, () => 0.1); beforeEach(async () => { // Clean up @@ -397,6 +412,15 @@ describe('KnowledgeRepo', () => { url: 'https://example.com/delete.txt', }); + await serverDB.insert(files).values({ + id: 'delete-doc-file', + userId, + name: 'delete-doc-file.pdf', + fileType: 'application/pdf', + size: 2048, + url: 'https://example.com/delete-doc-file.pdf', + }); + await serverDB.insert(documents).values([ { id: 'delete-doc', @@ -408,6 +432,153 @@ describe('KnowledgeRepo', () => { totalCharCount: 100, totalLineCount: 2, }, + { + id: 'delete-folder', + userId, + title: 'Folder To Delete', + fileType: 'custom/folder', + sourceType: 'topic', + source: 'internal://folder/delete-folder', + totalCharCount: 0, + totalLineCount: 0, + }, + ]); + await serverDB.insert(files).values([ + { + id: 'delete-folder-file', + userId, + name: 'delete-folder-file.pdf', + fileType: 'application/pdf', + size: 256, + parentId: 'delete-folder', + url: 'https://example.com/delete-folder-file.pdf', + }, + { + id: 'delete-folder-doc-file', + userId, + name: 'delete-folder-doc-file.pdf', + fileType: 'application/pdf', + size: 512, + url: 'https://example.com/delete-folder-doc-file.pdf', + }, + ]); + await serverDB.insert(documents).values([ + { + id: 'delete-doc-with-file', + userId, + title: 'To Delete File-Backed Note', + fileId: 'delete-doc-file', + fileType: 'application/pdf', + filename: 'delete-doc-file.pdf', + sourceType: 'api', + source: 'internal://note/delete-doc-with-file', + totalCharCount: 120, + totalLineCount: 3, + }, + { + id: 'delete-folder-doc', + userId, + parentId: 'delete-folder', + title: 'Folder Child Doc', + fileId: 'delete-folder-doc-file', + fileType: 'application/pdf', + filename: 'delete-folder-doc-file.pdf', + sourceType: 'api', + source: 'internal://note/delete-folder-doc', + totalCharCount: 90, + totalLineCount: 2, + }, + { + id: 'delete-folder-child', + userId, + parentId: 'delete-folder', + title: 'Nested Folder', + fileType: 'custom/folder', + sourceType: 'topic', + source: 'internal://folder/delete-folder-child', + totalCharCount: 0, + totalLineCount: 0, + }, + ]); + await serverDB.insert(files).values({ + id: 'delete-folder-child-file', + userId, + name: 'delete-folder-child-file.pdf', + fileType: 'application/pdf', + size: 768, + parentId: 'delete-folder-child', + url: 'https://example.com/delete-folder-child-file.pdf', + }); + + await serverDB.insert(chunks).values({ + id: deleteDocChunkId, + text: 'chunk for mirrored file', + userId, + }); + await serverDB.insert(fileChunks).values({ + chunkId: deleteDocChunkId, + fileId: 'delete-doc-file', + userId, + }); + await serverDB.insert(embeddings).values({ + chunkId: deleteDocChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }); + await serverDB.insert(chunks).values([ + { + id: deleteFolderFileChunkId, + text: 'chunk for folder file', + userId, + }, + { + id: deleteFolderDocChunkId, + text: 'chunk for folder child mirrored file', + userId, + }, + { + id: deleteNestedFolderFileChunkId, + text: 'chunk for nested folder file', + userId, + }, + ]); + await serverDB.insert(fileChunks).values([ + { + chunkId: deleteFolderFileChunkId, + fileId: 'delete-folder-file', + userId, + }, + { + chunkId: deleteFolderDocChunkId, + fileId: 'delete-folder-doc-file', + userId, + }, + { + chunkId: deleteNestedFolderFileChunkId, + fileId: 'delete-folder-child-file', + userId, + }, + ]); + await serverDB.insert(embeddings).values([ + { + chunkId: deleteFolderFileChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }, + { + chunkId: deleteFolderDocChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }, + { + chunkId: deleteNestedFolderFileChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }, ]); }); @@ -428,6 +599,82 @@ describe('KnowledgeRepo', () => { }); expect(doc).toBeUndefined(); }); + + it('should delete mirrored file data when deleting a file-backed document', async () => { + await knowledgeRepo.deleteItem('delete-doc-with-file', 'document'); + + const doc = await serverDB.query.documents.findFirst({ + where: eq(documents.id, 'delete-doc-with-file'), + }); + const file = await serverDB.query.files.findFirst({ + where: eq(files.id, 'delete-doc-file'), + }); + const chunk = await serverDB.query.chunks.findFirst({ + where: eq(chunks.id, deleteDocChunkId), + }); + const embedding = await serverDB.query.embeddings.findFirst({ + where: eq(embeddings.chunkId, deleteDocChunkId), + }); + + expect(doc).toBeUndefined(); + expect(file).toBeUndefined(); + expect(chunk).toBeUndefined(); + expect(embedding).toBeUndefined(); + }); + + it('should recursively delete child documents, files and vectors when deleting a folder', async () => { + await knowledgeRepo.deleteItem('delete-folder', 'document'); + + const folder = await serverDB.query.documents.findFirst({ + where: eq(documents.id, 'delete-folder'), + }); + const childDoc = await serverDB.query.documents.findFirst({ + where: eq(documents.id, 'delete-folder-doc'), + }); + const childFolder = await serverDB.query.documents.findFirst({ + where: eq(documents.id, 'delete-folder-child'), + }); + const folderFile = await serverDB.query.files.findFirst({ + where: eq(files.id, 'delete-folder-file'), + }); + const childDocFile = await serverDB.query.files.findFirst({ + where: eq(files.id, 'delete-folder-doc-file'), + }); + const nestedFolderFile = await serverDB.query.files.findFirst({ + where: eq(files.id, 'delete-folder-child-file'), + }); + const folderFileChunk = await serverDB.query.chunks.findFirst({ + where: eq(chunks.id, deleteFolderFileChunkId), + }); + const childDocChunk = await serverDB.query.chunks.findFirst({ + where: eq(chunks.id, deleteFolderDocChunkId), + }); + const nestedFolderFileChunk = await serverDB.query.chunks.findFirst({ + where: eq(chunks.id, deleteNestedFolderFileChunkId), + }); + const folderFileEmbedding = await serverDB.query.embeddings.findFirst({ + where: eq(embeddings.chunkId, deleteFolderFileChunkId), + }); + const childDocEmbedding = await serverDB.query.embeddings.findFirst({ + where: eq(embeddings.chunkId, deleteFolderDocChunkId), + }); + const nestedFolderFileEmbedding = await serverDB.query.embeddings.findFirst({ + where: eq(embeddings.chunkId, deleteNestedFolderFileChunkId), + }); + + expect(folder).toBeUndefined(); + expect(childDoc).toBeUndefined(); + expect(childFolder).toBeUndefined(); + expect(folderFile).toBeUndefined(); + expect(childDocFile).toBeUndefined(); + expect(nestedFolderFile).toBeUndefined(); + expect(folderFileChunk).toBeUndefined(); + expect(childDocChunk).toBeUndefined(); + expect(nestedFolderFileChunk).toBeUndefined(); + expect(folderFileEmbedding).toBeUndefined(); + expect(childDocEmbedding).toBeUndefined(); + expect(nestedFolderFileEmbedding).toBeUndefined(); + }); }); describe('deleteMany', () => { @@ -449,6 +696,14 @@ describe('KnowledgeRepo', () => { size: 100, url: 'https://example.com/delete2.txt', }, + { + id: 'delete-many-doc-file-1', + userId, + name: 'delete-many-doc-file-1.pdf', + fileType: 'application/pdf', + size: 512, + url: 'https://example.com/delete-many-doc-file-1.pdf', + }, ]); await serverDB.insert(documents).values([ @@ -456,6 +711,7 @@ describe('KnowledgeRepo', () => { id: 'delete-many-doc-1', userId, title: 'Delete Note 1', + fileId: 'delete-many-doc-file-1', fileType: 'custom/note', sourceType: 'topic', source: 'internal://note/delete-many-doc-1', @@ -473,6 +729,22 @@ describe('KnowledgeRepo', () => { totalLineCount: 2, }, ]); + await serverDB.insert(chunks).values({ + id: deleteManyDocChunkId, + text: 'delete many mirrored chunk', + userId, + }); + await serverDB.insert(fileChunks).values({ + chunkId: deleteManyDocChunkId, + fileId: 'delete-many-doc-file-1', + userId, + }); + await serverDB.insert(embeddings).values({ + chunkId: deleteManyDocChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }); }); it('should delete multiple files and documents', async () => { @@ -495,11 +767,23 @@ describe('KnowledgeRepo', () => { const doc2 = await serverDB.query.documents.findFirst({ where: eq(documents.id, 'delete-many-doc-2'), }); + const mirroredFile = await serverDB.query.files.findFirst({ + where: eq(files.id, 'delete-many-doc-file-1'), + }); + const chunk = await serverDB.query.chunks.findFirst({ + where: eq(chunks.id, deleteManyDocChunkId), + }); + const embedding = await serverDB.query.embeddings.findFirst({ + where: eq(embeddings.chunkId, deleteManyDocChunkId), + }); expect(file1).toBeUndefined(); expect(file2).toBeUndefined(); expect(doc1).toBeUndefined(); expect(doc2).toBeUndefined(); + expect(mirroredFile).toBeUndefined(); + expect(chunk).toBeUndefined(); + expect(embedding).toBeUndefined(); }); it('should handle empty arrays', async () => { diff --git a/packages/database/src/repositories/knowledge/index.test.ts b/packages/database/src/repositories/knowledge/index.test.ts index fd47c3ba1e..0a1ddfe1d5 100644 --- a/packages/database/src/repositories/knowledge/index.test.ts +++ b/packages/database/src/repositories/knowledge/index.test.ts @@ -5,14 +5,22 @@ import { beforeEach, describe, expect, it } from 'vitest'; import { getTestDB } from '../../core/getTestDB'; import type { NewDocument, NewFile } from '../../schemas/file'; import { documents, files } from '../../schemas/file'; +import { chunks, embeddings } from '../../schemas/rag'; +import { fileChunks } from '../../schemas/relations'; import { users } from '../../schemas/user'; import type { LobeChatDatabase } from '../../type'; import { KnowledgeRepo } from './index'; const userId = 'knowledge-test-user'; const otherUserId = 'other-knowledge-user'; +const deleteDocChunkId = '11111111-1111-4111-8111-111111111111'; +const deleteManyDocChunkId = '22222222-2222-4222-8222-222222222222'; +const deleteFolderFileChunkId = '33333333-3333-4333-8333-333333333333'; +const deleteFolderDocChunkId = '44444444-4444-4444-8444-444444444444'; +const deleteNestedFolderFileChunkId = '55555555-5555-4555-8555-555555555555'; let knowledgeRepo: KnowledgeRepo; +const testEmbedding = Array.from({ length: 1024 }, () => 0.1); const serverDB: LobeChatDatabase = await getTestDB(); @@ -556,17 +564,177 @@ describe('KnowledgeRepo', () => { userId, }); - await serverDB.insert(documents).values({ - id: 'delete-doc', - content: 'Document to delete', - fileType: 'custom/other', - filename: 'to-delete-doc.txt', - source: 'source', - sourceType: 'api', - totalCharCount: 100, - totalLineCount: 10, + await serverDB.insert(files).values({ + id: 'delete-doc-file', + fileType: 'application/pdf', + name: 'delete-doc-file.pdf', + size: 2048, + url: 'delete-doc-file-url', userId, }); + + await serverDB.insert(documents).values([ + { + id: 'delete-doc', + content: 'Document to delete', + fileType: 'custom/other', + filename: 'to-delete-doc.txt', + source: 'source', + sourceType: 'api', + totalCharCount: 100, + totalLineCount: 10, + userId, + }, + { + id: 'delete-folder', + content: '', + fileType: 'custom/folder', + filename: 'delete-folder', + source: 'source', + sourceType: 'api', + totalCharCount: 0, + totalLineCount: 0, + userId, + }, + ]); + await serverDB.insert(files).values([ + { + id: 'delete-folder-file', + fileType: 'application/pdf', + name: 'delete-folder-file.pdf', + parentId: 'delete-folder', + size: 1024, + url: 'delete-folder-file-url', + userId, + }, + { + id: 'delete-folder-doc-file', + fileType: 'application/pdf', + name: 'delete-folder-doc-file.pdf', + size: 1024, + url: 'delete-folder-doc-file-url', + userId, + }, + ]); + await serverDB.insert(documents).values([ + { + id: 'delete-doc-with-file', + content: 'Document with mirrored file', + fileId: 'delete-doc-file', + fileType: 'application/pdf', + filename: 'delete-doc-file.pdf', + source: 'source', + sourceType: 'api', + totalCharCount: 120, + totalLineCount: 12, + userId, + }, + { + id: 'delete-folder-doc', + content: 'Folder child document', + fileId: 'delete-folder-doc-file', + fileType: 'application/pdf', + filename: 'delete-folder-doc-file.pdf', + parentId: 'delete-folder', + source: 'source', + sourceType: 'api', + totalCharCount: 80, + totalLineCount: 8, + userId, + }, + { + id: 'delete-folder-child', + content: '', + fileType: 'custom/folder', + filename: 'delete-folder-child', + parentId: 'delete-folder', + source: 'source', + sourceType: 'api', + totalCharCount: 0, + totalLineCount: 0, + userId, + }, + ]); + await serverDB.insert(files).values({ + id: 'delete-folder-child-file', + fileType: 'application/pdf', + name: 'delete-folder-child-file.pdf', + parentId: 'delete-folder-child', + size: 1024, + url: 'delete-folder-child-file-url', + userId, + }); + + await serverDB.insert(chunks).values({ + id: deleteDocChunkId, + text: 'chunk for document file', + userId, + }); + await serverDB.insert(fileChunks).values({ + chunkId: deleteDocChunkId, + fileId: 'delete-doc-file', + userId, + }); + await serverDB.insert(embeddings).values({ + chunkId: deleteDocChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }); + await serverDB.insert(chunks).values([ + { + id: deleteFolderFileChunkId, + text: 'chunk for folder file', + userId, + }, + { + id: deleteFolderDocChunkId, + text: 'chunk for folder mirrored file', + userId, + }, + { + id: deleteNestedFolderFileChunkId, + text: 'chunk for nested folder file', + userId, + }, + ]); + await serverDB.insert(fileChunks).values([ + { + chunkId: deleteFolderFileChunkId, + fileId: 'delete-folder-file', + userId, + }, + { + chunkId: deleteFolderDocChunkId, + fileId: 'delete-folder-doc-file', + userId, + }, + { + chunkId: deleteNestedFolderFileChunkId, + fileId: 'delete-folder-child-file', + userId, + }, + ]); + await serverDB.insert(embeddings).values([ + { + chunkId: deleteFolderFileChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }, + { + chunkId: deleteFolderDocChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }, + { + chunkId: deleteNestedFolderFileChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }, + ]); }); it('should delete a file by id', async () => { @@ -588,6 +756,82 @@ describe('KnowledgeRepo', () => { }); expect(result).toBeUndefined(); }); + + it('should delete mirrored file data when deleting a file-backed document', async () => { + await knowledgeRepo.deleteItem('delete-doc-with-file', 'document'); + + const document = await serverDB.query.documents.findFirst({ + where: (d, { eq }) => eq(d.id, 'delete-doc-with-file'), + }); + const file = await serverDB.query.files.findFirst({ + where: (f, { eq }) => eq(f.id, 'delete-doc-file'), + }); + const chunk = await serverDB.query.chunks.findFirst({ + where: (c, { eq }) => eq(c.id, deleteDocChunkId), + }); + const embedding = await serverDB.query.embeddings.findFirst({ + where: (e, { eq }) => eq(e.chunkId, deleteDocChunkId), + }); + + expect(document).toBeUndefined(); + expect(file).toBeUndefined(); + expect(chunk).toBeUndefined(); + expect(embedding).toBeUndefined(); + }); + + it('should recursively delete child documents, files and vectors when deleting a folder', async () => { + await knowledgeRepo.deleteItem('delete-folder', 'document'); + + const folder = await serverDB.query.documents.findFirst({ + where: (d, { eq }) => eq(d.id, 'delete-folder'), + }); + const childDoc = await serverDB.query.documents.findFirst({ + where: (d, { eq }) => eq(d.id, 'delete-folder-doc'), + }); + const childFolder = await serverDB.query.documents.findFirst({ + where: (d, { eq }) => eq(d.id, 'delete-folder-child'), + }); + const folderFile = await serverDB.query.files.findFirst({ + where: (f, { eq }) => eq(f.id, 'delete-folder-file'), + }); + const childDocFile = await serverDB.query.files.findFirst({ + where: (f, { eq }) => eq(f.id, 'delete-folder-doc-file'), + }); + const nestedFolderFile = await serverDB.query.files.findFirst({ + where: (f, { eq }) => eq(f.id, 'delete-folder-child-file'), + }); + const folderFileChunk = await serverDB.query.chunks.findFirst({ + where: (c, { eq }) => eq(c.id, deleteFolderFileChunkId), + }); + const childDocChunk = await serverDB.query.chunks.findFirst({ + where: (c, { eq }) => eq(c.id, deleteFolderDocChunkId), + }); + const nestedFolderFileChunk = await serverDB.query.chunks.findFirst({ + where: (c, { eq }) => eq(c.id, deleteNestedFolderFileChunkId), + }); + const folderFileEmbedding = await serverDB.query.embeddings.findFirst({ + where: (e, { eq }) => eq(e.chunkId, deleteFolderFileChunkId), + }); + const childDocEmbedding = await serverDB.query.embeddings.findFirst({ + where: (e, { eq }) => eq(e.chunkId, deleteFolderDocChunkId), + }); + const nestedFolderFileEmbedding = await serverDB.query.embeddings.findFirst({ + where: (e, { eq }) => eq(e.chunkId, deleteNestedFolderFileChunkId), + }); + + expect(folder).toBeUndefined(); + expect(childDoc).toBeUndefined(); + expect(childFolder).toBeUndefined(); + expect(folderFile).toBeUndefined(); + expect(childDocFile).toBeUndefined(); + expect(nestedFolderFile).toBeUndefined(); + expect(folderFileChunk).toBeUndefined(); + expect(childDocChunk).toBeUndefined(); + expect(nestedFolderFileChunk).toBeUndefined(); + expect(folderFileEmbedding).toBeUndefined(); + expect(childDocEmbedding).toBeUndefined(); + expect(nestedFolderFileEmbedding).toBeUndefined(); + }); }); describe('deleteMany', () => { @@ -609,12 +853,21 @@ describe('KnowledgeRepo', () => { url: 'url-2', userId, }, + { + id: 'delete-many-doc-file-1', + fileType: 'application/pdf', + name: 'delete-many-doc-file-1.pdf', + size: 1024, + url: 'delete-many-doc-file-1-url', + userId, + }, ]); await serverDB.insert(documents).values([ { id: 'delete-many-doc-1', content: 'Delete doc 1', + fileId: 'delete-many-doc-file-1', fileType: 'custom/other', filename: 'delete-doc-1.txt', source: 'source', @@ -624,6 +877,22 @@ describe('KnowledgeRepo', () => { userId, }, ]); + await serverDB.insert(chunks).values({ + id: deleteManyDocChunkId, + text: 'delete many chunk', + userId, + }); + await serverDB.insert(fileChunks).values({ + chunkId: deleteManyDocChunkId, + fileId: 'delete-many-doc-file-1', + userId, + }); + await serverDB.insert(embeddings).values({ + chunkId: deleteManyDocChunkId, + embeddings: testEmbedding, + model: 'test-model', + userId, + }); }); it('should delete multiple items of mixed types', async () => { @@ -643,10 +912,22 @@ describe('KnowledgeRepo', () => { const doc1 = await serverDB.query.documents.findFirst({ where: (d, { eq }) => eq(d.id, 'delete-many-doc-1'), }); + const mirroredFile = await serverDB.query.files.findFirst({ + where: (f, { eq }) => eq(f.id, 'delete-many-doc-file-1'), + }); + const chunk = await serverDB.query.chunks.findFirst({ + where: (c, { eq }) => eq(c.id, deleteManyDocChunkId), + }); + const embedding = await serverDB.query.embeddings.findFirst({ + where: (e, { eq }) => eq(e.chunkId, deleteManyDocChunkId), + }); expect(file1).toBeUndefined(); expect(file2).toBeUndefined(); expect(doc1).toBeUndefined(); + expect(mirroredFile).toBeUndefined(); + expect(chunk).toBeUndefined(); + expect(embedding).toBeUndefined(); }); it('should handle empty items array', async () => { diff --git a/packages/database/src/repositories/knowledge/index.ts b/packages/database/src/repositories/knowledge/index.ts index c40cde4428..a39b472688 100644 --- a/packages/database/src/repositories/knowledge/index.ts +++ b/packages/database/src/repositories/knowledge/index.ts @@ -1,6 +1,6 @@ import type { QueryFileListParams } from '@lobechat/types'; import { FilesTabs, SortType } from '@lobechat/types'; -import { sql } from 'drizzle-orm'; +import { and, eq, sql } from 'drizzle-orm'; import { DocumentModel } from '../../models/document'; import { FileModel } from '../../models/file'; @@ -277,7 +277,7 @@ export class KnowledgeRepo { if (sourceType === 'file') { await this.fileModel.delete(id); } else { - await this.documentModel.delete(id); + await this.deleteDocumentWithRelations(id); } } @@ -293,7 +293,7 @@ export class KnowledgeRepo { await Promise.all([ fileIds.length > 0 ? this.fileModel.deleteMany(fileIds) : Promise.resolve(), documentIds.length > 0 - ? Promise.all(documentIds.map((id) => this.documentModel.delete(id))) + ? Promise.all(documentIds.map((id) => this.deleteDocumentWithRelations(id))) : Promise.resolve(), ]); } @@ -309,6 +309,35 @@ export class KnowledgeRepo { } } + private deleteDocumentWithRelations = async (id: string): Promise => { + const document = await this.documentModel.findById(id); + if (!document) return; + + if (document.fileType === 'custom/folder') { + const children = await this.db.query.documents.findMany({ + where: and(eq(documents.parentId, id), eq(documents.userId, this.userId)), + }); + + for (const child of children) { + await this.deleteDocumentWithRelations(child.id); + } + + const childFiles = await this.db.query.files.findMany({ + where: and(eq(files.parentId, id), eq(files.userId, this.userId)), + }); + + for (const file of childFiles) { + await this.fileModel.delete(file.id); + } + } + + if (document.fileId) { + await this.fileModel.delete(document.fileId); + } + + await this.documentModel.delete(id); + }; + private buildFileQuery({ category, q, diff --git a/packages/openapi/src/services/knowledge-base.service.ts b/packages/openapi/src/services/knowledge-base.service.ts index 12e57d67f9..6e03bcfa22 100644 --- a/packages/openapi/src/services/knowledge-base.service.ts +++ b/packages/openapi/src/services/knowledge-base.service.ts @@ -4,6 +4,7 @@ import { KnowledgeBaseModel } from '@/database/models/knowledgeBase'; import type { KnowledgeBaseItem } from '@/database/schemas'; import { knowledgeBases } from '@/database/schemas'; import type { LobeChatDatabase } from '@/database/type'; +import { FileService as CoreFileService } from '@/server/services/file'; import { BaseService } from '../common/base.service'; import { processPaginationConditions } from '../helpers/pagination'; @@ -237,8 +238,18 @@ export class KnowledgeBaseService extends BaseService { throw this.createNotFoundError('Knowledge base not found or access denied'); } - // Delete knowledge base - await this.knowledgeBaseModel.delete(id); + const result = await this.knowledgeBaseModel.deleteWithFiles(id); + + if (result.deletedFiles.length > 0) { + const fileService = new CoreFileService(this.db, this.userId); + const urls = result.deletedFiles + .map((f: { url: string | null }) => f.url) + .filter(Boolean) as string[]; + + if (urls.length > 0) { + await fileService.deleteFiles(urls); + } + } this.log('info', 'Knowledge base deleted successfully', { id }); diff --git a/src/server/routers/lambda/knowledgeBase.ts b/src/server/routers/lambda/knowledgeBase.ts index d2db6d9da4..cea518f091 100644 --- a/src/server/routers/lambda/knowledgeBase.ts +++ b/src/server/routers/lambda/knowledgeBase.ts @@ -1,10 +1,12 @@ import { TRPCError } from '@trpc/server'; import { z } from 'zod'; +import { serverDBEnv } from '@/config/db'; import { KnowledgeBaseModel } from '@/database/models/knowledgeBase'; import { insertKnowledgeBasesSchema } from '@/database/schemas'; import { authedProcedure, router } from '@/libs/trpc/lambda'; import { serverDatabase } from '@/libs/trpc/lambda/middleware'; +import { FileService } from '@/server/services/file'; import { type KnowledgeBaseItem } from '@/types/knowledgeBase'; const knowledgeBaseProcedure = authedProcedure.use(serverDatabase).use(async (opts) => { @@ -68,7 +70,15 @@ export const knowledgeBaseRouter = router({ }), removeAllKnowledgeBases: knowledgeBaseProcedure.mutation(async ({ ctx }) => { - return ctx.knowledgeBaseModel.deleteAll(); + const result = await ctx.knowledgeBaseModel.deleteAllWithFiles(serverDBEnv.REMOVE_GLOBAL_FILE); + + if (result.deletedFiles.length > 0) { + const fileService = new FileService(ctx.serverDB, ctx.userId); + const urls = result.deletedFiles.map((f) => f.url).filter(Boolean) as string[]; + if (urls.length > 0) { + await fileService.deleteFiles(urls); + } + } }), removeFilesFromKnowledgeBase: knowledgeBaseProcedure @@ -78,9 +88,20 @@ export const knowledgeBaseRouter = router({ }), removeKnowledgeBase: knowledgeBaseProcedure - .input(z.object({ id: z.string(), removeFiles: z.boolean().optional() })) + .input(z.object({ id: z.string() })) .mutation(async ({ input, ctx }) => { - return ctx.knowledgeBaseModel.delete(input.id); + const result = await ctx.knowledgeBaseModel.deleteWithFiles( + input.id, + serverDBEnv.REMOVE_GLOBAL_FILE, + ); + + if (result.deletedFiles.length > 0) { + const fileService = new FileService(ctx.serverDB, ctx.userId); + const urls = result.deletedFiles.map((f) => f.url).filter(Boolean) as string[]; + if (urls.length > 0) { + await fileService.deleteFiles(urls); + } + } }), updateKnowledgeBase: knowledgeBaseProcedure diff --git a/src/server/routers/lambda/notebook.ts b/src/server/routers/lambda/notebook.ts index 124c533996..52efbb3b2a 100644 --- a/src/server/routers/lambda/notebook.ts +++ b/src/server/routers/lambda/notebook.ts @@ -5,6 +5,7 @@ import { DocumentModel } from '@/database/models/document'; import { TopicDocumentModel } from '@/database/models/topicDocument'; import { authedProcedure, router } from '@/libs/trpc/lambda'; import { serverDatabase } from '@/libs/trpc/lambda/middleware'; +import { NotebookRuntimeService } from '@/server/services/notebook'; const notebookProcedure = authedProcedure.use(serverDatabase).use(async (opts) => { const { ctx } = opts; @@ -12,6 +13,7 @@ const notebookProcedure = authedProcedure.use(serverDatabase).use(async (opts) = return opts.next({ ctx: { documentModel: new DocumentModel(ctx.serverDB, ctx.userId), + notebookService: new NotebookRuntimeService({ serverDB: ctx.serverDB, userId: ctx.userId }), topicDocumentModel: new TopicDocumentModel(ctx.serverDB, ctx.userId), }, }); @@ -60,10 +62,7 @@ export const notebookRouter = router({ deleteDocument: notebookProcedure .input(z.object({ id: z.string() })) .mutation(async ({ ctx, input }) => { - // Remove associations first - await ctx.topicDocumentModel.deleteByDocumentId(input.id); - // Delete the document - await ctx.documentModel.delete(input.id); + await ctx.notebookService.deleteDocument(input.id); return { success: true }; }), diff --git a/src/server/services/document/__tests__/index.test.ts b/src/server/services/document/__tests__/index.test.ts index db180f9ae0..00bba30158 100644 --- a/src/server/services/document/__tests__/index.test.ts +++ b/src/server/services/document/__tests__/index.test.ts @@ -70,6 +70,12 @@ describe('DocumentService', () => { vi.clearAllMocks(); }); + describe('constructor', () => { + it('should not initialize FileService before file parsing is needed', () => { + expect(FileService).not.toHaveBeenCalled(); + }); + }); + describe('createDocument', () => { it('should create a document without knowledgeBase', async () => { const mockDoc = { id: 'doc-1', title: 'Test Doc' }; diff --git a/src/server/services/document/index.ts b/src/server/services/document/index.ts index c32efac4c4..a07cd3b5bc 100644 --- a/src/server/services/document/index.ts +++ b/src/server/services/document/index.ts @@ -17,17 +17,22 @@ export class DocumentService { userId: string; private fileModel: FileModel; private documentModel: DocumentModel; - private fileService: FileService; + private fileServiceInstance?: FileService; private db: LobeChatDatabase; constructor(db: LobeChatDatabase, userId: string) { this.userId = userId; this.db = db; this.fileModel = new FileModel(db, userId); - this.fileService = new FileService(db, userId); this.documentModel = new DocumentModel(db, userId); } + private get fileService() { + this.fileServiceInstance ??= new FileService(this.db, this.userId); + + return this.fileServiceInstance; + } + /** * Create a document */ diff --git a/src/server/services/notebook/__tests__/index.test.ts b/src/server/services/notebook/__tests__/index.test.ts index fcb8a4d586..8f03ca8a71 100644 --- a/src/server/services/notebook/__tests__/index.test.ts +++ b/src/server/services/notebook/__tests__/index.test.ts @@ -2,17 +2,20 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import { DocumentModel } from '@/database/models/document'; import { TopicDocumentModel } from '@/database/models/topicDocument'; +import { DocumentService } from '@/server/services/document'; import { NotebookRuntimeService } from '../index'; vi.mock('@/database/models/document'); vi.mock('@/database/models/topicDocument'); +vi.mock('@/server/services/document'); describe('NotebookRuntimeService', () => { let service: NotebookRuntimeService; const mockDb = {} as any; const mockUserId = 'test-user'; let mockDocumentModel: any; + let mockDocumentService: any; let mockTopicDocumentModel: any; beforeEach(() => { @@ -25,6 +28,10 @@ describe('NotebookRuntimeService', () => { update: vi.fn(), }; + mockDocumentService = { + deleteDocument: vi.fn(), + }; + mockTopicDocumentModel = { associate: vi.fn(), deleteByDocumentId: vi.fn(), @@ -32,6 +39,7 @@ describe('NotebookRuntimeService', () => { }; vi.mocked(DocumentModel).mockImplementation(() => mockDocumentModel); + vi.mocked(DocumentService).mockImplementation(() => mockDocumentService); vi.mocked(TopicDocumentModel).mockImplementation(() => mockTopicDocumentModel); service = new NotebookRuntimeService({ serverDB: mockDb, userId: mockUserId }); @@ -169,12 +177,12 @@ describe('NotebookRuntimeService', () => { describe('deleteDocument', () => { it('should delete associations first then the document', async () => { mockTopicDocumentModel.deleteByDocumentId.mockResolvedValue(undefined); - mockDocumentModel.delete.mockResolvedValue(undefined); + mockDocumentService.deleteDocument.mockResolvedValue(undefined); await service.deleteDocument('doc-1'); expect(mockTopicDocumentModel.deleteByDocumentId).toHaveBeenCalledWith('doc-1'); - expect(mockDocumentModel.delete).toHaveBeenCalledWith('doc-1'); + expect(mockDocumentService.deleteDocument).toHaveBeenCalledWith('doc-1'); }); }); diff --git a/src/server/services/notebook/index.ts b/src/server/services/notebook/index.ts index 397431b970..6d15863d01 100644 --- a/src/server/services/notebook/index.ts +++ b/src/server/services/notebook/index.ts @@ -2,6 +2,7 @@ import { type LobeChatDatabase } from '@lobechat/database'; import { DocumentModel } from '@/database/models/document'; import { TopicDocumentModel } from '@/database/models/topicDocument'; +import { DocumentService } from '@/server/services/document'; interface DocumentServiceResult { content: string | null; @@ -46,10 +47,12 @@ const toServiceResult = (doc: { }); export class NotebookRuntimeService { + private documentService: DocumentService; private documentModel: DocumentModel; private topicDocumentModel: TopicDocumentModel; constructor(options: NotebookRuntimeServiceOptions) { + this.documentService = new DocumentService(options.serverDB, options.userId); this.documentModel = new DocumentModel(options.serverDB, options.userId); this.topicDocumentModel = new TopicDocumentModel(options.serverDB, options.userId); } @@ -73,7 +76,7 @@ export class NotebookRuntimeService { deleteDocument = async (id: string): Promise => { await this.topicDocumentModel.deleteByDocumentId(id); - await this.documentModel.delete(id); + await this.documentService.deleteDocument(id); }; getDocument = async (id: string): Promise => { diff --git a/tests/openapi/knowledge-base-delete.test.ts b/tests/openapi/knowledge-base-delete.test.ts new file mode 100644 index 0000000000..6d449207ab --- /dev/null +++ b/tests/openapi/knowledge-base-delete.test.ts @@ -0,0 +1,95 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import type { LobeChatDatabase } from '@/database/type'; +import { FileService } from '@/server/services/file'; + +import { KnowledgeBaseService } from '../../packages/openapi/src/services/knowledge-base.service'; + +vi.mock('@/server/services/file'); + +describe('KnowledgeBaseService.deleteKnowledgeBase', () => { + let db: LobeChatDatabase; + let deleteFilesSpy: ReturnType; + + beforeEach(() => { + db = { + query: { + knowledgeBases: { + findFirst: vi.fn().mockResolvedValue({ id: 'kb-1', userId: 'user-1' }), + }, + }, + } as unknown as LobeChatDatabase; + + deleteFilesSpy = vi.fn().mockResolvedValue(undefined); + vi.mocked(FileService).mockImplementation(() => ({ deleteFiles: deleteFilesSpy }) as any); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + const createService = () => { + const service = new KnowledgeBaseService(db, 'user-1'); + + vi.spyOn(service as any, 'log').mockImplementation(() => {}); + vi.spyOn(service as any, 'resolveOperationPermission').mockResolvedValue({ + isPermitted: true, + message: '', + }); + + return service; + }; + + it('should always delete exclusive files together with the knowledge base', async () => { + const service = createService(); + const deleteWithFilesSpy = vi.fn().mockResolvedValue({ + deletedFiles: [], + }); + + Reflect.set(service, 'knowledgeBaseModel', { + deleteWithFiles: deleteWithFilesSpy, + }); + + await expect(service.deleteKnowledgeBase('kb-1')).resolves.toEqual({ + message: 'Knowledge base deleted successfully', + success: true, + }); + + expect(deleteWithFilesSpy).toHaveBeenCalledWith('kb-1'); + }); + + it('should delete external files when deleted knowledge-base files have URLs', async () => { + const service = createService(); + + Reflect.set(service, 'knowledgeBaseModel', { + deleteWithFiles: vi.fn().mockResolvedValue({ + deletedFiles: [ + { id: 'file-1', url: 'https://example.com/a.pdf' }, + { id: 'file-2', url: null }, + { id: 'file-3', url: 'https://example.com/b.pdf' }, + ], + }), + }); + + await service.deleteKnowledgeBase('kb-1'); + + expect(deleteFilesSpy).toHaveBeenCalledWith([ + 'https://example.com/a.pdf', + 'https://example.com/b.pdf', + ]); + }); + + it('should skip external file deletion when deleted files have no URLs', async () => { + const service = createService(); + + Reflect.set(service, 'knowledgeBaseModel', { + deleteWithFiles: vi.fn().mockResolvedValue({ + deletedFiles: [{ id: 'file-1', url: null }], + }), + }); + + await service.deleteKnowledgeBase('kb-1'); + + expect(deleteFilesSpy).not.toHaveBeenCalled(); + }); +});