mirror of
https://github.com/lobehub/lobehub
synced 2026-04-21 09:37:28 +00:00
🐛 fix(kb): clean up vector storage when deleting knowledge bases (#13254)
* 🐛 feat(db): add findExclusiveFileIds, deleteWithFiles, deleteAllWithFiles to KnowledgeBaseModel Add methods to safely clean up vector storage when deleting knowledge bases: - findExclusiveFileIds: identifies files belonging only to a specific KB - deleteWithFiles: deletes KB and its exclusive files with chunks/embeddings - deleteAllWithFiles: bulk version for deleting all user KBs * 🐛 fix(kb): wire vector cleanup in TRPC router, OpenAPI service, and client - TRPC removeKnowledgeBase: use deleteWithFiles when removeFiles=true + S3 cleanup - TRPC removeAllKnowledgeBases: use deleteAllWithFiles + S3 cleanup - OpenAPI deleteKnowledgeBase: use deleteWithFiles + S3 cleanup - Client service: default removeFiles=true when deleting knowledge base * 🐛 fix(knowledgeBase): change default behavior of deleteKnowledgeBase to not remove files and update related tests Signed-off-by: Innei <tukon479@gmail.com> * ✨ feat(knowledgeBase): add optional query parameter to deleteKnowledgeBase for file removal - Introduced `removeFiles` query parameter to control the deletion of exclusive files and derived data when deleting a knowledge base. - Updated `KnowledgeBaseController`, `KnowledgeBaseService`, and related schemas to support this new functionality. This change enhances the flexibility of the delete operation, allowing users to choose whether to remove associated files. Signed-off-by: Innei <tukon479@gmail.com> * 🐛 fix: cascade knowledge base deletion and add orphan cleanup runbook * ✨ feat(knowledgeRepo): implement cascading deletion for file-backed documents - Enhanced the `KnowledgeRepo` to ensure that when a document with an associated file is deleted, all related data (files, chunks, embeddings) are also removed. - Introduced a new method `deleteDocumentWithRelations` to handle the cascading deletion logic. - Updated tests to verify that all related entities are deleted when a file-backed document is removed. This change improves data integrity by ensuring that no orphaned records remain after deletions. Signed-off-by: Innei <tukon479@gmail.com> * Defer DocumentService file initialization * Fix flaky database tests and knowledge repo fixtures * Add deletion regression tests for folders and external files * ⏪ chore: remove kb orphan cleanup files from pr --------- Signed-off-by: Innei <tukon479@gmail.com>
This commit is contained in:
parent
a4d9967e60
commit
3b81a94d76
18 changed files with 1183 additions and 62 deletions
|
|
@ -636,6 +636,346 @@ describe('KnowledgeBaseModel', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('findExclusiveFileIds', () => {
|
||||||
|
it('should return file IDs that belong only to this knowledge base', async () => {
|
||||||
|
await serverDB.insert(globalFiles).values([
|
||||||
|
{
|
||||||
|
hashId: 'hash1',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
hashId: 'hash2',
|
||||||
|
url: 'https://example.com/b.pdf',
|
||||||
|
size: 200,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(files).values([
|
||||||
|
{
|
||||||
|
id: 'file1',
|
||||||
|
name: 'a.pdf',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
fileHash: 'hash1',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'file2',
|
||||||
|
name: 'b.pdf',
|
||||||
|
url: 'https://example.com/b.pdf',
|
||||||
|
fileHash: 'hash2',
|
||||||
|
size: 200,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' });
|
||||||
|
const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' });
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1', 'file2']);
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file1']);
|
||||||
|
const exclusiveIds = await knowledgeBaseModel.findExclusiveFileIds(kb1);
|
||||||
|
expect(exclusiveIds).toEqual(['file2']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should return empty array when all files are shared', async () => {
|
||||||
|
await serverDB
|
||||||
|
.insert(globalFiles)
|
||||||
|
.values([
|
||||||
|
{
|
||||||
|
hashId: 'hash1',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB
|
||||||
|
.insert(files)
|
||||||
|
.values([
|
||||||
|
{
|
||||||
|
id: 'file1',
|
||||||
|
name: 'a.pdf',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
fileHash: 'hash1',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' });
|
||||||
|
const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' });
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1']);
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file1']);
|
||||||
|
const exclusiveIds = await knowledgeBaseModel.findExclusiveFileIds(kb1);
|
||||||
|
expect(exclusiveIds).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should return all files when none are shared', async () => {
|
||||||
|
await serverDB.insert(globalFiles).values([
|
||||||
|
{
|
||||||
|
hashId: 'hash1',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
hashId: 'hash2',
|
||||||
|
url: 'https://example.com/b.pdf',
|
||||||
|
size: 200,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(files).values([
|
||||||
|
{
|
||||||
|
id: 'file1',
|
||||||
|
name: 'a.pdf',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
fileHash: 'hash1',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'file2',
|
||||||
|
name: 'b.pdf',
|
||||||
|
url: 'https://example.com/b.pdf',
|
||||||
|
fileHash: 'hash2',
|
||||||
|
size: 200,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' });
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1', 'file2']);
|
||||||
|
const exclusiveIds = await knowledgeBaseModel.findExclusiveFileIds(kb1);
|
||||||
|
expect(exclusiveIds.sort()).toEqual(['file1', 'file2']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should return empty array when KB has no files', async () => {
|
||||||
|
const { id: kb1 } = await knowledgeBaseModel.create({ name: 'Empty KB' });
|
||||||
|
const exclusiveIds = await knowledgeBaseModel.findExclusiveFileIds(kb1);
|
||||||
|
expect(exclusiveIds).toEqual([]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('deleteWithFiles', () => {
|
||||||
|
it('should delete KB and its exclusive files', async () => {
|
||||||
|
await serverDB
|
||||||
|
.insert(globalFiles)
|
||||||
|
.values([
|
||||||
|
{
|
||||||
|
hashId: 'hash1',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB
|
||||||
|
.insert(files)
|
||||||
|
.values([
|
||||||
|
{
|
||||||
|
id: 'file1',
|
||||||
|
name: 'a.pdf',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
fileHash: 'hash1',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
const { id: kbId } = await knowledgeBaseModel.create({ name: 'KB1' });
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kbId, ['file1']);
|
||||||
|
const result = await knowledgeBaseModel.deleteWithFiles(kbId);
|
||||||
|
expect(
|
||||||
|
await serverDB.query.knowledgeBases.findFirst({ where: eq(knowledgeBases.id, kbId) }),
|
||||||
|
).toBeUndefined();
|
||||||
|
expect(
|
||||||
|
await serverDB.query.files.findFirst({ where: eq(files.id, 'file1') }),
|
||||||
|
).toBeUndefined();
|
||||||
|
const kbFiles = await serverDB.query.knowledgeBaseFiles.findMany({
|
||||||
|
where: eq(knowledgeBaseFiles.knowledgeBaseId, kbId),
|
||||||
|
});
|
||||||
|
expect(kbFiles).toHaveLength(0);
|
||||||
|
expect(result.deletedFiles).toHaveLength(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should NOT delete files shared with another KB', async () => {
|
||||||
|
await serverDB.insert(globalFiles).values([
|
||||||
|
{
|
||||||
|
hashId: 'hash1',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
hashId: 'hash2',
|
||||||
|
url: 'https://example.com/b.pdf',
|
||||||
|
size: 200,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(files).values([
|
||||||
|
{
|
||||||
|
id: 'file1',
|
||||||
|
name: 'a.pdf',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
fileHash: 'hash1',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'file2',
|
||||||
|
name: 'b.pdf',
|
||||||
|
url: 'https://example.com/b.pdf',
|
||||||
|
fileHash: 'hash2',
|
||||||
|
size: 200,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' });
|
||||||
|
const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' });
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1', 'file2']);
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file1']);
|
||||||
|
const result = await knowledgeBaseModel.deleteWithFiles(kb1);
|
||||||
|
expect(
|
||||||
|
await serverDB.query.knowledgeBases.findFirst({ where: eq(knowledgeBases.id, kb1) }),
|
||||||
|
).toBeUndefined();
|
||||||
|
expect(await serverDB.query.files.findFirst({ where: eq(files.id, 'file1') })).toBeDefined();
|
||||||
|
expect(
|
||||||
|
await serverDB.query.files.findFirst({ where: eq(files.id, 'file2') }),
|
||||||
|
).toBeUndefined();
|
||||||
|
const kb2Files = await serverDB.query.knowledgeBaseFiles.findMany({
|
||||||
|
where: eq(knowledgeBaseFiles.knowledgeBaseId, kb2),
|
||||||
|
});
|
||||||
|
expect(kb2Files).toHaveLength(1);
|
||||||
|
expect(kb2Files[0].fileId).toBe('file1');
|
||||||
|
expect(result.deletedFiles).toHaveLength(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle KB with no files', async () => {
|
||||||
|
const { id: kbId } = await knowledgeBaseModel.create({ name: 'Empty KB' });
|
||||||
|
const result = await knowledgeBaseModel.deleteWithFiles(kbId);
|
||||||
|
expect(
|
||||||
|
await serverDB.query.knowledgeBases.findFirst({ where: eq(knowledgeBases.id, kbId) }),
|
||||||
|
).toBeUndefined();
|
||||||
|
expect(result.deletedFiles).toHaveLength(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('deleteAllWithFiles', () => {
|
||||||
|
it('should delete all KBs and their exclusive files', async () => {
|
||||||
|
await serverDB.insert(globalFiles).values([
|
||||||
|
{
|
||||||
|
hashId: 'hash1',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
hashId: 'hash2',
|
||||||
|
url: 'https://example.com/b.pdf',
|
||||||
|
size: 200,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(files).values([
|
||||||
|
{
|
||||||
|
id: 'file1',
|
||||||
|
name: 'a.pdf',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
fileHash: 'hash1',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'file2',
|
||||||
|
name: 'b.pdf',
|
||||||
|
url: 'https://example.com/b.pdf',
|
||||||
|
fileHash: 'hash2',
|
||||||
|
size: 200,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' });
|
||||||
|
const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' });
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1']);
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file2']);
|
||||||
|
const result = await knowledgeBaseModel.deleteAllWithFiles();
|
||||||
|
const remaining = await serverDB.query.knowledgeBases.findMany({
|
||||||
|
where: eq(knowledgeBases.userId, userId),
|
||||||
|
});
|
||||||
|
expect(remaining).toHaveLength(0);
|
||||||
|
expect(
|
||||||
|
await serverDB.query.files.findFirst({ where: eq(files.id, 'file1') }),
|
||||||
|
).toBeUndefined();
|
||||||
|
expect(
|
||||||
|
await serverDB.query.files.findFirst({ where: eq(files.id, 'file2') }),
|
||||||
|
).toBeUndefined();
|
||||||
|
expect(result.deletedFiles.length).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should delete shared file when both KBs sharing it are deleted', async () => {
|
||||||
|
await serverDB
|
||||||
|
.insert(globalFiles)
|
||||||
|
.values([
|
||||||
|
{
|
||||||
|
hashId: 'hash1',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
creator: userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB
|
||||||
|
.insert(files)
|
||||||
|
.values([
|
||||||
|
{
|
||||||
|
id: 'file1',
|
||||||
|
name: 'a.pdf',
|
||||||
|
url: 'https://example.com/a.pdf',
|
||||||
|
fileHash: 'hash1',
|
||||||
|
size: 100,
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
const { id: kb1 } = await knowledgeBaseModel.create({ name: 'KB1' });
|
||||||
|
const { id: kb2 } = await knowledgeBaseModel.create({ name: 'KB2' });
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb1, ['file1']);
|
||||||
|
await knowledgeBaseModel.addFilesToKnowledgeBase(kb2, ['file1']);
|
||||||
|
const result = await knowledgeBaseModel.deleteAllWithFiles();
|
||||||
|
expect(
|
||||||
|
await serverDB.query.files.findFirst({ where: eq(files.id, 'file1') }),
|
||||||
|
).toBeUndefined();
|
||||||
|
expect(result.deletedFiles.length).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should not delete other users KBs or files', async () => {
|
||||||
|
const anotherModel = new KnowledgeBaseModel(serverDB, 'user2');
|
||||||
|
const { id: otherKb } = await anotherModel.create({ name: 'Other KB' });
|
||||||
|
await knowledgeBaseModel.deleteAllWithFiles();
|
||||||
|
expect(
|
||||||
|
await serverDB.query.knowledgeBases.findFirst({ where: eq(knowledgeBases.id, otherKb) }),
|
||||||
|
).toBeDefined();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('static findById', () => {
|
describe('static findById', () => {
|
||||||
it('should find a knowledge base by id without user restriction', async () => {
|
it('should find a knowledge base by id without user restriction', async () => {
|
||||||
const { id } = await knowledgeBaseModel.create({ name: 'Test Group' });
|
const { id } = await knowledgeBaseModel.create({ name: 'Test Group' });
|
||||||
|
|
|
||||||
|
|
@ -519,7 +519,6 @@ describe('AgentEvalBenchmarkModel', () => {
|
||||||
expect(result?.name).toBe('Updated Name');
|
expect(result?.name).toBe('Updated Name');
|
||||||
expect(result?.description).toBe('New description');
|
expect(result?.description).toBe('New description');
|
||||||
expect(result?.updatedAt).toBeDefined();
|
expect(result?.updatedAt).toBeDefined();
|
||||||
expect(result?.updatedAt.getTime()).toBeGreaterThanOrEqual(result!.createdAt.getTime());
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should not update a system benchmark', async () => {
|
it('should not update a system benchmark', async () => {
|
||||||
|
|
|
||||||
|
|
@ -323,7 +323,6 @@ describe('AgentEvalDatasetModel', () => {
|
||||||
expect(result?.name).toBe('Updated Name');
|
expect(result?.name).toBe('Updated Name');
|
||||||
expect(result?.description).toBe('New description');
|
expect(result?.description).toBe('New description');
|
||||||
expect(result?.updatedAt).toBeDefined();
|
expect(result?.updatedAt).toBeDefined();
|
||||||
expect(result?.updatedAt.getTime()).toBeGreaterThanOrEqual(result!.createdAt.getTime());
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should not update a dataset owned by another user', async () => {
|
it('should not update a dataset owned by another user', async () => {
|
||||||
|
|
|
||||||
|
|
@ -303,7 +303,6 @@ describe('AgentEvalRunModel', () => {
|
||||||
passRate: 0.5,
|
passRate: 0.5,
|
||||||
});
|
});
|
||||||
expect(result?.updatedAt).toBeDefined();
|
expect(result?.updatedAt).toBeDefined();
|
||||||
expect(result?.updatedAt.getTime()).toBeGreaterThanOrEqual(result!.createdAt.getTime());
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should not update a run owned by another user', async () => {
|
it('should not update a run owned by another user', async () => {
|
||||||
|
|
|
||||||
|
|
@ -477,7 +477,6 @@ describe('AgentEvalTestCaseModel', () => {
|
||||||
expect(result?.content.expected).toBe('New answer');
|
expect(result?.content.expected).toBe('New answer');
|
||||||
expect(result?.metadata).toEqual({ reviewed: true });
|
expect(result?.metadata).toEqual({ reviewed: true });
|
||||||
expect(result?.updatedAt).toBeDefined();
|
expect(result?.updatedAt).toBeDefined();
|
||||||
expect(result?.updatedAt.getTime()).toBeGreaterThanOrEqual(result!.createdAt.getTime());
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should update only sortOrder', async () => {
|
it('should update only sortOrder', async () => {
|
||||||
|
|
|
||||||
|
|
@ -390,31 +390,11 @@ export class FileModel {
|
||||||
const batchChunkIds = chunkIds.slice(startIdx, startIdx + BATCH_SIZE);
|
const batchChunkIds = chunkIds.slice(startIdx, startIdx + BATCH_SIZE);
|
||||||
if (batchChunkIds.length === 0) continue;
|
if (batchChunkIds.length === 0) continue;
|
||||||
|
|
||||||
// Process each batch in the correct deletion order, failures do not block the flow
|
// Process each batch in the correct deletion order.
|
||||||
const batchPromise = (async () => {
|
const batchPromise = (async () => {
|
||||||
// 1. Delete embeddings (top-level, has foreign key dependencies)
|
await trx.delete(embeddings).where(inArray(embeddings.chunkId, batchChunkIds));
|
||||||
try {
|
await trx.delete(documentChunks).where(inArray(documentChunks.chunkId, batchChunkIds));
|
||||||
await trx.delete(embeddings).where(inArray(embeddings.chunkId, batchChunkIds));
|
await trx.delete(chunks).where(inArray(chunks.id, batchChunkIds));
|
||||||
} catch (e) {
|
|
||||||
// Silent handling, does not block deletion process
|
|
||||||
console.warn('Failed to delete embeddings:', e);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. Delete documentChunks association (if exists)
|
|
||||||
try {
|
|
||||||
await trx.delete(documentChunks).where(inArray(documentChunks.chunkId, batchChunkIds));
|
|
||||||
} catch (e) {
|
|
||||||
// Silent handling, does not block deletion process
|
|
||||||
console.warn('Failed to delete documentChunks:', e);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. Delete chunks (core data)
|
|
||||||
try {
|
|
||||||
await trx.delete(chunks).where(inArray(chunks.id, batchChunkIds));
|
|
||||||
} catch (e) {
|
|
||||||
// Silent handling, does not block deletion process
|
|
||||||
console.warn('Failed to delete chunks:', e);
|
|
||||||
}
|
|
||||||
})();
|
})();
|
||||||
|
|
||||||
batchPromises.push(batchPromise);
|
batchPromises.push(batchPromise);
|
||||||
|
|
@ -425,12 +405,7 @@ export class FileModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4. Finally delete fileChunks association table records
|
// 4. Finally delete fileChunks association table records
|
||||||
try {
|
await trx.delete(fileChunks).where(inArray(fileChunks.fileId, fileIds));
|
||||||
await trx.delete(fileChunks).where(inArray(fileChunks.fileId, fileIds));
|
|
||||||
} catch (e) {
|
|
||||||
// Silent handling, does not block deletion process
|
|
||||||
console.warn('Failed to delete fileChunks:', e);
|
|
||||||
}
|
|
||||||
|
|
||||||
return chunkIds;
|
return chunkIds;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,10 @@
|
||||||
import type { KnowledgeBaseItem } from '@lobechat/types';
|
import type { KnowledgeBaseItem } from '@lobechat/types';
|
||||||
import { and, desc, eq, inArray } from 'drizzle-orm';
|
import { and, count, desc, eq, inArray } from 'drizzle-orm';
|
||||||
|
|
||||||
import type { NewKnowledgeBase } from '../schemas';
|
import type { NewKnowledgeBase } from '../schemas';
|
||||||
import { documents, knowledgeBaseFiles, knowledgeBases } from '../schemas';
|
import { documents, knowledgeBaseFiles, knowledgeBases } from '../schemas';
|
||||||
import type { LobeChatDatabase } from '../type';
|
import type { LobeChatDatabase } from '../type';
|
||||||
|
import { FileModel } from './file';
|
||||||
|
|
||||||
export class KnowledgeBaseModel {
|
export class KnowledgeBaseModel {
|
||||||
private userId: string;
|
private userId: string;
|
||||||
|
|
@ -160,6 +161,73 @@ export class KnowledgeBaseModel {
|
||||||
.set({ ...value, updatedAt: new Date() })
|
.set({ ...value, updatedAt: new Date() })
|
||||||
.where(and(eq(knowledgeBases.id, id), eq(knowledgeBases.userId, this.userId)));
|
.where(and(eq(knowledgeBases.id, id), eq(knowledgeBases.userId, this.userId)));
|
||||||
|
|
||||||
|
findExclusiveFileIds = async (knowledgeBaseId: string): Promise<string[]> => {
|
||||||
|
const kbFiles = await this.db
|
||||||
|
.select({ fileId: knowledgeBaseFiles.fileId })
|
||||||
|
.from(knowledgeBaseFiles)
|
||||||
|
.where(
|
||||||
|
and(
|
||||||
|
eq(knowledgeBaseFiles.knowledgeBaseId, knowledgeBaseId),
|
||||||
|
eq(knowledgeBaseFiles.userId, this.userId),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
const fileIds = kbFiles.map((f) => f.fileId);
|
||||||
|
if (fileIds.length === 0) return [];
|
||||||
|
|
||||||
|
const sharedFiles = await this.db
|
||||||
|
.select({
|
||||||
|
fileId: knowledgeBaseFiles.fileId,
|
||||||
|
kbCount: count(knowledgeBaseFiles.knowledgeBaseId),
|
||||||
|
})
|
||||||
|
.from(knowledgeBaseFiles)
|
||||||
|
.where(
|
||||||
|
and(
|
||||||
|
inArray(knowledgeBaseFiles.fileId, fileIds),
|
||||||
|
eq(knowledgeBaseFiles.userId, this.userId),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.groupBy(knowledgeBaseFiles.fileId);
|
||||||
|
|
||||||
|
return sharedFiles.filter((f) => Number(f.kbCount) === 1).map((f) => f.fileId);
|
||||||
|
};
|
||||||
|
|
||||||
|
deleteWithFiles = async (id: string, removeGlobalFile: boolean = true) => {
|
||||||
|
const exclusiveFileIds = await this.findExclusiveFileIds(id);
|
||||||
|
|
||||||
|
let deletedFiles: Array<{ id: string; url: string | null }> = [];
|
||||||
|
if (exclusiveFileIds.length > 0) {
|
||||||
|
const fileModel = new FileModel(this.db, this.userId);
|
||||||
|
const result = await fileModel.deleteMany(exclusiveFileIds, removeGlobalFile);
|
||||||
|
deletedFiles = (result || []).map((f) => ({ id: f.id, url: f.url }));
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.db
|
||||||
|
.delete(knowledgeBases)
|
||||||
|
.where(and(eq(knowledgeBases.id, id), eq(knowledgeBases.userId, this.userId)));
|
||||||
|
|
||||||
|
return { deletedFiles };
|
||||||
|
};
|
||||||
|
|
||||||
|
deleteAllWithFiles = async (removeGlobalFile: boolean = true) => {
|
||||||
|
const allKbFileIds = await this.db
|
||||||
|
.select({ fileId: knowledgeBaseFiles.fileId })
|
||||||
|
.from(knowledgeBaseFiles)
|
||||||
|
.where(eq(knowledgeBaseFiles.userId, this.userId));
|
||||||
|
|
||||||
|
const fileIds = [...new Set(allKbFileIds.map((f) => f.fileId))];
|
||||||
|
|
||||||
|
let deletedFiles: Array<{ id: string; url: string | null }> = [];
|
||||||
|
if (fileIds.length > 0) {
|
||||||
|
const fileModel = new FileModel(this.db, this.userId);
|
||||||
|
const result = await fileModel.deleteMany(fileIds, removeGlobalFile);
|
||||||
|
deletedFiles = (result || []).map((f) => ({ id: f.id, url: f.url }));
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.db.delete(knowledgeBases).where(eq(knowledgeBases.userId, this.userId));
|
||||||
|
|
||||||
|
return { deletedFiles };
|
||||||
|
};
|
||||||
|
|
||||||
static findById = async (db: LobeChatDatabase, id: string) =>
|
static findById = async (db: LobeChatDatabase, id: string) =>
|
||||||
db.query.knowledgeBases.findFirst({
|
db.query.knowledgeBases.findFirst({
|
||||||
where: eq(knowledgeBases.id, id),
|
where: eq(knowledgeBases.id, id),
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,16 @@ import { eq } from 'drizzle-orm';
|
||||||
import { beforeEach, describe, expect, it } from 'vitest';
|
import { beforeEach, describe, expect, it } from 'vitest';
|
||||||
|
|
||||||
import { getTestDB } from '../../../core/getTestDB';
|
import { getTestDB } from '../../../core/getTestDB';
|
||||||
import { documents, files, knowledgeBaseFiles, knowledgeBases, users } from '../../../schemas';
|
import {
|
||||||
|
chunks,
|
||||||
|
documents,
|
||||||
|
embeddings,
|
||||||
|
fileChunks,
|
||||||
|
files,
|
||||||
|
knowledgeBaseFiles,
|
||||||
|
knowledgeBases,
|
||||||
|
users,
|
||||||
|
} from '../../../schemas';
|
||||||
import type { LobeChatDatabase } from '../../../type';
|
import type { LobeChatDatabase } from '../../../type';
|
||||||
import { KnowledgeRepo } from '../index';
|
import { KnowledgeRepo } from '../index';
|
||||||
|
|
||||||
|
|
@ -12,8 +21,14 @@ const serverDB: LobeChatDatabase = await getTestDB();
|
||||||
|
|
||||||
const userId = 'knowledge-repo-test-user';
|
const userId = 'knowledge-repo-test-user';
|
||||||
const otherUserId = 'other-knowledge-user';
|
const otherUserId = 'other-knowledge-user';
|
||||||
|
const deleteDocChunkId = '33333333-3333-4333-8333-333333333333';
|
||||||
|
const deleteManyDocChunkId = '44444444-4444-4444-8444-444444444444';
|
||||||
|
const deleteFolderFileChunkId = '55555555-5555-4555-8555-555555555555';
|
||||||
|
const deleteFolderDocChunkId = '66666666-6666-4666-8666-666666666666';
|
||||||
|
const deleteNestedFolderFileChunkId = '77777777-7777-4777-8777-777777777777';
|
||||||
|
|
||||||
let knowledgeRepo: KnowledgeRepo;
|
let knowledgeRepo: KnowledgeRepo;
|
||||||
|
const testEmbedding = Array.from({ length: 1024 }, () => 0.1);
|
||||||
|
|
||||||
beforeEach(async () => {
|
beforeEach(async () => {
|
||||||
// Clean up
|
// Clean up
|
||||||
|
|
@ -397,6 +412,15 @@ describe('KnowledgeRepo', () => {
|
||||||
url: 'https://example.com/delete.txt',
|
url: 'https://example.com/delete.txt',
|
||||||
});
|
});
|
||||||
|
|
||||||
|
await serverDB.insert(files).values({
|
||||||
|
id: 'delete-doc-file',
|
||||||
|
userId,
|
||||||
|
name: 'delete-doc-file.pdf',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
size: 2048,
|
||||||
|
url: 'https://example.com/delete-doc-file.pdf',
|
||||||
|
});
|
||||||
|
|
||||||
await serverDB.insert(documents).values([
|
await serverDB.insert(documents).values([
|
||||||
{
|
{
|
||||||
id: 'delete-doc',
|
id: 'delete-doc',
|
||||||
|
|
@ -408,6 +432,153 @@ describe('KnowledgeRepo', () => {
|
||||||
totalCharCount: 100,
|
totalCharCount: 100,
|
||||||
totalLineCount: 2,
|
totalLineCount: 2,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-folder',
|
||||||
|
userId,
|
||||||
|
title: 'Folder To Delete',
|
||||||
|
fileType: 'custom/folder',
|
||||||
|
sourceType: 'topic',
|
||||||
|
source: 'internal://folder/delete-folder',
|
||||||
|
totalCharCount: 0,
|
||||||
|
totalLineCount: 0,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(files).values([
|
||||||
|
{
|
||||||
|
id: 'delete-folder-file',
|
||||||
|
userId,
|
||||||
|
name: 'delete-folder-file.pdf',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
size: 256,
|
||||||
|
parentId: 'delete-folder',
|
||||||
|
url: 'https://example.com/delete-folder-file.pdf',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-folder-doc-file',
|
||||||
|
userId,
|
||||||
|
name: 'delete-folder-doc-file.pdf',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
size: 512,
|
||||||
|
url: 'https://example.com/delete-folder-doc-file.pdf',
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(documents).values([
|
||||||
|
{
|
||||||
|
id: 'delete-doc-with-file',
|
||||||
|
userId,
|
||||||
|
title: 'To Delete File-Backed Note',
|
||||||
|
fileId: 'delete-doc-file',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
filename: 'delete-doc-file.pdf',
|
||||||
|
sourceType: 'api',
|
||||||
|
source: 'internal://note/delete-doc-with-file',
|
||||||
|
totalCharCount: 120,
|
||||||
|
totalLineCount: 3,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-folder-doc',
|
||||||
|
userId,
|
||||||
|
parentId: 'delete-folder',
|
||||||
|
title: 'Folder Child Doc',
|
||||||
|
fileId: 'delete-folder-doc-file',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
filename: 'delete-folder-doc-file.pdf',
|
||||||
|
sourceType: 'api',
|
||||||
|
source: 'internal://note/delete-folder-doc',
|
||||||
|
totalCharCount: 90,
|
||||||
|
totalLineCount: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-folder-child',
|
||||||
|
userId,
|
||||||
|
parentId: 'delete-folder',
|
||||||
|
title: 'Nested Folder',
|
||||||
|
fileType: 'custom/folder',
|
||||||
|
sourceType: 'topic',
|
||||||
|
source: 'internal://folder/delete-folder-child',
|
||||||
|
totalCharCount: 0,
|
||||||
|
totalLineCount: 0,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(files).values({
|
||||||
|
id: 'delete-folder-child-file',
|
||||||
|
userId,
|
||||||
|
name: 'delete-folder-child-file.pdf',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
size: 768,
|
||||||
|
parentId: 'delete-folder-child',
|
||||||
|
url: 'https://example.com/delete-folder-child-file.pdf',
|
||||||
|
});
|
||||||
|
|
||||||
|
await serverDB.insert(chunks).values({
|
||||||
|
id: deleteDocChunkId,
|
||||||
|
text: 'chunk for mirrored file',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(fileChunks).values({
|
||||||
|
chunkId: deleteDocChunkId,
|
||||||
|
fileId: 'delete-doc-file',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(embeddings).values({
|
||||||
|
chunkId: deleteDocChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(chunks).values([
|
||||||
|
{
|
||||||
|
id: deleteFolderFileChunkId,
|
||||||
|
text: 'chunk for folder file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: deleteFolderDocChunkId,
|
||||||
|
text: 'chunk for folder child mirrored file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: deleteNestedFolderFileChunkId,
|
||||||
|
text: 'chunk for nested folder file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(fileChunks).values([
|
||||||
|
{
|
||||||
|
chunkId: deleteFolderFileChunkId,
|
||||||
|
fileId: 'delete-folder-file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunkId: deleteFolderDocChunkId,
|
||||||
|
fileId: 'delete-folder-doc-file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunkId: deleteNestedFolderFileChunkId,
|
||||||
|
fileId: 'delete-folder-child-file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(embeddings).values([
|
||||||
|
{
|
||||||
|
chunkId: deleteFolderFileChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunkId: deleteFolderDocChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunkId: deleteNestedFolderFileChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -428,6 +599,82 @@ describe('KnowledgeRepo', () => {
|
||||||
});
|
});
|
||||||
expect(doc).toBeUndefined();
|
expect(doc).toBeUndefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should delete mirrored file data when deleting a file-backed document', async () => {
|
||||||
|
await knowledgeRepo.deleteItem('delete-doc-with-file', 'document');
|
||||||
|
|
||||||
|
const doc = await serverDB.query.documents.findFirst({
|
||||||
|
where: eq(documents.id, 'delete-doc-with-file'),
|
||||||
|
});
|
||||||
|
const file = await serverDB.query.files.findFirst({
|
||||||
|
where: eq(files.id, 'delete-doc-file'),
|
||||||
|
});
|
||||||
|
const chunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: eq(chunks.id, deleteDocChunkId),
|
||||||
|
});
|
||||||
|
const embedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: eq(embeddings.chunkId, deleteDocChunkId),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(doc).toBeUndefined();
|
||||||
|
expect(file).toBeUndefined();
|
||||||
|
expect(chunk).toBeUndefined();
|
||||||
|
expect(embedding).toBeUndefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should recursively delete child documents, files and vectors when deleting a folder', async () => {
|
||||||
|
await knowledgeRepo.deleteItem('delete-folder', 'document');
|
||||||
|
|
||||||
|
const folder = await serverDB.query.documents.findFirst({
|
||||||
|
where: eq(documents.id, 'delete-folder'),
|
||||||
|
});
|
||||||
|
const childDoc = await serverDB.query.documents.findFirst({
|
||||||
|
where: eq(documents.id, 'delete-folder-doc'),
|
||||||
|
});
|
||||||
|
const childFolder = await serverDB.query.documents.findFirst({
|
||||||
|
where: eq(documents.id, 'delete-folder-child'),
|
||||||
|
});
|
||||||
|
const folderFile = await serverDB.query.files.findFirst({
|
||||||
|
where: eq(files.id, 'delete-folder-file'),
|
||||||
|
});
|
||||||
|
const childDocFile = await serverDB.query.files.findFirst({
|
||||||
|
where: eq(files.id, 'delete-folder-doc-file'),
|
||||||
|
});
|
||||||
|
const nestedFolderFile = await serverDB.query.files.findFirst({
|
||||||
|
where: eq(files.id, 'delete-folder-child-file'),
|
||||||
|
});
|
||||||
|
const folderFileChunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: eq(chunks.id, deleteFolderFileChunkId),
|
||||||
|
});
|
||||||
|
const childDocChunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: eq(chunks.id, deleteFolderDocChunkId),
|
||||||
|
});
|
||||||
|
const nestedFolderFileChunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: eq(chunks.id, deleteNestedFolderFileChunkId),
|
||||||
|
});
|
||||||
|
const folderFileEmbedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: eq(embeddings.chunkId, deleteFolderFileChunkId),
|
||||||
|
});
|
||||||
|
const childDocEmbedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: eq(embeddings.chunkId, deleteFolderDocChunkId),
|
||||||
|
});
|
||||||
|
const nestedFolderFileEmbedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: eq(embeddings.chunkId, deleteNestedFolderFileChunkId),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(folder).toBeUndefined();
|
||||||
|
expect(childDoc).toBeUndefined();
|
||||||
|
expect(childFolder).toBeUndefined();
|
||||||
|
expect(folderFile).toBeUndefined();
|
||||||
|
expect(childDocFile).toBeUndefined();
|
||||||
|
expect(nestedFolderFile).toBeUndefined();
|
||||||
|
expect(folderFileChunk).toBeUndefined();
|
||||||
|
expect(childDocChunk).toBeUndefined();
|
||||||
|
expect(nestedFolderFileChunk).toBeUndefined();
|
||||||
|
expect(folderFileEmbedding).toBeUndefined();
|
||||||
|
expect(childDocEmbedding).toBeUndefined();
|
||||||
|
expect(nestedFolderFileEmbedding).toBeUndefined();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('deleteMany', () => {
|
describe('deleteMany', () => {
|
||||||
|
|
@ -449,6 +696,14 @@ describe('KnowledgeRepo', () => {
|
||||||
size: 100,
|
size: 100,
|
||||||
url: 'https://example.com/delete2.txt',
|
url: 'https://example.com/delete2.txt',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-many-doc-file-1',
|
||||||
|
userId,
|
||||||
|
name: 'delete-many-doc-file-1.pdf',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
size: 512,
|
||||||
|
url: 'https://example.com/delete-many-doc-file-1.pdf',
|
||||||
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
await serverDB.insert(documents).values([
|
await serverDB.insert(documents).values([
|
||||||
|
|
@ -456,6 +711,7 @@ describe('KnowledgeRepo', () => {
|
||||||
id: 'delete-many-doc-1',
|
id: 'delete-many-doc-1',
|
||||||
userId,
|
userId,
|
||||||
title: 'Delete Note 1',
|
title: 'Delete Note 1',
|
||||||
|
fileId: 'delete-many-doc-file-1',
|
||||||
fileType: 'custom/note',
|
fileType: 'custom/note',
|
||||||
sourceType: 'topic',
|
sourceType: 'topic',
|
||||||
source: 'internal://note/delete-many-doc-1',
|
source: 'internal://note/delete-many-doc-1',
|
||||||
|
|
@ -473,6 +729,22 @@ describe('KnowledgeRepo', () => {
|
||||||
totalLineCount: 2,
|
totalLineCount: 2,
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
await serverDB.insert(chunks).values({
|
||||||
|
id: deleteManyDocChunkId,
|
||||||
|
text: 'delete many mirrored chunk',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(fileChunks).values({
|
||||||
|
chunkId: deleteManyDocChunkId,
|
||||||
|
fileId: 'delete-many-doc-file-1',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(embeddings).values({
|
||||||
|
chunkId: deleteManyDocChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should delete multiple files and documents', async () => {
|
it('should delete multiple files and documents', async () => {
|
||||||
|
|
@ -495,11 +767,23 @@ describe('KnowledgeRepo', () => {
|
||||||
const doc2 = await serverDB.query.documents.findFirst({
|
const doc2 = await serverDB.query.documents.findFirst({
|
||||||
where: eq(documents.id, 'delete-many-doc-2'),
|
where: eq(documents.id, 'delete-many-doc-2'),
|
||||||
});
|
});
|
||||||
|
const mirroredFile = await serverDB.query.files.findFirst({
|
||||||
|
where: eq(files.id, 'delete-many-doc-file-1'),
|
||||||
|
});
|
||||||
|
const chunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: eq(chunks.id, deleteManyDocChunkId),
|
||||||
|
});
|
||||||
|
const embedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: eq(embeddings.chunkId, deleteManyDocChunkId),
|
||||||
|
});
|
||||||
|
|
||||||
expect(file1).toBeUndefined();
|
expect(file1).toBeUndefined();
|
||||||
expect(file2).toBeUndefined();
|
expect(file2).toBeUndefined();
|
||||||
expect(doc1).toBeUndefined();
|
expect(doc1).toBeUndefined();
|
||||||
expect(doc2).toBeUndefined();
|
expect(doc2).toBeUndefined();
|
||||||
|
expect(mirroredFile).toBeUndefined();
|
||||||
|
expect(chunk).toBeUndefined();
|
||||||
|
expect(embedding).toBeUndefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle empty arrays', async () => {
|
it('should handle empty arrays', async () => {
|
||||||
|
|
|
||||||
|
|
@ -5,14 +5,22 @@ import { beforeEach, describe, expect, it } from 'vitest';
|
||||||
import { getTestDB } from '../../core/getTestDB';
|
import { getTestDB } from '../../core/getTestDB';
|
||||||
import type { NewDocument, NewFile } from '../../schemas/file';
|
import type { NewDocument, NewFile } from '../../schemas/file';
|
||||||
import { documents, files } from '../../schemas/file';
|
import { documents, files } from '../../schemas/file';
|
||||||
|
import { chunks, embeddings } from '../../schemas/rag';
|
||||||
|
import { fileChunks } from '../../schemas/relations';
|
||||||
import { users } from '../../schemas/user';
|
import { users } from '../../schemas/user';
|
||||||
import type { LobeChatDatabase } from '../../type';
|
import type { LobeChatDatabase } from '../../type';
|
||||||
import { KnowledgeRepo } from './index';
|
import { KnowledgeRepo } from './index';
|
||||||
|
|
||||||
const userId = 'knowledge-test-user';
|
const userId = 'knowledge-test-user';
|
||||||
const otherUserId = 'other-knowledge-user';
|
const otherUserId = 'other-knowledge-user';
|
||||||
|
const deleteDocChunkId = '11111111-1111-4111-8111-111111111111';
|
||||||
|
const deleteManyDocChunkId = '22222222-2222-4222-8222-222222222222';
|
||||||
|
const deleteFolderFileChunkId = '33333333-3333-4333-8333-333333333333';
|
||||||
|
const deleteFolderDocChunkId = '44444444-4444-4444-8444-444444444444';
|
||||||
|
const deleteNestedFolderFileChunkId = '55555555-5555-4555-8555-555555555555';
|
||||||
|
|
||||||
let knowledgeRepo: KnowledgeRepo;
|
let knowledgeRepo: KnowledgeRepo;
|
||||||
|
const testEmbedding = Array.from({ length: 1024 }, () => 0.1);
|
||||||
|
|
||||||
const serverDB: LobeChatDatabase = await getTestDB();
|
const serverDB: LobeChatDatabase = await getTestDB();
|
||||||
|
|
||||||
|
|
@ -556,17 +564,177 @@ describe('KnowledgeRepo', () => {
|
||||||
userId,
|
userId,
|
||||||
});
|
});
|
||||||
|
|
||||||
await serverDB.insert(documents).values({
|
await serverDB.insert(files).values({
|
||||||
id: 'delete-doc',
|
id: 'delete-doc-file',
|
||||||
content: 'Document to delete',
|
fileType: 'application/pdf',
|
||||||
fileType: 'custom/other',
|
name: 'delete-doc-file.pdf',
|
||||||
filename: 'to-delete-doc.txt',
|
size: 2048,
|
||||||
source: 'source',
|
url: 'delete-doc-file-url',
|
||||||
sourceType: 'api',
|
|
||||||
totalCharCount: 100,
|
|
||||||
totalLineCount: 10,
|
|
||||||
userId,
|
userId,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
await serverDB.insert(documents).values([
|
||||||
|
{
|
||||||
|
id: 'delete-doc',
|
||||||
|
content: 'Document to delete',
|
||||||
|
fileType: 'custom/other',
|
||||||
|
filename: 'to-delete-doc.txt',
|
||||||
|
source: 'source',
|
||||||
|
sourceType: 'api',
|
||||||
|
totalCharCount: 100,
|
||||||
|
totalLineCount: 10,
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-folder',
|
||||||
|
content: '',
|
||||||
|
fileType: 'custom/folder',
|
||||||
|
filename: 'delete-folder',
|
||||||
|
source: 'source',
|
||||||
|
sourceType: 'api',
|
||||||
|
totalCharCount: 0,
|
||||||
|
totalLineCount: 0,
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(files).values([
|
||||||
|
{
|
||||||
|
id: 'delete-folder-file',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
name: 'delete-folder-file.pdf',
|
||||||
|
parentId: 'delete-folder',
|
||||||
|
size: 1024,
|
||||||
|
url: 'delete-folder-file-url',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-folder-doc-file',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
name: 'delete-folder-doc-file.pdf',
|
||||||
|
size: 1024,
|
||||||
|
url: 'delete-folder-doc-file-url',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(documents).values([
|
||||||
|
{
|
||||||
|
id: 'delete-doc-with-file',
|
||||||
|
content: 'Document with mirrored file',
|
||||||
|
fileId: 'delete-doc-file',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
filename: 'delete-doc-file.pdf',
|
||||||
|
source: 'source',
|
||||||
|
sourceType: 'api',
|
||||||
|
totalCharCount: 120,
|
||||||
|
totalLineCount: 12,
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-folder-doc',
|
||||||
|
content: 'Folder child document',
|
||||||
|
fileId: 'delete-folder-doc-file',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
filename: 'delete-folder-doc-file.pdf',
|
||||||
|
parentId: 'delete-folder',
|
||||||
|
source: 'source',
|
||||||
|
sourceType: 'api',
|
||||||
|
totalCharCount: 80,
|
||||||
|
totalLineCount: 8,
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-folder-child',
|
||||||
|
content: '',
|
||||||
|
fileType: 'custom/folder',
|
||||||
|
filename: 'delete-folder-child',
|
||||||
|
parentId: 'delete-folder',
|
||||||
|
source: 'source',
|
||||||
|
sourceType: 'api',
|
||||||
|
totalCharCount: 0,
|
||||||
|
totalLineCount: 0,
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(files).values({
|
||||||
|
id: 'delete-folder-child-file',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
name: 'delete-folder-child-file.pdf',
|
||||||
|
parentId: 'delete-folder-child',
|
||||||
|
size: 1024,
|
||||||
|
url: 'delete-folder-child-file-url',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
|
||||||
|
await serverDB.insert(chunks).values({
|
||||||
|
id: deleteDocChunkId,
|
||||||
|
text: 'chunk for document file',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(fileChunks).values({
|
||||||
|
chunkId: deleteDocChunkId,
|
||||||
|
fileId: 'delete-doc-file',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(embeddings).values({
|
||||||
|
chunkId: deleteDocChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(chunks).values([
|
||||||
|
{
|
||||||
|
id: deleteFolderFileChunkId,
|
||||||
|
text: 'chunk for folder file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: deleteFolderDocChunkId,
|
||||||
|
text: 'chunk for folder mirrored file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: deleteNestedFolderFileChunkId,
|
||||||
|
text: 'chunk for nested folder file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(fileChunks).values([
|
||||||
|
{
|
||||||
|
chunkId: deleteFolderFileChunkId,
|
||||||
|
fileId: 'delete-folder-file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunkId: deleteFolderDocChunkId,
|
||||||
|
fileId: 'delete-folder-doc-file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunkId: deleteNestedFolderFileChunkId,
|
||||||
|
fileId: 'delete-folder-child-file',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
await serverDB.insert(embeddings).values([
|
||||||
|
{
|
||||||
|
chunkId: deleteFolderFileChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunkId: deleteFolderDocChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunkId: deleteNestedFolderFileChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should delete a file by id', async () => {
|
it('should delete a file by id', async () => {
|
||||||
|
|
@ -588,6 +756,82 @@ describe('KnowledgeRepo', () => {
|
||||||
});
|
});
|
||||||
expect(result).toBeUndefined();
|
expect(result).toBeUndefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should delete mirrored file data when deleting a file-backed document', async () => {
|
||||||
|
await knowledgeRepo.deleteItem('delete-doc-with-file', 'document');
|
||||||
|
|
||||||
|
const document = await serverDB.query.documents.findFirst({
|
||||||
|
where: (d, { eq }) => eq(d.id, 'delete-doc-with-file'),
|
||||||
|
});
|
||||||
|
const file = await serverDB.query.files.findFirst({
|
||||||
|
where: (f, { eq }) => eq(f.id, 'delete-doc-file'),
|
||||||
|
});
|
||||||
|
const chunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: (c, { eq }) => eq(c.id, deleteDocChunkId),
|
||||||
|
});
|
||||||
|
const embedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: (e, { eq }) => eq(e.chunkId, deleteDocChunkId),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(document).toBeUndefined();
|
||||||
|
expect(file).toBeUndefined();
|
||||||
|
expect(chunk).toBeUndefined();
|
||||||
|
expect(embedding).toBeUndefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should recursively delete child documents, files and vectors when deleting a folder', async () => {
|
||||||
|
await knowledgeRepo.deleteItem('delete-folder', 'document');
|
||||||
|
|
||||||
|
const folder = await serverDB.query.documents.findFirst({
|
||||||
|
where: (d, { eq }) => eq(d.id, 'delete-folder'),
|
||||||
|
});
|
||||||
|
const childDoc = await serverDB.query.documents.findFirst({
|
||||||
|
where: (d, { eq }) => eq(d.id, 'delete-folder-doc'),
|
||||||
|
});
|
||||||
|
const childFolder = await serverDB.query.documents.findFirst({
|
||||||
|
where: (d, { eq }) => eq(d.id, 'delete-folder-child'),
|
||||||
|
});
|
||||||
|
const folderFile = await serverDB.query.files.findFirst({
|
||||||
|
where: (f, { eq }) => eq(f.id, 'delete-folder-file'),
|
||||||
|
});
|
||||||
|
const childDocFile = await serverDB.query.files.findFirst({
|
||||||
|
where: (f, { eq }) => eq(f.id, 'delete-folder-doc-file'),
|
||||||
|
});
|
||||||
|
const nestedFolderFile = await serverDB.query.files.findFirst({
|
||||||
|
where: (f, { eq }) => eq(f.id, 'delete-folder-child-file'),
|
||||||
|
});
|
||||||
|
const folderFileChunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: (c, { eq }) => eq(c.id, deleteFolderFileChunkId),
|
||||||
|
});
|
||||||
|
const childDocChunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: (c, { eq }) => eq(c.id, deleteFolderDocChunkId),
|
||||||
|
});
|
||||||
|
const nestedFolderFileChunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: (c, { eq }) => eq(c.id, deleteNestedFolderFileChunkId),
|
||||||
|
});
|
||||||
|
const folderFileEmbedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: (e, { eq }) => eq(e.chunkId, deleteFolderFileChunkId),
|
||||||
|
});
|
||||||
|
const childDocEmbedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: (e, { eq }) => eq(e.chunkId, deleteFolderDocChunkId),
|
||||||
|
});
|
||||||
|
const nestedFolderFileEmbedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: (e, { eq }) => eq(e.chunkId, deleteNestedFolderFileChunkId),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(folder).toBeUndefined();
|
||||||
|
expect(childDoc).toBeUndefined();
|
||||||
|
expect(childFolder).toBeUndefined();
|
||||||
|
expect(folderFile).toBeUndefined();
|
||||||
|
expect(childDocFile).toBeUndefined();
|
||||||
|
expect(nestedFolderFile).toBeUndefined();
|
||||||
|
expect(folderFileChunk).toBeUndefined();
|
||||||
|
expect(childDocChunk).toBeUndefined();
|
||||||
|
expect(nestedFolderFileChunk).toBeUndefined();
|
||||||
|
expect(folderFileEmbedding).toBeUndefined();
|
||||||
|
expect(childDocEmbedding).toBeUndefined();
|
||||||
|
expect(nestedFolderFileEmbedding).toBeUndefined();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('deleteMany', () => {
|
describe('deleteMany', () => {
|
||||||
|
|
@ -609,12 +853,21 @@ describe('KnowledgeRepo', () => {
|
||||||
url: 'url-2',
|
url: 'url-2',
|
||||||
userId,
|
userId,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
id: 'delete-many-doc-file-1',
|
||||||
|
fileType: 'application/pdf',
|
||||||
|
name: 'delete-many-doc-file-1.pdf',
|
||||||
|
size: 1024,
|
||||||
|
url: 'delete-many-doc-file-1-url',
|
||||||
|
userId,
|
||||||
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
await serverDB.insert(documents).values([
|
await serverDB.insert(documents).values([
|
||||||
{
|
{
|
||||||
id: 'delete-many-doc-1',
|
id: 'delete-many-doc-1',
|
||||||
content: 'Delete doc 1',
|
content: 'Delete doc 1',
|
||||||
|
fileId: 'delete-many-doc-file-1',
|
||||||
fileType: 'custom/other',
|
fileType: 'custom/other',
|
||||||
filename: 'delete-doc-1.txt',
|
filename: 'delete-doc-1.txt',
|
||||||
source: 'source',
|
source: 'source',
|
||||||
|
|
@ -624,6 +877,22 @@ describe('KnowledgeRepo', () => {
|
||||||
userId,
|
userId,
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
await serverDB.insert(chunks).values({
|
||||||
|
id: deleteManyDocChunkId,
|
||||||
|
text: 'delete many chunk',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(fileChunks).values({
|
||||||
|
chunkId: deleteManyDocChunkId,
|
||||||
|
fileId: 'delete-many-doc-file-1',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
|
await serverDB.insert(embeddings).values({
|
||||||
|
chunkId: deleteManyDocChunkId,
|
||||||
|
embeddings: testEmbedding,
|
||||||
|
model: 'test-model',
|
||||||
|
userId,
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should delete multiple items of mixed types', async () => {
|
it('should delete multiple items of mixed types', async () => {
|
||||||
|
|
@ -643,10 +912,22 @@ describe('KnowledgeRepo', () => {
|
||||||
const doc1 = await serverDB.query.documents.findFirst({
|
const doc1 = await serverDB.query.documents.findFirst({
|
||||||
where: (d, { eq }) => eq(d.id, 'delete-many-doc-1'),
|
where: (d, { eq }) => eq(d.id, 'delete-many-doc-1'),
|
||||||
});
|
});
|
||||||
|
const mirroredFile = await serverDB.query.files.findFirst({
|
||||||
|
where: (f, { eq }) => eq(f.id, 'delete-many-doc-file-1'),
|
||||||
|
});
|
||||||
|
const chunk = await serverDB.query.chunks.findFirst({
|
||||||
|
where: (c, { eq }) => eq(c.id, deleteManyDocChunkId),
|
||||||
|
});
|
||||||
|
const embedding = await serverDB.query.embeddings.findFirst({
|
||||||
|
where: (e, { eq }) => eq(e.chunkId, deleteManyDocChunkId),
|
||||||
|
});
|
||||||
|
|
||||||
expect(file1).toBeUndefined();
|
expect(file1).toBeUndefined();
|
||||||
expect(file2).toBeUndefined();
|
expect(file2).toBeUndefined();
|
||||||
expect(doc1).toBeUndefined();
|
expect(doc1).toBeUndefined();
|
||||||
|
expect(mirroredFile).toBeUndefined();
|
||||||
|
expect(chunk).toBeUndefined();
|
||||||
|
expect(embedding).toBeUndefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle empty items array', async () => {
|
it('should handle empty items array', async () => {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import type { QueryFileListParams } from '@lobechat/types';
|
import type { QueryFileListParams } from '@lobechat/types';
|
||||||
import { FilesTabs, SortType } from '@lobechat/types';
|
import { FilesTabs, SortType } from '@lobechat/types';
|
||||||
import { sql } from 'drizzle-orm';
|
import { and, eq, sql } from 'drizzle-orm';
|
||||||
|
|
||||||
import { DocumentModel } from '../../models/document';
|
import { DocumentModel } from '../../models/document';
|
||||||
import { FileModel } from '../../models/file';
|
import { FileModel } from '../../models/file';
|
||||||
|
|
@ -277,7 +277,7 @@ export class KnowledgeRepo {
|
||||||
if (sourceType === 'file') {
|
if (sourceType === 'file') {
|
||||||
await this.fileModel.delete(id);
|
await this.fileModel.delete(id);
|
||||||
} else {
|
} else {
|
||||||
await this.documentModel.delete(id);
|
await this.deleteDocumentWithRelations(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -293,7 +293,7 @@ export class KnowledgeRepo {
|
||||||
await Promise.all([
|
await Promise.all([
|
||||||
fileIds.length > 0 ? this.fileModel.deleteMany(fileIds) : Promise.resolve(),
|
fileIds.length > 0 ? this.fileModel.deleteMany(fileIds) : Promise.resolve(),
|
||||||
documentIds.length > 0
|
documentIds.length > 0
|
||||||
? Promise.all(documentIds.map((id) => this.documentModel.delete(id)))
|
? Promise.all(documentIds.map((id) => this.deleteDocumentWithRelations(id)))
|
||||||
: Promise.resolve(),
|
: Promise.resolve(),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
@ -309,6 +309,35 @@ export class KnowledgeRepo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private deleteDocumentWithRelations = async (id: string): Promise<void> => {
|
||||||
|
const document = await this.documentModel.findById(id);
|
||||||
|
if (!document) return;
|
||||||
|
|
||||||
|
if (document.fileType === 'custom/folder') {
|
||||||
|
const children = await this.db.query.documents.findMany({
|
||||||
|
where: and(eq(documents.parentId, id), eq(documents.userId, this.userId)),
|
||||||
|
});
|
||||||
|
|
||||||
|
for (const child of children) {
|
||||||
|
await this.deleteDocumentWithRelations(child.id);
|
||||||
|
}
|
||||||
|
|
||||||
|
const childFiles = await this.db.query.files.findMany({
|
||||||
|
where: and(eq(files.parentId, id), eq(files.userId, this.userId)),
|
||||||
|
});
|
||||||
|
|
||||||
|
for (const file of childFiles) {
|
||||||
|
await this.fileModel.delete(file.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (document.fileId) {
|
||||||
|
await this.fileModel.delete(document.fileId);
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.documentModel.delete(id);
|
||||||
|
};
|
||||||
|
|
||||||
private buildFileQuery({
|
private buildFileQuery({
|
||||||
category,
|
category,
|
||||||
q,
|
q,
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import { KnowledgeBaseModel } from '@/database/models/knowledgeBase';
|
||||||
import type { KnowledgeBaseItem } from '@/database/schemas';
|
import type { KnowledgeBaseItem } from '@/database/schemas';
|
||||||
import { knowledgeBases } from '@/database/schemas';
|
import { knowledgeBases } from '@/database/schemas';
|
||||||
import type { LobeChatDatabase } from '@/database/type';
|
import type { LobeChatDatabase } from '@/database/type';
|
||||||
|
import { FileService as CoreFileService } from '@/server/services/file';
|
||||||
|
|
||||||
import { BaseService } from '../common/base.service';
|
import { BaseService } from '../common/base.service';
|
||||||
import { processPaginationConditions } from '../helpers/pagination';
|
import { processPaginationConditions } from '../helpers/pagination';
|
||||||
|
|
@ -237,8 +238,18 @@ export class KnowledgeBaseService extends BaseService {
|
||||||
throw this.createNotFoundError('Knowledge base not found or access denied');
|
throw this.createNotFoundError('Knowledge base not found or access denied');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete knowledge base
|
const result = await this.knowledgeBaseModel.deleteWithFiles(id);
|
||||||
await this.knowledgeBaseModel.delete(id);
|
|
||||||
|
if (result.deletedFiles.length > 0) {
|
||||||
|
const fileService = new CoreFileService(this.db, this.userId);
|
||||||
|
const urls = result.deletedFiles
|
||||||
|
.map((f: { url: string | null }) => f.url)
|
||||||
|
.filter(Boolean) as string[];
|
||||||
|
|
||||||
|
if (urls.length > 0) {
|
||||||
|
await fileService.deleteFiles(urls);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
this.log('info', 'Knowledge base deleted successfully', { id });
|
this.log('info', 'Knowledge base deleted successfully', { id });
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,12 @@
|
||||||
import { TRPCError } from '@trpc/server';
|
import { TRPCError } from '@trpc/server';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
import { serverDBEnv } from '@/config/db';
|
||||||
import { KnowledgeBaseModel } from '@/database/models/knowledgeBase';
|
import { KnowledgeBaseModel } from '@/database/models/knowledgeBase';
|
||||||
import { insertKnowledgeBasesSchema } from '@/database/schemas';
|
import { insertKnowledgeBasesSchema } from '@/database/schemas';
|
||||||
import { authedProcedure, router } from '@/libs/trpc/lambda';
|
import { authedProcedure, router } from '@/libs/trpc/lambda';
|
||||||
import { serverDatabase } from '@/libs/trpc/lambda/middleware';
|
import { serverDatabase } from '@/libs/trpc/lambda/middleware';
|
||||||
|
import { FileService } from '@/server/services/file';
|
||||||
import { type KnowledgeBaseItem } from '@/types/knowledgeBase';
|
import { type KnowledgeBaseItem } from '@/types/knowledgeBase';
|
||||||
|
|
||||||
const knowledgeBaseProcedure = authedProcedure.use(serverDatabase).use(async (opts) => {
|
const knowledgeBaseProcedure = authedProcedure.use(serverDatabase).use(async (opts) => {
|
||||||
|
|
@ -68,7 +70,15 @@ export const knowledgeBaseRouter = router({
|
||||||
}),
|
}),
|
||||||
|
|
||||||
removeAllKnowledgeBases: knowledgeBaseProcedure.mutation(async ({ ctx }) => {
|
removeAllKnowledgeBases: knowledgeBaseProcedure.mutation(async ({ ctx }) => {
|
||||||
return ctx.knowledgeBaseModel.deleteAll();
|
const result = await ctx.knowledgeBaseModel.deleteAllWithFiles(serverDBEnv.REMOVE_GLOBAL_FILE);
|
||||||
|
|
||||||
|
if (result.deletedFiles.length > 0) {
|
||||||
|
const fileService = new FileService(ctx.serverDB, ctx.userId);
|
||||||
|
const urls = result.deletedFiles.map((f) => f.url).filter(Boolean) as string[];
|
||||||
|
if (urls.length > 0) {
|
||||||
|
await fileService.deleteFiles(urls);
|
||||||
|
}
|
||||||
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
removeFilesFromKnowledgeBase: knowledgeBaseProcedure
|
removeFilesFromKnowledgeBase: knowledgeBaseProcedure
|
||||||
|
|
@ -78,9 +88,20 @@ export const knowledgeBaseRouter = router({
|
||||||
}),
|
}),
|
||||||
|
|
||||||
removeKnowledgeBase: knowledgeBaseProcedure
|
removeKnowledgeBase: knowledgeBaseProcedure
|
||||||
.input(z.object({ id: z.string(), removeFiles: z.boolean().optional() }))
|
.input(z.object({ id: z.string() }))
|
||||||
.mutation(async ({ input, ctx }) => {
|
.mutation(async ({ input, ctx }) => {
|
||||||
return ctx.knowledgeBaseModel.delete(input.id);
|
const result = await ctx.knowledgeBaseModel.deleteWithFiles(
|
||||||
|
input.id,
|
||||||
|
serverDBEnv.REMOVE_GLOBAL_FILE,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.deletedFiles.length > 0) {
|
||||||
|
const fileService = new FileService(ctx.serverDB, ctx.userId);
|
||||||
|
const urls = result.deletedFiles.map((f) => f.url).filter(Boolean) as string[];
|
||||||
|
if (urls.length > 0) {
|
||||||
|
await fileService.deleteFiles(urls);
|
||||||
|
}
|
||||||
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
updateKnowledgeBase: knowledgeBaseProcedure
|
updateKnowledgeBase: knowledgeBaseProcedure
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import { DocumentModel } from '@/database/models/document';
|
||||||
import { TopicDocumentModel } from '@/database/models/topicDocument';
|
import { TopicDocumentModel } from '@/database/models/topicDocument';
|
||||||
import { authedProcedure, router } from '@/libs/trpc/lambda';
|
import { authedProcedure, router } from '@/libs/trpc/lambda';
|
||||||
import { serverDatabase } from '@/libs/trpc/lambda/middleware';
|
import { serverDatabase } from '@/libs/trpc/lambda/middleware';
|
||||||
|
import { NotebookRuntimeService } from '@/server/services/notebook';
|
||||||
|
|
||||||
const notebookProcedure = authedProcedure.use(serverDatabase).use(async (opts) => {
|
const notebookProcedure = authedProcedure.use(serverDatabase).use(async (opts) => {
|
||||||
const { ctx } = opts;
|
const { ctx } = opts;
|
||||||
|
|
@ -12,6 +13,7 @@ const notebookProcedure = authedProcedure.use(serverDatabase).use(async (opts) =
|
||||||
return opts.next({
|
return opts.next({
|
||||||
ctx: {
|
ctx: {
|
||||||
documentModel: new DocumentModel(ctx.serverDB, ctx.userId),
|
documentModel: new DocumentModel(ctx.serverDB, ctx.userId),
|
||||||
|
notebookService: new NotebookRuntimeService({ serverDB: ctx.serverDB, userId: ctx.userId }),
|
||||||
topicDocumentModel: new TopicDocumentModel(ctx.serverDB, ctx.userId),
|
topicDocumentModel: new TopicDocumentModel(ctx.serverDB, ctx.userId),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
@ -60,10 +62,7 @@ export const notebookRouter = router({
|
||||||
deleteDocument: notebookProcedure
|
deleteDocument: notebookProcedure
|
||||||
.input(z.object({ id: z.string() }))
|
.input(z.object({ id: z.string() }))
|
||||||
.mutation(async ({ ctx, input }) => {
|
.mutation(async ({ ctx, input }) => {
|
||||||
// Remove associations first
|
await ctx.notebookService.deleteDocument(input.id);
|
||||||
await ctx.topicDocumentModel.deleteByDocumentId(input.id);
|
|
||||||
// Delete the document
|
|
||||||
await ctx.documentModel.delete(input.id);
|
|
||||||
|
|
||||||
return { success: true };
|
return { success: true };
|
||||||
}),
|
}),
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,12 @@ describe('DocumentService', () => {
|
||||||
vi.clearAllMocks();
|
vi.clearAllMocks();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('constructor', () => {
|
||||||
|
it('should not initialize FileService before file parsing is needed', () => {
|
||||||
|
expect(FileService).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('createDocument', () => {
|
describe('createDocument', () => {
|
||||||
it('should create a document without knowledgeBase', async () => {
|
it('should create a document without knowledgeBase', async () => {
|
||||||
const mockDoc = { id: 'doc-1', title: 'Test Doc' };
|
const mockDoc = { id: 'doc-1', title: 'Test Doc' };
|
||||||
|
|
|
||||||
|
|
@ -17,17 +17,22 @@ export class DocumentService {
|
||||||
userId: string;
|
userId: string;
|
||||||
private fileModel: FileModel;
|
private fileModel: FileModel;
|
||||||
private documentModel: DocumentModel;
|
private documentModel: DocumentModel;
|
||||||
private fileService: FileService;
|
private fileServiceInstance?: FileService;
|
||||||
private db: LobeChatDatabase;
|
private db: LobeChatDatabase;
|
||||||
|
|
||||||
constructor(db: LobeChatDatabase, userId: string) {
|
constructor(db: LobeChatDatabase, userId: string) {
|
||||||
this.userId = userId;
|
this.userId = userId;
|
||||||
this.db = db;
|
this.db = db;
|
||||||
this.fileModel = new FileModel(db, userId);
|
this.fileModel = new FileModel(db, userId);
|
||||||
this.fileService = new FileService(db, userId);
|
|
||||||
this.documentModel = new DocumentModel(db, userId);
|
this.documentModel = new DocumentModel(db, userId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private get fileService() {
|
||||||
|
this.fileServiceInstance ??= new FileService(this.db, this.userId);
|
||||||
|
|
||||||
|
return this.fileServiceInstance;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a document
|
* Create a document
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -2,17 +2,20 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
|
|
||||||
import { DocumentModel } from '@/database/models/document';
|
import { DocumentModel } from '@/database/models/document';
|
||||||
import { TopicDocumentModel } from '@/database/models/topicDocument';
|
import { TopicDocumentModel } from '@/database/models/topicDocument';
|
||||||
|
import { DocumentService } from '@/server/services/document';
|
||||||
|
|
||||||
import { NotebookRuntimeService } from '../index';
|
import { NotebookRuntimeService } from '../index';
|
||||||
|
|
||||||
vi.mock('@/database/models/document');
|
vi.mock('@/database/models/document');
|
||||||
vi.mock('@/database/models/topicDocument');
|
vi.mock('@/database/models/topicDocument');
|
||||||
|
vi.mock('@/server/services/document');
|
||||||
|
|
||||||
describe('NotebookRuntimeService', () => {
|
describe('NotebookRuntimeService', () => {
|
||||||
let service: NotebookRuntimeService;
|
let service: NotebookRuntimeService;
|
||||||
const mockDb = {} as any;
|
const mockDb = {} as any;
|
||||||
const mockUserId = 'test-user';
|
const mockUserId = 'test-user';
|
||||||
let mockDocumentModel: any;
|
let mockDocumentModel: any;
|
||||||
|
let mockDocumentService: any;
|
||||||
let mockTopicDocumentModel: any;
|
let mockTopicDocumentModel: any;
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
|
|
@ -25,6 +28,10 @@ describe('NotebookRuntimeService', () => {
|
||||||
update: vi.fn(),
|
update: vi.fn(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
mockDocumentService = {
|
||||||
|
deleteDocument: vi.fn(),
|
||||||
|
};
|
||||||
|
|
||||||
mockTopicDocumentModel = {
|
mockTopicDocumentModel = {
|
||||||
associate: vi.fn(),
|
associate: vi.fn(),
|
||||||
deleteByDocumentId: vi.fn(),
|
deleteByDocumentId: vi.fn(),
|
||||||
|
|
@ -32,6 +39,7 @@ describe('NotebookRuntimeService', () => {
|
||||||
};
|
};
|
||||||
|
|
||||||
vi.mocked(DocumentModel).mockImplementation(() => mockDocumentModel);
|
vi.mocked(DocumentModel).mockImplementation(() => mockDocumentModel);
|
||||||
|
vi.mocked(DocumentService).mockImplementation(() => mockDocumentService);
|
||||||
vi.mocked(TopicDocumentModel).mockImplementation(() => mockTopicDocumentModel);
|
vi.mocked(TopicDocumentModel).mockImplementation(() => mockTopicDocumentModel);
|
||||||
|
|
||||||
service = new NotebookRuntimeService({ serverDB: mockDb, userId: mockUserId });
|
service = new NotebookRuntimeService({ serverDB: mockDb, userId: mockUserId });
|
||||||
|
|
@ -169,12 +177,12 @@ describe('NotebookRuntimeService', () => {
|
||||||
describe('deleteDocument', () => {
|
describe('deleteDocument', () => {
|
||||||
it('should delete associations first then the document', async () => {
|
it('should delete associations first then the document', async () => {
|
||||||
mockTopicDocumentModel.deleteByDocumentId.mockResolvedValue(undefined);
|
mockTopicDocumentModel.deleteByDocumentId.mockResolvedValue(undefined);
|
||||||
mockDocumentModel.delete.mockResolvedValue(undefined);
|
mockDocumentService.deleteDocument.mockResolvedValue(undefined);
|
||||||
|
|
||||||
await service.deleteDocument('doc-1');
|
await service.deleteDocument('doc-1');
|
||||||
|
|
||||||
expect(mockTopicDocumentModel.deleteByDocumentId).toHaveBeenCalledWith('doc-1');
|
expect(mockTopicDocumentModel.deleteByDocumentId).toHaveBeenCalledWith('doc-1');
|
||||||
expect(mockDocumentModel.delete).toHaveBeenCalledWith('doc-1');
|
expect(mockDocumentService.deleteDocument).toHaveBeenCalledWith('doc-1');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import { type LobeChatDatabase } from '@lobechat/database';
|
||||||
|
|
||||||
import { DocumentModel } from '@/database/models/document';
|
import { DocumentModel } from '@/database/models/document';
|
||||||
import { TopicDocumentModel } from '@/database/models/topicDocument';
|
import { TopicDocumentModel } from '@/database/models/topicDocument';
|
||||||
|
import { DocumentService } from '@/server/services/document';
|
||||||
|
|
||||||
interface DocumentServiceResult {
|
interface DocumentServiceResult {
|
||||||
content: string | null;
|
content: string | null;
|
||||||
|
|
@ -46,10 +47,12 @@ const toServiceResult = (doc: {
|
||||||
});
|
});
|
||||||
|
|
||||||
export class NotebookRuntimeService {
|
export class NotebookRuntimeService {
|
||||||
|
private documentService: DocumentService;
|
||||||
private documentModel: DocumentModel;
|
private documentModel: DocumentModel;
|
||||||
private topicDocumentModel: TopicDocumentModel;
|
private topicDocumentModel: TopicDocumentModel;
|
||||||
|
|
||||||
constructor(options: NotebookRuntimeServiceOptions) {
|
constructor(options: NotebookRuntimeServiceOptions) {
|
||||||
|
this.documentService = new DocumentService(options.serverDB, options.userId);
|
||||||
this.documentModel = new DocumentModel(options.serverDB, options.userId);
|
this.documentModel = new DocumentModel(options.serverDB, options.userId);
|
||||||
this.topicDocumentModel = new TopicDocumentModel(options.serverDB, options.userId);
|
this.topicDocumentModel = new TopicDocumentModel(options.serverDB, options.userId);
|
||||||
}
|
}
|
||||||
|
|
@ -73,7 +76,7 @@ export class NotebookRuntimeService {
|
||||||
|
|
||||||
deleteDocument = async (id: string): Promise<void> => {
|
deleteDocument = async (id: string): Promise<void> => {
|
||||||
await this.topicDocumentModel.deleteByDocumentId(id);
|
await this.topicDocumentModel.deleteByDocumentId(id);
|
||||||
await this.documentModel.delete(id);
|
await this.documentService.deleteDocument(id);
|
||||||
};
|
};
|
||||||
|
|
||||||
getDocument = async (id: string): Promise<DocumentServiceResult | undefined> => {
|
getDocument = async (id: string): Promise<DocumentServiceResult | undefined> => {
|
||||||
|
|
|
||||||
95
tests/openapi/knowledge-base-delete.test.ts
Normal file
95
tests/openapi/knowledge-base-delete.test.ts
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
|
|
||||||
|
import type { LobeChatDatabase } from '@/database/type';
|
||||||
|
import { FileService } from '@/server/services/file';
|
||||||
|
|
||||||
|
import { KnowledgeBaseService } from '../../packages/openapi/src/services/knowledge-base.service';
|
||||||
|
|
||||||
|
vi.mock('@/server/services/file');
|
||||||
|
|
||||||
|
describe('KnowledgeBaseService.deleteKnowledgeBase', () => {
|
||||||
|
let db: LobeChatDatabase;
|
||||||
|
let deleteFilesSpy: ReturnType<typeof vi.fn>;
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
db = {
|
||||||
|
query: {
|
||||||
|
knowledgeBases: {
|
||||||
|
findFirst: vi.fn().mockResolvedValue({ id: 'kb-1', userId: 'user-1' }),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
} as unknown as LobeChatDatabase;
|
||||||
|
|
||||||
|
deleteFilesSpy = vi.fn().mockResolvedValue(undefined);
|
||||||
|
vi.mocked(FileService).mockImplementation(() => ({ deleteFiles: deleteFilesSpy }) as any);
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
vi.restoreAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
const createService = () => {
|
||||||
|
const service = new KnowledgeBaseService(db, 'user-1');
|
||||||
|
|
||||||
|
vi.spyOn(service as any, 'log').mockImplementation(() => {});
|
||||||
|
vi.spyOn(service as any, 'resolveOperationPermission').mockResolvedValue({
|
||||||
|
isPermitted: true,
|
||||||
|
message: '',
|
||||||
|
});
|
||||||
|
|
||||||
|
return service;
|
||||||
|
};
|
||||||
|
|
||||||
|
it('should always delete exclusive files together with the knowledge base', async () => {
|
||||||
|
const service = createService();
|
||||||
|
const deleteWithFilesSpy = vi.fn().mockResolvedValue({
|
||||||
|
deletedFiles: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
Reflect.set(service, 'knowledgeBaseModel', {
|
||||||
|
deleteWithFiles: deleteWithFilesSpy,
|
||||||
|
});
|
||||||
|
|
||||||
|
await expect(service.deleteKnowledgeBase('kb-1')).resolves.toEqual({
|
||||||
|
message: 'Knowledge base deleted successfully',
|
||||||
|
success: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(deleteWithFilesSpy).toHaveBeenCalledWith('kb-1');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should delete external files when deleted knowledge-base files have URLs', async () => {
|
||||||
|
const service = createService();
|
||||||
|
|
||||||
|
Reflect.set(service, 'knowledgeBaseModel', {
|
||||||
|
deleteWithFiles: vi.fn().mockResolvedValue({
|
||||||
|
deletedFiles: [
|
||||||
|
{ id: 'file-1', url: 'https://example.com/a.pdf' },
|
||||||
|
{ id: 'file-2', url: null },
|
||||||
|
{ id: 'file-3', url: 'https://example.com/b.pdf' },
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
await service.deleteKnowledgeBase('kb-1');
|
||||||
|
|
||||||
|
expect(deleteFilesSpy).toHaveBeenCalledWith([
|
||||||
|
'https://example.com/a.pdf',
|
||||||
|
'https://example.com/b.pdf',
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should skip external file deletion when deleted files have no URLs', async () => {
|
||||||
|
const service = createService();
|
||||||
|
|
||||||
|
Reflect.set(service, 'knowledgeBaseModel', {
|
||||||
|
deleteWithFiles: vi.fn().mockResolvedValue({
|
||||||
|
deletedFiles: [{ id: 'file-1', url: null }],
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
await service.deleteKnowledgeBase('kb-1');
|
||||||
|
|
||||||
|
expect(deleteFilesSpy).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
Loading…
Reference in a new issue