✨ feat: support uploading files directly into chat context (#7751)

* add document service
* add file item
* add file content for direct upload file
* fix tests
* fix tests
* fix tests
* add debug log for file-loaders
* add debug log and test
* improve loading
* update tests
* fix pdf parser
* fix pdf version
* fix pdf worker url
* fix pdf worker url
* fix test
2026-04-21 17:47:27 +00:00 · 2025-05-10 00:58:39 +08:00 · 2025-05-10 00:58:39 +08:00 · 39b790ec37
commit 39b790ec37
parent 2beee680bb
46 changed files with 6349 additions and 120 deletions
--- a/docs/development/database-schema.dbml
+++ b/docs/development/database-schema.dbml
@ -115,6 +115,45 @@ table async_tasks {
  updated_at "timestamp with time zone" [not null, default: `now()`]
 }

+table document_chunks {
+  document_id varchar(30) [not null]
+  chunk_id uuid [not null]
+  page_index integer
+  user_id text [not null]
+  created_at "timestamp with time zone" [not null, default: `now()`]
+
+  indexes {
+    (document_id, chunk_id) [pk]
+  }
+}
+
+table documents {
+  id varchar(30) [pk, not null]
+  title text
+  content text
+  file_type varchar(255) [not null]
+  filename text
+  total_char_count integer [not null]
+  total_line_count integer [not null]
+  metadata jsonb
+  pages jsonb
+  source_type text [not null]
+  source text [not null]
+  file_id text
+  user_id text [not null]
+  client_id text
+  accessed_at "timestamp with time zone" [not null, default: `now()`]
+  created_at "timestamp with time zone" [not null, default: `now()`]
+  updated_at "timestamp with time zone" [not null, default: `now()`]
+
+  indexes {
+    source [name: 'documents_source_idx']
+    file_type [name: 'documents_file_type_idx']
+    file_id [name: 'documents_file_id_idx']
+    (client_id, user_id) [name: 'documents_client_id_user_id_unique', unique]
+  }
+}
+
 table files {
  id text [pk, not null]
  user_id text [not null]
@ -670,6 +709,17 @@ table threads {
  }
 }

+table topic_documents {
+  document_id text [not null]
+  topic_id text [not null]
+  user_id text [not null]
+  created_at "timestamp with time zone" [not null, default: `now()`]
+
+  indexes {
+    (document_id, topic_id) [pk]
+  }
+}
+
 table topics {
  id text [pk, not null]
  title text
@ -744,6 +794,10 @@ ref: agents_to_sessions.agent_id > agents.id

 ref: unstructured_chunks.file_id - files.id

+ref: document_chunks.document_id > documents.id
+
+ref: documents.file_id > files.id
+
 ref: files.embedding_task_id - async_tasks.id

 ref: messages.session_id - sessions.id
@ -756,4 +810,8 @@ ref: threads.source_message_id - messages.id

 ref: sessions.group_id - session_groups.id

-ref: topics.session_id - sessions.id
+ref: topic_documents.document_id > documents.id
+
+ref: topic_documents.topic_id > topics.id
+
+ref: topics.session_id - sessions.id
--- a/package.json
+++ b/package.json
@ -143,6 +143,7 @@
    "@langchain/community": "^0.3.38",
    "@lobechat/electron-client-ipc": "workspace:*",
    "@lobechat/electron-server-ipc": "workspace:*",
+    "@lobechat/file-loaders": "workspace:*",
    "@lobechat/web-crawler": "workspace:*",
    "@lobehub/charts": "^2.0.0",
    "@lobehub/chat-plugin-sdk": "^1.32.4",
--- a/packages/file-loaders/package.json
+++ b/packages/file-loaders/package.json
@ -20,6 +20,9 @@
  "author": "LobeHub <i@lobehub.com>",
  "sideEffects": false,
  "main": "./src/index.ts",
+  "scripts": {
+    "test": "vitest"
+  },
  "dependencies": {
    "@langchain/community": "^0.3.41",
    "@langchain/core": "^0.3.45",
@ -27,13 +30,14 @@
    "concat-stream": "^2.0.0",
    "mammoth": "^1.8.0",
    "officeparser": "^5.1.1",
-    "pdfjs-dist": "4.8.69",
+    "pdfjs-dist": "4.10.38",
    "xlsx": "^0.18.5",
    "yauzl": "^3.2.0"
  },
  "devDependencies": {
    "@types/concat-stream": "^2.0.3",
    "@types/yauzl": "^2.10.3",
+    "canvas": "^3.1.0",
    "typescript": "^5"
  },
  "peerDependencies": {
--- a/packages/file-loaders/src/loadFile.ts
+++ b/packages/file-loaders/src/loadFile.ts
@ -1,3 +1,4 @@
+import debug from 'debug';
 import { stat } from 'node:fs/promises';
 import * as path from 'node:path';

@ -7,37 +8,52 @@ import { FileDocument, FileMetadata, SupportedFileType } from './types';
 import type { DocumentPage, FileLoaderInterface } from './types';
 import { isTextReadableFile } from './utils/isTextReadableFile';

+const log = debug('file-loaders:loadFile');
+
 /**
 * Determines the file type based on the filename extension.
 * @param filePath The path to the file.
 * @returns The determined file type or 'txt' if text-readable, undefined otherwise.
 */
 const getFileType = (filePath: string): SupportedFileType | undefined => {
+  log('Determining file type for:', filePath);
  const extension = path.extname(filePath).toLowerCase().replace('.', '');

-  if (!extension) return 'txt'; // Treat files without extension as text?
+  if (!extension) {
+    log('No extension found, treating as txt');
+    return 'txt'; // Treat files without extension as text?
+  }

  // Prioritize checking if it's a generally text-readable type
  if (isTextReadableFile(extension)) {
+    log(`Extension '${extension}' is text-readable, treating as txt`);
    return 'txt';
  }

  // Handle specific non-text or complex types
+  log(`Checking specific types for extension: '${extension}'`);
  switch (extension) {
    case 'pdf': {
+      log('File type identified as pdf');
      return 'pdf';
    }
    case 'docx': {
+      log('File type identified as docx');
      return 'docx';
    }
    case 'xlsx':
    case 'xls': {
+      log('File type identified as excel');
      return 'excel';
    }
    case 'pptx': {
+      log('File type identified as pptx');
      return 'pptx';
    }
    default: {
+      log(
+        `Extension '${extension}' is not a specifically handled type and not text-readable. Unsupported.`,
+      );
      // If not text-readable and not a specific known type, it's unsupported
      return undefined;
    }
@ -59,18 +75,23 @@ export const loadFile = async (
  filePath: string,
  fileMetadata?: FileMetadata,
 ): Promise<FileDocument> => {
+  log('Starting to load file:', filePath, 'with metadata:', fileMetadata);
  let stats;
  let fsError: string | undefined;

  try {
+    log('Attempting to get file stats for:', filePath);
    stats = await stat(filePath);
+    log('Successfully retrieved file stats:', stats);
  } catch (e) {
    const error = e as Error;
+    log('Error getting file stats for %s: %s', filePath, error.message);
    console.error(`Error getting file stats for ${filePath}: ${error.message}`);
    fsError = `Failed to access file stats: ${error.message}`;
  }

  // Determine base file info from path and stats (if available)
+  log('Determining base file info');
  const fileExtension = path.extname(filePath).slice(1).toLowerCase();
  const baseFilename = path.basename(filePath);

@ -80,13 +101,22 @@ export const loadFile = async (
  const fileType = fileMetadata?.fileType ?? fileExtension;
  const createdTime = fileMetadata?.createdTime ?? stats?.ctime ?? new Date();
  const modifiedTime = fileMetadata?.modifiedTime ?? stats?.mtime ?? new Date();
+  log('File info determined/overridden: %O', {
+    createdTime,
+    fileType,
+    filename,
+    modifiedTime,
+    source,
+  });

  const paserType = getFileType(filePath);
+  log('Parser type determined as:', paserType);

  // Select the loader CLASS based on the determined fileType, fallback to DefaultLoader
  const LoaderClass: new () => FileLoaderInterface = paserType
    ? fileLoaders[paserType]
    : DefaultLoader;
+  log('Selected loader class:', LoaderClass.name);

  if (!paserType) {
    console.warn(
@ -102,17 +132,23 @@ export const loadFile = async (
  let loaderSpecificMetadata: any | undefined;

  // Instantiate the loader
+  log('Instantiating loader:', LoaderClass.name);
  const loaderInstance = new LoaderClass();

  // If we couldn't even get stats, skip loader execution
  if (!fsError) {
+    log('File stats available, proceeding with loader execution.');
    try {
      // 1. Load pages using the instance
+      log('Loading pages with loader:', LoaderClass.name, 'for file:', filePath);
      pages = await loaderInstance.loadPages(filePath);
+      log('Pages loaded successfully, count:', pages.length);

      try {
        // 2. Aggregate content using the instance
+        log('Aggregating content with loader:', LoaderClass.name);
        aggregatedContent = await loaderInstance.aggregateContent(pages);
+        log('Content aggregated successfully, length:', aggregatedContent.length);
      } catch (aggError) {
        const error = aggError as Error;
        console.error(
@ -124,8 +160,10 @@ export const loadFile = async (

      // 3. Attach document-specific metadata if loader supports it
      if (typeof loaderInstance.attachDocumentMetadata === 'function') {
+        log('Loader supports attachDocumentMetadata. Attaching...');
        try {
          loaderSpecificMetadata = await loaderInstance.attachDocumentMetadata(filePath);
+          log('Document-specific metadata attached:', loaderSpecificMetadata);
        } catch (metaErr) {
          const error = metaErr as Error;
          console.error(
@ -133,6 +171,8 @@ export const loadFile = async (
          );
          metadataError = `Metadata attachment failed: ${error.message}`;
        }
+      } else {
+        log('Loader does not support attachDocumentMetadata.');
      }
    } catch (loadErr) {
      const error = loadErr as Error;
@ -152,6 +192,7 @@ export const loadFile = async (
      // Aggregated content remains empty
    }
  } else {
+    log('File stats access failed (fsError: %s). Creating minimal error page.', fsError);
    // If stats failed, create a minimal error page
    pages = [
      {
@ -167,16 +208,20 @@ export const loadFile = async (
  // Calculate totals from the loaded pages
  let totalCharCount = 0;
  let totalLineCount = 0;
+  log('Calculating total char and line counts from pages.');
  for (const page of pages) {
    totalCharCount += page.charCount;
    totalLineCount += page.lineCount;
  }
+  log('Totals calculated:', { totalCharCount, totalLineCount });

  // Combine all potential errors
  const combinedError =
    [fsError, loaderError, aggregationError, metadataError].filter(Boolean).join('; ') || undefined;
+  if (combinedError) log('Combined errors:', combinedError);

  // Construct the final FileDocument
+  log('Constructing final FileDocument.');
  const fileDocument: FileDocument = {
    content: aggregatedContent, // Use content from aggregateContent
    createdTime,
@ -202,5 +247,10 @@ export const loadFile = async (
    delete fileDocument.metadata.error;
  }

+  log('File loading process completed for:', filePath, 'Returning document:', {
+    fileType: fileDocument.fileType,
+    filename: fileDocument.filename,
+    pages: fileDocument.pages?.length,
+  });
  return fileDocument;
 };
--- a/packages/file-loaders/src/loaders/docx/index.ts
+++ b/packages/file-loaders/src/loaders/docx/index.ts
@ -1,15 +1,21 @@
 import { DocxLoader as LangchainDocxLoader } from '@langchain/community/document_loaders/fs/docx';
+import debug from 'debug';

 import type { DocumentPage, FileLoaderInterface } from '../../types';

+const log = debug('file-loaders:docx');
+
 /**
 * Loads Word documents (.docx) using the LangChain Community DocxLoader.
 */
 export class DocxLoader implements FileLoaderInterface {
  async loadPages(filePath: string): Promise<DocumentPage[]> {
+    log('Loading DOCX file:', filePath);
    try {
      const loader = new LangchainDocxLoader(filePath);
+      log('LangChain DocxLoader created');
      const docs = await loader.load(); // Langchain DocxLoader typically loads the whole doc as one
+      log('DOCX document loaded, parts:', docs.length);

      const pages: DocumentPage[] = docs.map((doc) => {
        const pageContent = doc.pageContent || '';
@ -27,6 +33,8 @@ export class DocxLoader implements FileLoaderInterface {
        // @ts-expect-error Remove source if present, as it's handled at the FileDocument level
        delete metadata.source;

+        log('DOCX document processed, lines:', lineCount, 'chars:', charCount);
+
        return {
          charCount,
          lineCount,
@ -37,6 +45,7 @@ export class DocxLoader implements FileLoaderInterface {

      // If docs array is empty (e.g., empty file), create an empty page
      if (pages.length === 0) {
+        log('No content in DOCX document, creating empty page');
        pages.push({
          charCount: 0,
          lineCount: 0,
@ -45,9 +54,11 @@ export class DocxLoader implements FileLoaderInterface {
        });
      }

+      log('DOCX loading completed, total pages:', pages.length);
      return pages;
    } catch (e) {
      const error = e as Error;
+      log('Error encountered while loading DOCX file');
      console.error(`Error loading DOCX file ${filePath} using LangChain loader: ${error.message}`);
      const errorPage: DocumentPage = {
        charCount: 0,
@ -57,6 +68,7 @@ export class DocxLoader implements FileLoaderInterface {
        },
        pageContent: '',
      };
+      log('Created error page for failed DOCX loading');
      return [errorPage];
    }
  }
@ -68,6 +80,9 @@ export class DocxLoader implements FileLoaderInterface {
   * @returns Aggregated content as a string.
   */
  async aggregateContent(pages: DocumentPage[]): Promise<string> {
-    return pages.map((page) => page.pageContent).join('\n\n');
+    log('Aggregating content from', pages.length, 'DOCX pages');
+    const result = pages.map((page) => page.pageContent).join('\n\n');
+    log('DOCX content aggregated successfully, length:', result.length);
+    return result;
  }
 }
--- a/packages/file-loaders/src/loaders/excel/index.ts
+++ b/packages/file-loaders/src/loaders/excel/index.ts
@ -1,26 +1,34 @@
+import debug from 'debug';
 import { readFile } from 'node:fs/promises';
 import * as xlsx from 'xlsx';

 import type { DocumentPage, FileLoaderInterface } from '../../types';

+const log = debug('file-loaders:excel');
+
 /**
 * Converts sheet data (array of objects) to a Markdown table string.
 * Handles empty sheets and escapes pipe characters.
 */
 function sheetToMarkdownTable(jsonData: Record<string, any>[]): string {
+  log('Converting sheet data to Markdown table, rows:', jsonData?.length || 0);
  if (!jsonData || jsonData.length === 0) {
+    log('Sheet is empty, returning placeholder message');
    return '*Sheet is empty or contains no data.*';
  }

  // Ensure all rows have the same keys based on the first row, handle potentially sparse data
  const headers = Object.keys(jsonData[0] || {});
+  log('Sheet headers:', headers);
  if (headers.length === 0) {
+    log('Sheet has no headers, returning placeholder message');
    return '*Sheet has headers but no data.*';
  }

  const headerRow = `| ${headers.join(' | ')} |`;
  const separatorRow = `| ${headers.map(() => '---').join(' | ')} |`;

+  log('Building data rows for Markdown table');
  const dataRows = jsonData
    .map((row) => {
      const cells = headers.map((header) => {
@ -34,7 +42,9 @@ function sheetToMarkdownTable(jsonData: Record<string, any>[]): string {
    })
    .join('\n');

-  return `${headerRow}\n${separatorRow}\n${dataRows}`;
+  const result = `${headerRow}\n${separatorRow}\n${dataRows}`;
+  log('Markdown table created, length:', result.length);
+  return result;
 }

 /**
@ -43,13 +53,20 @@ function sheetToMarkdownTable(jsonData: Record<string, any>[]): string {
 */
 export class ExcelLoader implements FileLoaderInterface {
  async loadPages(filePath: string): Promise<DocumentPage[]> {
+    log('Loading Excel file:', filePath);
    const pages: DocumentPage[] = [];
    try {
      // Use readFile for async operation compatible with other loaders
+      log('Reading Excel file as buffer');
      const dataBuffer = await readFile(filePath);
+      log('Excel file read successfully, size:', dataBuffer.length, 'bytes');
+
+      log('Parsing Excel workbook');
      const workbook = xlsx.read(dataBuffer, { type: 'buffer' });
+      log('Excel workbook parsed successfully, sheets:', workbook.SheetNames.length);

      for (const sheetName of workbook.SheetNames) {
+        log(`Processing sheet: ${sheetName}`);
        const worksheet = workbook.Sheets[sheetName];
        // Use sheet_to_json to get array of objects for our custom markdown function
        const jsonData = xlsx.utils.sheet_to_json<Record<string, any>>(worksheet, {
@ -57,6 +74,7 @@ export class ExcelLoader implements FileLoaderInterface {
          defval: '', // Use empty string for blank cells
          raw: false, // Return formatted text instead of raw cell values
        });
+        log(`Sheet ${sheetName} converted to JSON, rows:`, jsonData.length);

        // Convert to markdown using YOUR helper function
        const tableMarkdown = sheetToMarkdownTable(jsonData);
@ -64,6 +82,7 @@ export class ExcelLoader implements FileLoaderInterface {
        const lines = tableMarkdown.split('\n');
        const lineCount = lines.length;
        const charCount = tableMarkdown.length;
+        log(`Sheet ${sheetName} converted to Markdown, lines: ${lineCount}, chars: ${charCount}`);

        pages.push({
          // Trim whitespace
@ -74,9 +93,11 @@ export class ExcelLoader implements FileLoaderInterface {
          },
          pageContent: tableMarkdown.trim(),
        });
+        log(`Added sheet ${sheetName} as page`);
      }

      if (pages.length === 0) {
+        log('Excel file contains no sheets, creating empty page with error');
        pages.push({
          charCount: 0,
          lineCount: 0,
@ -87,9 +108,11 @@ export class ExcelLoader implements FileLoaderInterface {
        });
      }

+      log('Excel loading completed, total pages:', pages.length);
      return pages;
    } catch (e) {
      const error = e as Error;
+      log('Error encountered while loading Excel file');
      console.error(`Error loading Excel file ${filePath}: ${error.message}`);
      const errorPage: DocumentPage = {
        charCount: 0,
@ -99,6 +122,7 @@ export class ExcelLoader implements FileLoaderInterface {
        },
        pageContent: '',
      };
+      log('Created error page for failed Excel loading');
      return [errorPage];
    }
  }
@ -110,12 +134,16 @@ export class ExcelLoader implements FileLoaderInterface {
   * @returns Aggregated content as a string.
   */
  async aggregateContent(pages: DocumentPage[]): Promise<string> {
-    return pages
+    log('Aggregating content from', pages.length, 'Excel pages');
+    const result = pages
      .map((page) => {
        const sheetName = page.metadata.sheetName;
        const header = sheetName ? `## Sheet: ${sheetName}\n\n` : '';
        return header + page.pageContent;
      })
      .join('\n\n---\n\n'); // Separator between sheets
+
+    log('Excel content aggregated successfully, length:', result.length);
+    return result;
  }
 }
--- a/packages/file-loaders/src/loaders/pdf/snapshots/index.test.ts.snap
+++ b/packages/file-loaders/src/loaders/pdf/snapshots/index.test.ts.snap
@ -48,7 +48,7 @@ exports[`PdfLoader > should attach document metadata correctly 1`] = `
    "Title": "test",
  },
  "pdfMetadata": null,
-  "pdfVersion": "4.8.69",
+  "pdfVersion": "4.10.38",
 }
 `;

--- a/packages/file-loaders/src/loaders/pdf/index.ts
+++ b/packages/file-loaders/src/loaders/pdf/index.ts
@ -1,37 +1,54 @@
+import debug from 'debug';
 import { readFile } from 'node:fs/promises';
-import * as pdfjsLib from 'pdfjs-dist';
-import type { PDFDocumentProxy, PDFPageProxy, TextContent } from 'pdfjs-dist/types/src/display/api';
+import type { PDFDocumentProxy, PDFPageProxy } from 'pdfjs-dist';
+import { getDocument, version } from 'pdfjs-dist/legacy/build/pdf.mjs';
+// @ts-ignore
+import * as _pdfjsWorker from 'pdfjs-dist/legacy/build/pdf.worker.mjs';
+import type { TextContent } from 'pdfjs-dist/types/src/display/api';

 import type { DocumentPage, FileLoaderInterface } from '../../types';

+const log = debug('file-loaders:pdf');
+
 /**
 * Loads PDF files page by page using the official pdfjs-dist library.
 */
 export class PdfLoader implements FileLoaderInterface {
  private pdfInstance: PDFDocumentProxy | null = null;
+  private pdfjsWorker = _pdfjsWorker;

  private async getPDFFile(filePath: string) {
-    const dataBuffer = await readFile(filePath);
+    // NOTE: GlobalWorkerOptions.workerSrc is not set anywhere in this module.
+    // The worker build is imported at the top of the file as `_pdfjsWorker`;
+    // presumably pdfjs-dist picks it up when it creates a worker - TODO confirm.

-    const loadingTask = pdfjsLib.getDocument({
+    log('Reading PDF file:', filePath);
+    const dataBuffer = await readFile(filePath);
+    log('PDF file read successfully, size:', dataBuffer.length, 'bytes');
+
+    const loadingTask = getDocument({
      data: new Uint8Array(dataBuffer.buffer, dataBuffer.byteOffset, dataBuffer.length),
      useSystemFonts: true,
-      // Explicitly disable worker thread
-      worker: undefined, // Attempt to use system fonts
    });

-    return await loadingTask.promise;
+    log('PDF document loading task created');
+    const pdf = await loadingTask.promise;
+    log('PDF document loaded successfully, pages:', pdf.numPages);
+    return pdf;
  }

  async loadPages(filePath: string): Promise<DocumentPage[]> {
+    log('Starting to load PDF pages from:', filePath);
    try {
      const pdf: PDFDocumentProxy = await this.getPDFFile(filePath);

      const pages: DocumentPage[] = [];
+      log(`Processing ${pdf.numPages} PDF pages`);

      for (let i = 1; i <= pdf.numPages; i += 1) {
+        log(`Loading page ${i}/${pdf.numPages}`);
        const page: PDFPageProxy = await pdf.getPage(i);
        const content: TextContent = await page.getTextContent();
+        log(`Page ${i} text content retrieved, items:`, content.items.length);

        // --- Revert to EXACT Simple Langchain PDFLoader Logic ---
        let lastY;
@ -61,6 +78,7 @@ export class PdfLoader implements FileLoaderInterface {
        const pageLines = cleanedPageContent.split('\n');
        const lineCount = pageLines.length;
        const charCount = cleanedPageContent.length;
+        log(`Page ${i} processed, lines: ${lineCount}, chars: ${charCount}`);

        pages.push({
          charCount,
@ -70,15 +88,19 @@ export class PdfLoader implements FileLoaderInterface {
        });

        // Clean up page resources
+        log(`Cleaning up page ${i} resources`);
        page.cleanup();
      }

      // Clean up document resources
+      log('Cleaning up PDF document resources');
      await pdf.destroy();

+      log(`PDF loading completed for ${filePath}, total pages:`, pages.length);
      return pages;
    } catch (e) {
      const error = e as Error;
+      log('Error encountered while loading PDF file');
      console.error(
        `Error loading PDF file ${filePath} using pdfjs-dist: ${error.message}`,
        error.stack,
@ -92,6 +114,7 @@ export class PdfLoader implements FileLoaderInterface {
        },
        pageContent: '',
      };
+      log('Created error page for failed PDF loading');
      return [errorPage];
    }
  }
@ -103,25 +126,42 @@ export class PdfLoader implements FileLoaderInterface {
   * @returns Aggregated content as a string.
   */
  async aggregateContent(pages: DocumentPage[]): Promise<string> {
-    return pages
-      .filter((page) => !page.metadata.error)
-      .map((page) => page.pageContent)
-      .join('\n\n');
+    log('Aggregating content from', pages.length, 'PDF pages');
+    const validPages = pages.filter((page) => !page.metadata.error);
+    log(
+      `Found ${validPages.length} valid pages for aggregation (${pages.length - validPages.length} pages with errors filtered out)`,
+    );
+
+    const result = validPages.map((page) => page.pageContent).join('\n\n');
+    log('PDF content aggregated successfully, length:', result.length);
+    return result;
  }

  async attachDocumentMetadata(filePath: string): Promise<any> {
+    log('Attaching document metadata for PDF:', filePath);
    const pdf: PDFDocumentProxy = await this.getPDFFile(filePath);

-    const pdfMetadata = (await pdf.getMetadata().catch(() => null)) ?? null;
+    log('Getting PDF metadata');
+    const pdfMetadata =
+      (await pdf.getMetadata().catch((err) => {
+        log('Error retrieving PDF metadata');
+        console.error(`Error getting PDF metadata: ${err.message}`);
+        return null;
+      })) ?? null;
+
    const pdfInfo = pdfMetadata?.info ?? {};
    const metadata = pdfMetadata?.metadata ?? null;
+    log('PDF metadata retrieved:', {
+      hasInfo: !!Object.keys(pdfInfo).length,
+      hasMetadata: !!metadata,
+    });

    return {
      // PDF info (Author, Title, etc.)
      pdfInfo: pdfInfo,
      // PDF metadata
      pdfMetadata: metadata,
-      pdfVersion: pdfjsLib.version,
+      pdfVersion: version,
    };
  }
 }
--- a/packages/file-loaders/src/loaders/pptx/index.ts
+++ b/packages/file-loaders/src/loaders/pptx/index.ts
@ -1,8 +1,11 @@
+import debug from 'debug';
 import path from 'node:path';

 import type { DocumentPage, FileLoaderInterface } from '../../types';
 import { type ExtractedFile, extractFiles, parseString } from '../../utils/parser-utils';

+const log = debug('file-loaders:pptx');
+
 /**
 * Represents a loader for PPTX files using extracted utility functions.
 *
@ -19,20 +22,25 @@ export class PptxLoader implements FileLoaderInterface {
   *          `DocumentPage` object with error information in its metadata.
   */
  async loadPages(filePath: string): Promise<DocumentPage[]> {
+    log('Loading PPTX file:', filePath);
    const sourceFileName = path.basename(filePath);
+    log('Source file name:', sourceFileName);

    try {
      // --- File Extraction Step ---
      const slidesRegex = /ppt\/slides\/slide\d+\.xml/g;
      const slideNumberRegex = /slide(\d+)\.xml/;

+      log('Extracting slide XML files from PPTX');
      // Extract only slide XML files
      const slideFiles: ExtractedFile[] = await extractFiles(filePath, (fileName) =>
        slidesRegex.test(fileName),
      );
+      log('Extracted slide files:', slideFiles.length);

      // --- Validation Step ---
      if (slideFiles.length === 0) {
+        log('No slide XML files found in the PPTX file');
        console.warn(`No slide XML files found in ${sourceFileName}. May be corrupted or empty.`);
        return [
          this.createErrorPage(
@ -43,6 +51,7 @@ export class PptxLoader implements FileLoaderInterface {
      }

      // --- Sorting Step ---
+      log('Sorting slide files by slide number');
      // Sort files based on the slide number extracted from the path
      slideFiles.sort((a, b) => {
        const matchA = a.path.match(slideNumberRegex);
@ -51,13 +60,17 @@ export class PptxLoader implements FileLoaderInterface {
        const numB = matchB ? parseInt(matchB[1], 10) : Infinity;
        return numA - numB;
      });
+      log('Slide files sorted');

      // --- Page Creation Step ---
+      log('Creating document pages from slide files');
      const pages: DocumentPage[] = slideFiles
        .map((slideFile, index) => {
          try {
+            log(`Processing slide ${index + 1}/${slideFiles.length}, path: ${slideFile.path}`);
            const xmlDoc = parseString(slideFile.content);
            const paragraphNodes = xmlDoc.getElementsByTagName('a:p');
+            log(`Found ${paragraphNodes.length} paragraph nodes in slide ${index + 1}`);

            const slideText = Array.from(paragraphNodes)
              .map((pNode) => {
@ -72,6 +85,9 @@ export class PptxLoader implements FileLoaderInterface {
            const lines = slideText.split('\n');
            const slideNumberMatch = slideFile.path.match(slideNumberRegex);
            const slideNumber = slideNumberMatch ? parseInt(slideNumberMatch[1], 10) : index + 1; // Fallback to index if regex fails
+            log(
+              `Slide ${index + 1} text extracted, lines: ${lines.length}, characters: ${slideText.length}`,
+            );

            const metadata = {
              pageCount: slideFiles.length, // Total number of slides found
@ -86,6 +102,7 @@ export class PptxLoader implements FileLoaderInterface {
              pageContent: slideText.trim(), // Trim final content
            };
          } catch (parseError) {
+            log(`Error parsing slide ${slideFile.path}`);
            console.error(
              `Failed to parse XML for slide ${slideFile.path} in ${sourceFileName}: ${parseError instanceof Error ? parseError.message : String(parseError)}`,
            );
@ -101,9 +118,11 @@ export class PptxLoader implements FileLoaderInterface {
        })
        // Filter out any potential nulls if we change the error handling above
        .filter((page): page is DocumentPage => page !== null);
+      log(`Created ${pages.length} document pages from slides`);

      if (pages.length === 0) {
        // This case might happen if all slides failed to parse
+        log('Parsing resulted in zero valid pages');
        console.warn(`Parsing resulted in zero valid pages for ${sourceFileName}`);
        return [this.createErrorPage('Parsing resulted in zero valid pages.', sourceFileName)];
      }
@ -112,15 +131,18 @@ export class PptxLoader implements FileLoaderInterface {
      const allErrored = pages.every((page) => page.metadata?.error);
      if (allErrored) {
        // If all pages resulted in errors, perhaps return a single summary error
+        log('All slides failed to parse');
        console.warn(`All slides failed to parse for ${sourceFileName}`);
        return [this.createErrorPage('All slides failed to parse correctly.', sourceFileName)];
        // Or return all the individual error pages: return pages;
      }

+      log('PPTX loading completed successfully');
      return pages;
    } catch (error) {
      // --- Error Handling Step ---
      // This catches errors from extractFiles or other unexpected issues
+      log('Error loading or processing PPTX file');
      const errorMessage = `Failed to load or process PPTX file: ${error instanceof Error ? error.message : String(error)}`;
      console.error(errorMessage, { filePath });
      return [this.createErrorPage(errorMessage, sourceFileName)];
@ -137,16 +159,21 @@ export class PptxLoader implements FileLoaderInterface {
   * @returns A Promise resolving to the aggregated content string.
   */
  async aggregateContent(pages: DocumentPage[]): Promise<string> {
+    log('Aggregating content from', pages.length, 'PPTX pages');
    // Ensure pages array is valid and non-empty before proceeding
    // Filter out error pages before aggregation unless we want to include error messages
    const validPages = pages.filter((page) => !page.metadata?.error);
+    log(
+      `Found ${validPages.length} valid pages for aggregation (${pages.length - validPages.length} error pages filtered out)`,
+    );

    if (validPages.length === 0) {
      // If only error pages existed, return empty or a summary error message
+      log('No valid pages found, returning content of first page (may be error page)');
      return pages[0]?.pageContent || ''; // Return content of the first page (might be an error page)
    }

-    return validPages
+    const result = validPages
      .map((page) => {
        const slideNumber = page.metadata?.slideNumber;
        // Use Markdown H2 for slide headers
@ -156,6 +183,9 @@ ${page.pageContent}
 </slide_page>`;
      })
      .join('\n\n'); // Separate slides with a blank line
+
+    log('PPTX content aggregated successfully, length:', result.length);
+    return result;
  }

  /**
@ -171,6 +201,7 @@ ${page.pageContent}
    sourceFileName: string,
    sourceFilePath?: string,
  ): DocumentPage {
+    log('Creating error page:', errorInfo);
    return {
      charCount: 0,
      lineCount: 0,
--- a/packages/file-loaders/src/loaders/text/index.test.ts
+++ b/packages/file-loaders/src/loaders/text/index.test.ts
@ -1,5 +1,5 @@
 import path from 'node:path';
-import { beforeEach } from 'vitest';
+import { beforeEach, describe, expect, it } from 'vitest';

 import type { FileLoaderInterface } from '../../types';
 import { TextLoader } from './index';
--- a/packages/file-loaders/src/loaders/text/index.ts
+++ b/packages/file-loaders/src/loaders/text/index.ts
@ -1,17 +1,23 @@
+import debug from 'debug';
 import { readFile } from 'node:fs/promises';

 import type { DocumentPage, FileLoaderInterface } from '../../types';

+const log = debug('file-loaders:text');
+
 /**
 * 用于加载纯文本文件的加载器。
 */
 export class TextLoader implements FileLoaderInterface {
  async loadPages(filePath: string): Promise<DocumentPage[]> {
+    log('Loading text file:', filePath);
    try {
      const fileContent = await readFile(filePath, 'utf8');
+      log('Text file loaded successfully, size:', fileContent.length, 'bytes');
      const lines = fileContent.split('\n');
      const lineCount = lines.length;
      const charCount = fileContent.length;
+      log('Text file stats:', { charCount, lineCount });

      const page: DocumentPage = {
        charCount,
@ -23,9 +29,11 @@ export class TextLoader implements FileLoaderInterface {
        pageContent: fileContent,
      };

+      log('Text page created successfully');
      return [page];
    } catch (e) {
      const error = e as Error;
+      log('Error encountered while loading text file');
      console.error(`Error loading text file ${filePath}: ${error.message}`);
      // 如果读取失败，返回一个包含错误信息的 Page
      const errorPage: DocumentPage = {
@ -36,6 +44,7 @@ export class TextLoader implements FileLoaderInterface {
        },
        pageContent: '',
      };
+      log('Created error page for failed text file loading');
      return [errorPage];
    }
  }
@ -47,7 +56,10 @@ export class TextLoader implements FileLoaderInterface {
   * @returns 聚合后的内容
   */
  async aggregateContent(pages: DocumentPage[]): Promise<string> {
+    log('Aggregating content from', pages.length, 'text pages');
    // 默认使用换行符连接，可以根据需要调整或使其可配置
-    return pages.map((page) => page.pageContent).join('\n');
+    const result = pages.map((page) => page.pageContent).join('\n');
+    log('Content aggregated successfully, length:', result.length);
+    return result;
  }
 }
--- a/packages/file-loaders/test/snapshots/loaders.test.ts.snap
+++ b/packages/file-loaders/test/snapshots/loaders.test.ts.snap
@ -1,5 +1,46 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html

+exports[`loadFile Integration Tests > PDF Handling > should load content from a pdf file using filePath 1`] = `
+{
+  "content": "123",
+  "fileType": "pdf",
+  "filename": "test.pdf",
+  "metadata": {
+    "loaderSpecific": {
+      "pdfInfo": {
+        "CreationDate": "D:20250419143655Z00'00'",
+        "Creator": "Pages文稿",
+        "EncryptFilterName": null,
+        "IsAcroFormPresent": false,
+        "IsCollectionPresent": false,
+        "IsLinearized": false,
+        "IsSignaturesPresent": false,
+        "IsXFAPresent": false,
+        "Language": null,
+        "ModDate": "D:20250419143655Z00'00'",
+        "PDFFormatVersion": "1.3",
+        "Producer": "macOS 版本15.3.2（版号24D81） Quartz PDFContext",
+        "Title": "test",
+      },
+      "pdfMetadata": null,
+      "pdfVersion": "4.10.38",
+    },
+  },
+  "pages": [
+    {
+      "charCount": 3,
+      "lineCount": 1,
+      "metadata": {
+        "pageNumber": 1,
+      },
+      "pageContent": "123",
+    },
+  ],
+  "totalCharCount": 3,
+  "totalLineCount": 1,
+}
+`;
+
 exports[`loadFile Integration Tests > Text Handling (.txt, .csv, .md, etc.) > should load content from a test.csv file using filePath 1`] = `
 {
  "content": "ID,Name,Value
--- a/packages/file-loaders/test/loaders.test.ts
+++ b/packages/file-loaders/test/loaders.test.ts
@ -36,4 +36,24 @@ describe('loadFile Integration Tests', () => {
      testPureTextFile(file);
    });
  });
+
+  describe('PDF Handling', () => {
+    it('should load content from a pdf file using filePath', async () => {
+      const pdfPath = getFixturePath('test.pdf');
+
+      // Hand the fixture path straight to loadFile
+      const loaded = await loadFile(pdfPath);
+
+      expect(loaded.content).toEqual('123');
+      expect(loaded.source).toEqual(pdfPath);
+
+      // These three fields are absent from the stored snapshot, so strip them
+      // before comparing (presumably because they vary between machines).
+      for (const key of ['source', 'createdTime', 'modifiedTime']) {
+        Reflect.deleteProperty(loaded, key);
+      }
+      expect(loaded).toMatchSnapshot();
+    });
+  });
 });
--- a/packages/file-loaders/test/setup.ts
+++ b/packages/file-loaders/test/setup.ts
@ -0,0 +1,17 @@
+// Test-environment shims for pdfjs-dist when running under Node.js.
+import { DOMMatrix } from 'canvas';
+
+// DOMMatrix: reuse the implementation shipped with `canvas` when none is defined.
+if (global.DOMMatrix === undefined) {
+  // @ts-ignore
+  global.DOMMatrix = DOMMatrix;
+}
+
+// URL.createObjectURL / URL.revokeObjectURL: install stubs only when missing.
+const { URL: globalURL } = global;
+
+if (globalURL.createObjectURL === undefined) {
+  globalURL.createObjectURL = () => 'blob:http://localhost/fake-blob-url';
+}
+
+if (globalURL.revokeObjectURL === undefined) {
+  globalURL.revokeObjectURL = () => {
+    /* no-op */
+  };
+}
--- a/packages/file-loaders/vitest.config.ts
+++ b/packages/file-loaders/vitest.config.ts
@ -0,0 +1,14 @@
+import { defineConfig } from 'vitest/config';
+
+// Vitest configuration for the file-loaders package.
+export default defineConfig({
+  test: {
+    // Coverage reporting is currently switched off; the block is kept for reference.
+    // coverage: {
+    //   all: false,
+    //   provider: 'v8',
+    //   reporter: ['text', 'json', 'lcov', 'text-summary'],
+    //   reportsDirectory: './coverage/app',
+    // },
+    environment: 'happy-dom',
+    // No setup file is registered while the line below stays commented out.
+    // setupFiles: join(__dirname, './test/setup.ts'),
+  },
+});
--- a/src/const/file.ts
+++ b/src/const/file.ts
@ -1 +1,8 @@
-export const FILE_UPLOAD_BLACKLIST = ['.DS_Store'];
+/**
+ * File names on the upload blacklist.
+ *
+ * NOTE(review): the entries look like OS-generated housekeeping files (folder
+ * metadata and thumbnail caches) — confirm how callers match against this list
+ * (exact name vs. suffix) before adding patterns.
+ */
+export const FILE_UPLOAD_BLACKLIST = [
+  '.DS_Store',
+  'Thumbs.db',
+  'desktop.ini',
+  '.localized',
+  'ehthumbs.db',
+  'ehthumbs_vista.db',
+];
--- a/src/database/client/migrations.json
+++ b/src/database/client/migrations.json
@ -456,6 +456,28 @@
    ],
    "bps": true,
    "folderMillis": 1744602998656,
-    "hash": "9a32c373461472a4afdb45e690c3009a0db0eaae81dcf6c8d05277a48f3a5e85"
+    "hash": "fdbac49ffdbe759234e760d0d48cdc1854028ea70d756a12b72f24305b4f3072"
+  },
+  {
+    "sql": [
+      "CREATE TABLE IF NOT EXISTS \"document_chunks\" (\n\t\"document_id\" varchar(30) NOT NULL,\n\t\"chunk_id\" uuid NOT NULL,\n\t\"page_index\" integer,\n\t\"user_id\" text NOT NULL,\n\t\"created_at\" timestamp with time zone DEFAULT now() NOT NULL,\n\tCONSTRAINT \"document_chunks_document_id_chunk_id_pk\" PRIMARY KEY(\"document_id\",\"chunk_id\")\n);\n",
+      "\nCREATE TABLE IF NOT EXISTS \"documents\" (\n\t\"id\" varchar(30) PRIMARY KEY NOT NULL,\n\t\"title\" text,\n\t\"content\" text,\n\t\"file_type\" varchar(255) NOT NULL,\n\t\"filename\" text,\n\t\"total_char_count\" integer NOT NULL,\n\t\"total_line_count\" integer NOT NULL,\n\t\"metadata\" jsonb,\n\t\"pages\" jsonb,\n\t\"source_type\" text NOT NULL,\n\t\"source\" text NOT NULL,\n\t\"file_id\" text,\n\t\"user_id\" text NOT NULL,\n\t\"client_id\" text,\n\t\"accessed_at\" timestamp with time zone DEFAULT now() NOT NULL,\n\t\"created_at\" timestamp with time zone DEFAULT now() NOT NULL,\n\t\"updated_at\" timestamp with time zone DEFAULT now() NOT NULL\n);\n",
+      "\nCREATE TABLE IF NOT EXISTS \"topic_documents\" (\n\t\"document_id\" text NOT NULL,\n\t\"topic_id\" text NOT NULL,\n\t\"user_id\" text NOT NULL,\n\t\"created_at\" timestamp with time zone DEFAULT now() NOT NULL,\n\tCONSTRAINT \"topic_documents_document_id_topic_id_pk\" PRIMARY KEY(\"document_id\",\"topic_id\")\n);\n",
+      "\nALTER TABLE \"document_chunks\" ADD CONSTRAINT \"document_chunks_document_id_documents_id_fk\" FOREIGN KEY (\"document_id\") REFERENCES \"public\".\"documents\"(\"id\") ON DELETE cascade ON UPDATE no action;",
+      "\nALTER TABLE \"document_chunks\" ADD CONSTRAINT \"document_chunks_chunk_id_chunks_id_fk\" FOREIGN KEY (\"chunk_id\") REFERENCES \"public\".\"chunks\"(\"id\") ON DELETE cascade ON UPDATE no action;",
+      "\nALTER TABLE \"document_chunks\" ADD CONSTRAINT \"document_chunks_user_id_users_id_fk\" FOREIGN KEY (\"user_id\") REFERENCES \"public\".\"users\"(\"id\") ON DELETE cascade ON UPDATE no action;",
+      "\nALTER TABLE \"documents\" ADD CONSTRAINT \"documents_file_id_files_id_fk\" FOREIGN KEY (\"file_id\") REFERENCES \"public\".\"files\"(\"id\") ON DELETE set null ON UPDATE no action;",
+      "\nALTER TABLE \"documents\" ADD CONSTRAINT \"documents_user_id_users_id_fk\" FOREIGN KEY (\"user_id\") REFERENCES \"public\".\"users\"(\"id\") ON DELETE cascade ON UPDATE no action;",
+      "\nALTER TABLE \"topic_documents\" ADD CONSTRAINT \"topic_documents_document_id_documents_id_fk\" FOREIGN KEY (\"document_id\") REFERENCES \"public\".\"documents\"(\"id\") ON DELETE cascade ON UPDATE no action;",
+      "\nALTER TABLE \"topic_documents\" ADD CONSTRAINT \"topic_documents_topic_id_topics_id_fk\" FOREIGN KEY (\"topic_id\") REFERENCES \"public\".\"topics\"(\"id\") ON DELETE cascade ON UPDATE no action;",
+      "\nALTER TABLE \"topic_documents\" ADD CONSTRAINT \"topic_documents_user_id_users_id_fk\" FOREIGN KEY (\"user_id\") REFERENCES \"public\".\"users\"(\"id\") ON DELETE cascade ON UPDATE no action;",
+      "\nCREATE INDEX \"documents_source_idx\" ON \"documents\" USING btree (\"source\");",
+      "\nCREATE INDEX \"documents_file_type_idx\" ON \"documents\" USING btree (\"file_type\");",
+      "\nCREATE INDEX \"documents_file_id_idx\" ON \"documents\" USING btree (\"file_id\");",
+      "\nCREATE UNIQUE INDEX \"documents_client_id_user_id_unique\" ON \"documents\" USING btree (\"client_id\",\"user_id\");\n"
+    ],
+    "bps": true,
+    "folderMillis": 1746724476380,
+    "hash": "0518cd9882f7ea38eb498b31c8dda73fb56bbc3aa55445ecbc7a9e716631d047"
  }
 ]
--- a/src/database/migrations/0022_add_documents.sql
+++ b/src/database/migrations/0022_add_documents.sql
@ -0,0 +1,49 @@
+CREATE TABLE IF NOT EXISTS "document_chunks" (
+	"document_id" varchar(30) NOT NULL,
+	"chunk_id" uuid NOT NULL,
+	"page_index" integer,
+	"user_id" text NOT NULL,
+	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
+	CONSTRAINT "document_chunks_document_id_chunk_id_pk" PRIMARY KEY("document_id","chunk_id")
+);
+--> statement-breakpoint
+CREATE TABLE IF NOT EXISTS "documents" (
+	"id" varchar(30) PRIMARY KEY NOT NULL,
+	"title" text,
+	"content" text,
+	"file_type" varchar(255) NOT NULL,
+	"filename" text,
+	"total_char_count" integer NOT NULL,
+	"total_line_count" integer NOT NULL,
+	"metadata" jsonb,
+	"pages" jsonb,
+	"source_type" text NOT NULL,
+	"source" text NOT NULL,
+	"file_id" text,
+	"user_id" text NOT NULL,
+	"client_id" text,
+	"accessed_at" timestamp with time zone DEFAULT now() NOT NULL,
+	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
+	"updated_at" timestamp with time zone DEFAULT now() NOT NULL
+);
+--> statement-breakpoint
+CREATE TABLE IF NOT EXISTS "topic_documents" (
+	"document_id" text NOT NULL,
+	"topic_id" text NOT NULL,
+	"user_id" text NOT NULL,
+	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
+	CONSTRAINT "topic_documents_document_id_topic_id_pk" PRIMARY KEY("document_id","topic_id")
+);
+--> statement-breakpoint
+ALTER TABLE "document_chunks" ADD CONSTRAINT "document_chunks_document_id_documents_id_fk" FOREIGN KEY ("document_id") REFERENCES "public"."documents"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+ALTER TABLE "document_chunks" ADD CONSTRAINT "document_chunks_chunk_id_chunks_id_fk" FOREIGN KEY ("chunk_id") REFERENCES "public"."chunks"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+ALTER TABLE "document_chunks" ADD CONSTRAINT "document_chunks_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+ALTER TABLE "documents" ADD CONSTRAINT "documents_file_id_files_id_fk" FOREIGN KEY ("file_id") REFERENCES "public"."files"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
+ALTER TABLE "documents" ADD CONSTRAINT "documents_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+ALTER TABLE "topic_documents" ADD CONSTRAINT "topic_documents_document_id_documents_id_fk" FOREIGN KEY ("document_id") REFERENCES "public"."documents"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+ALTER TABLE "topic_documents" ADD CONSTRAINT "topic_documents_topic_id_topics_id_fk" FOREIGN KEY ("topic_id") REFERENCES "public"."topics"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+ALTER TABLE "topic_documents" ADD CONSTRAINT "topic_documents_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
+CREATE INDEX "documents_source_idx" ON "documents" USING btree ("source");--> statement-breakpoint
+CREATE INDEX "documents_file_type_idx" ON "documents" USING btree ("file_type");--> statement-breakpoint
+CREATE INDEX "documents_file_id_idx" ON "documents" USING btree ("file_id");--> statement-breakpoint
+CREATE UNIQUE INDEX "documents_client_id_user_id_unique" ON "documents" USING btree ("client_id","user_id");
--- a/src/database/migrations/meta/0022_snapshot.json
+++ b/src/database/migrations/meta/0022_snapshot.json
--- a/src/database/migrations/meta/_journal.json
+++ b/src/database/migrations/meta/_journal.json
@ -154,6 +154,13 @@
      "when": 1744602998656,
      "tag": "0021_add_agent_opening_settings",
      "breakpoints": true
+    },
+    {
+      "idx": 22,
+      "version": "7",
+      "when": 1746724476380,
+      "tag": "0022_add_documents",
+      "breakpoints": true
    }
  ],
  "version": "6"
--- a/src/database/models/_template.ts
+++ b/src/database/models/_template.ts
@ -13,7 +13,7 @@ export class TemplateModel {
    this.db = db;
  }

-  create = async (params: NewSessionGroup) => {
+  create = async (params: Omit<NewSessionGroup, 'userId'>) => {
    const [result] = await this.db
      .insert(sessionGroups)
      .values({ ...params, userId: this.userId })
--- a/src/database/models/document.ts
+++ b/src/database/models/document.ts
@ -0,0 +1,54 @@
+import { and, desc, eq } from 'drizzle-orm/expressions';
+
+import { LobeChatDatabase } from '@/database/type';
+
+import { DocumentItem, NewDocument, documents } from '../schemas';
+
+/**
+ * Data-access layer for the `documents` table.
+ *
+ * Every statement issued by an instance is constrained to the `userId` supplied
+ * at construction time.
+ */
+export class DocumentModel {
+  private userId: string;
+  private db: LobeChatDatabase;
+
+  constructor(db: LobeChatDatabase, userId: string) {
+    this.db = db;
+    this.userId = userId;
+  }
+
+  /** Predicate matching the row with the given id that belongs to this model's user. */
+  private ownedRow(id: string) {
+    return and(eq(documents.id, id), eq(documents.userId, this.userId));
+  }
+
+  /** Predicate matching every row that belongs to this model's user. */
+  private ownedRows() {
+    return eq(documents.userId, this.userId);
+  }
+
+  /**
+   * Inserts one document and returns the inserted row.
+   * `userId` is always taken from the model, never from `params`.
+   */
+  create = async (params: Omit<NewDocument, 'userId'>) => {
+    const inserted = await this.db
+      .insert(documents)
+      .values({ ...params, userId: this.userId })
+      .returning();
+
+    return inserted[0];
+  };
+
+  /** Deletes the document with the given id if it belongs to the user. */
+  delete = async (id: string) => this.db.delete(documents).where(this.ownedRow(id));
+
+  /** Deletes every document that belongs to the user. */
+  deleteAll = async () => this.db.delete(documents).where(this.ownedRows());
+
+  /** Lists the user's documents, most recently updated first. */
+  query = async () =>
+    this.db.query.documents.findMany({
+      orderBy: [desc(documents.updatedAt)],
+      where: this.ownedRows(),
+    });
+
+  /** Looks up a single document of the user by id. */
+  findById = async (id: string) =>
+    this.db.query.documents.findFirst({
+      where: this.ownedRow(id),
+    });
+
+  /**
+   * Applies `value` to the user's document with the given id.
+   * `updatedAt` is always overwritten with the current time.
+   */
+  update = async (id: string, value: Partial<DocumentItem>) => {
+    const patch = { ...value, updatedAt: new Date() };
+
+    return this.db.update(documents).set(patch).where(this.ownedRow(id));
+  };
+}
--- a/src/database/models/message.ts
+++ b/src/database/models/message.ts
@ -30,6 +30,7 @@ import { today } from '@/utils/time';
 import {
  MessagePluginItem,
  chunks,
+  documents,
  embeddings,
  fileChunks,
  files,
@ -154,6 +155,29 @@ export class MessageModel {
      })),
    );

+    // 获取关联的文档内容
+    const fileIds = relatedFileList.map((file) => file.id).filter(Boolean);
+
+    let documentsMap: Record<string, string> = {};
+
+    if (fileIds.length > 0) {
+      const documentsList = await this.db
+        .select({
+          content: documents.content,
+          fileId: documents.fileId,
+        })
+        .from(documents)
+        .where(inArray(documents.fileId, fileIds));
+
+      documentsMap = documentsList.reduce(
+        (acc, doc) => {
+          if (doc.fileId) acc[doc.fileId] = doc.content as string;
+          return acc;
+        },
+        {} as Record<string, string>,
+      );
+    }
+
    const imageList = relatedFileList.filter((i) => (i.fileType || '').startsWith('image'));
    const fileList = relatedFileList.filter((i) => !(i.fileType || '').startsWith('image'));

@ -214,6 +238,7 @@ export class MessageModel {
            .filter((relation) => relation.messageId === item.id)
            // eslint-disable-next-line @typescript-eslint/no-unused-vars
            .map<ChatFileItem>(({ id, url, size, fileType, name }) => ({
+              content: documentsMap[id],
              fileType: fileType!,
              id,
              name: name!,
--- a/src/database/repositories/tableViewer/index.test.ts
+++ b/src/database/repositories/tableViewer/index.test.ts
@ -23,7 +23,7 @@ describe('TableViewerRepo', () => {
    it('should return all tables with counts', async () => {
      const result = await repo.getAllTables();

-      expect(result.length).toEqual(48);
+      expect(result.length).toEqual(51);
      expect(result[0]).toEqual({ name: 'agents', count: 0, type: 'BASE TABLE' });
    });

--- a/src/database/schemas/document.ts
+++ b/src/database/schemas/document.ts
@ -0,0 +1,104 @@
+/* eslint-disable sort-keys-fix/sort-keys-fix  */
+import {
+  index,
+  integer,
+  jsonb,
+  pgTable,
+  primaryKey,
+  text,
+  uniqueIndex,
+  uuid,
+  varchar,
+} from 'drizzle-orm/pg-core';
+import { createInsertSchema } from 'drizzle-zod';
+
+import { chunks } from '@/database/schemas/rag';
+import { idGenerator } from '@/database/utils/idGenerator';
+import { LobeDocumentPage } from '@/types/document';
+
+import { createdAt, timestamps } from './_helpers';
+import { files } from './file';
+import { users } from './user';
+
+/**
+ * Documents table - stores file content or web search results.
+ */
+export const documents = pgTable(
+  'documents',
+  {
+    // Primary key; generated through idGenerator('documents', 16) when not supplied
+    id: varchar('id', { length: 30 })
+      .$defaultFn(() => idGenerator('documents', 16))
+      .primaryKey(),
+
+    // Basic information
+    title: text('title'),
+    content: text('content'),
+    fileType: varchar('file_type', { length: 255 }).notNull(),
+    filename: text('filename'),
+
+    // Statistics
+    totalCharCount: integer('total_char_count').notNull(),
+    totalLineCount: integer('total_line_count').notNull(),
+
+    // Metadata
+    metadata: jsonb('metadata').$type<Record<string, any>>(),
+
+    // Page / chunk data
+    pages: jsonb('pages').$type<LobeDocumentPage[]>(),
+
+    // Source type
+    sourceType: text('source_type', { enum: ['file', 'web', 'api'] }).notNull(),
+    source: text('source').notNull(), // file path or web page URL
+
+    // Associated file (optional); set to NULL when the referenced file row is deleted
+    fileId: text('file_id').references(() => files.id, { onDelete: 'set null' }),
+
+    // Owning user; rows are removed together with the user
+    userId: text('user_id')
+      .references(() => users.id, { onDelete: 'cascade' })
+      .notNull(),
+    clientId: text('client_id'),
+
+    // Timestamps
+    ...timestamps,
+  },
+  (table) => [
+    index('documents_source_idx').on(table.source),
+    index('documents_file_type_idx').on(table.fileType),
+    index('documents_file_id_idx').on(table.fileId),
+    // (client_id, user_id) pairs must be unique
+    uniqueIndex('documents_client_id_user_id_unique').on(table.clientId, table.userId),
+  ],
+);
+
+export type NewDocument = typeof documents.$inferInsert;
+export type DocumentItem = typeof documents.$inferSelect;
+export const insertDocumentSchema = createInsertSchema(documents);
+
+/**
+ * Document chunks table - splits document content into chunks and links them to
+ * the `chunks` table, for vector retrieval.
+ * Note: this table is optional; if the document chunks are already stored in the
+ * `pages` field, it may not be needed.
+ */
+export const documentChunks = pgTable(
+  'document_chunks',
+  {
+    // Linked document; link rows are removed together with the document
+    documentId: varchar('document_id', { length: 30 })
+      .references(() => documents.id, { onDelete: 'cascade' })
+      .notNull(),
+
+    // Linked chunk; link rows are removed together with the chunk
+    chunkId: uuid('chunk_id')
+      .references(() => chunks.id, { onDelete: 'cascade' })
+      .notNull(),
+
+    // NOTE(review): presumably the index into `documents.pages` — confirm with the writer of this column
+    pageIndex: integer('page_index'),
+
+    userId: text('user_id')
+      .references(() => users.id, { onDelete: 'cascade' })
+      .notNull(),
+
+    createdAt: createdAt(),
+  },
+  // Composite primary key: one link row per (document, chunk) pair
+  (t) => [primaryKey({ columns: [t.documentId, t.chunkId] })],
+);
+
+export type NewDocumentChunk = typeof documentChunks.$inferInsert;
+export type DocumentChunkItem = typeof documentChunks.$inferSelect;
--- a/src/database/schemas/index.ts
+++ b/src/database/schemas/index.ts
@ -1,6 +1,7 @@
 export * from './agent';
 export * from './aiInfra';
 export * from './asyncTask';
+export * from './document';
 export * from './file';
 export * from './message';
 export * from './nextauth';
--- a/src/database/schemas/relations.ts
+++ b/src/database/schemas/relations.ts
@ -6,11 +6,12 @@ import { createdAt } from '@/database/schemas/_helpers';

 import { agents, agentsFiles, agentsKnowledgeBases } from './agent';
 import { asyncTasks } from './asyncTask';
+import { documentChunks, documents } from './document';
 import { files, knowledgeBases } from './file';
 import { messages, messagesFiles } from './message';
 import { chunks, unstructuredChunks } from './rag';
 import { sessionGroups, sessions } from './session';
-import { threads, topics } from './topic';
+import { threads, topicDocuments, topics } from './topic';
 import { users } from './user';

 export const agentsToSessions = pgTable(
@ -65,11 +66,12 @@ export const fileChunks = pgTable(
 );
 export type NewFileChunkItem = typeof fileChunks.$inferInsert;

-export const topicRelations = relations(topics, ({ one }) => ({
+export const topicRelations = relations(topics, ({ one, many }) => ({
  session: one(sessions, {
    fields: [topics.sessionId],
    references: [sessions.id],
  }),
+  documents: many(topicDocuments),
 }));

 export const threadsRelations = relations(threads, ({ one }) => ({
@ -151,6 +153,7 @@ export const filesRelations = relations(files, ({ many, one }) => ({
  messages: many(messagesFiles),
  sessions: many(filesToSessions),
  agents: many(agentsFiles),
+  documents: many(documents, { relationName: 'fileDocuments' }),

  chunkingTask: one(asyncTasks, {
    fields: [files.chunkTaskId],
@ -161,3 +164,32 @@ export const filesRelations = relations(files, ({ many, one }) => ({
    references: [asyncTasks.id],
  }),
 }));
+
+// Document-related relation definitions
+
+/**
+ * Relations declared on `documents`:
+ * - `file`: the `files` row referenced by `documents.fileId`; it shares the
+ *   `fileDocuments` relation name with the `documents` relation on `files`.
+ * - `topics`: rows of the `topicDocuments` link table.
+ * - `chunks`: rows of the `documentChunks` link table.
+ */
+export const documentsRelations = relations(documents, ({ one, many }) => ({
+  file: one(files, {
+    fields: [documents.fileId],
+    references: [files.id],
+    relationName: 'fileDocuments',
+  }),
+  topics: many(topicDocuments),
+  chunks: many(documentChunks),
+}));
+
+/**
+ * Relations declared on the `topicDocuments` link table: each link row resolves
+ * to one document (via `documentId`) and one topic (via `topicId`).
+ */
+export const topicDocumentsRelations = relations(topicDocuments, ({ one }) => ({
+  document: one(documents, {
+    fields: [topicDocuments.documentId],
+    references: [documents.id],
+  }),
+  topic: one(topics, {
+    fields: [topicDocuments.topicId],
+    references: [topics.id],
+  }),
+}));
+
+/**
+ * Relations declared on the `documentChunks` link table: each link row resolves
+ * to its document via `documentId`. No relation to `chunks` is declared here.
+ */
+export const documentChunksRelations = relations(documentChunks, ({ one }) => ({
+  document: one(documents, {
+    fields: [documentChunks.documentId],
+    references: [documents.id],
+  }),
+}));
--- a/src/database/schemas/topic.ts
+++ b/src/database/schemas/topic.ts
@ -1,11 +1,12 @@
 /* eslint-disable sort-keys-fix/sort-keys-fix  */
-import { boolean, jsonb, pgTable, text, uniqueIndex } from 'drizzle-orm/pg-core';
+import { boolean, jsonb, pgTable, primaryKey, text, uniqueIndex } from 'drizzle-orm/pg-core';
 import { createInsertSchema } from 'drizzle-zod';

+import { documents } from '@/database/schemas/document';
 import { idGenerator } from '@/database/utils/idGenerator';
 import { ChatTopicMetadata } from '@/types/topic';

-import { timestamps, timestamptz } from './_helpers';
+import { createdAt, timestamps, timestamptz } from './_helpers';
 import { sessions } from './session';
 import { users } from './user';

@ -26,9 +27,7 @@ export const topics = pgTable(
    metadata: jsonb('metadata').$type<ChatTopicMetadata | undefined>(),
    ...timestamps,
  },
-  (t) => ({
-    clientIdUnique: uniqueIndex('topics_client_id_user_id_unique').on(t.clientId, t.userId),
-  }),
+  (t) => [uniqueIndex('topics_client_id_user_id_unique').on(t.clientId, t.userId)],
 );

 export type NewTopic = typeof topics.$inferInsert;
@ -60,11 +59,35 @@ export const threads = pgTable(
    lastActiveAt: timestamptz('last_active_at').defaultNow(),
    ...timestamps,
  },
-  (t) => ({
-    clientIdUnique: uniqueIndex('threads_client_id_user_id_unique').on(t.clientId, t.userId),
-  }),
+  (t) => [uniqueIndex('threads_client_id_user_id_unique').on(t.clientId, t.userId)],
 );

 export type NewThread = typeof threads.$inferInsert;
 export type ThreadItem = typeof threads.$inferSelect;
 export const insertThreadSchema = createInsertSchema(threads);
+
+/**
+ * Document–topic link table - implements the many-to-many relationship between
+ * documents and topics.
+ */
+export const topicDocuments = pgTable(
+  'topic_documents',
+  {
+    // NOTE(review): declared as `text` here while `documents.id` is varchar(30) — confirm this is intended
+    documentId: text('document_id')
+      .notNull()
+      .references(() => documents.id, { onDelete: 'cascade' }),
+
+    topicId: text('topic_id')
+      .notNull()
+      .references(() => topics.id, { onDelete: 'cascade' }),
+
+    userId: text('user_id')
+      .references(() => users.id, { onDelete: 'cascade' })
+      .notNull(),
+
+    createdAt: createdAt(),
+  },
+  // Composite primary key: one link row per (document, topic) pair
+  (t) => [primaryKey({ columns: [t.documentId, t.topicId] })],
+);
+
+export type NewTopicDocument = typeof topicDocuments.$inferInsert;
+export type TopicDocumentItem = typeof topicDocuments.$inferSelect;
--- a/src/database/utils/idGenerator.ts
+++ b/src/database/utils/idGenerator.ts
@ -4,6 +4,7 @@ import { createNanoId } from '@/utils/uuid';

 const prefixes = {
  agents: 'agt',
+  documents: 'docs',
  files: 'file',
  knowledgeBases: 'kb',
  messages: 'msg',
--- a/src/features/ChatInput/Desktop/FilePreview/FileItem/Content.tsx
+++ b/src/features/ChatInput/Desktop/FilePreview/FileItem/Content.tsx
@ -31,7 +31,7 @@ const Content = memo<UploadFileItem>(({ file, previewUrl }) => {
    return <video className={styles.video} src={previewUrl} width={'100%'} />;
  }

-  return <FileIcon fileName={file.name} fileType={file.type} size={100} />;
+  return <FileIcon fileName={file.name} fileType={file.type} size={48} />;
 });

 export default Content;
--- a/src/features/ChatInput/Desktop/FilePreview/FileItem/index.tsx
+++ b/src/features/ChatInput/Desktop/FilePreview/FileItem/index.tsx
@ -11,7 +11,6 @@ import { UploadFileItem } from '@/types/files/upload';

 import UploadDetail from '../../../components/UploadDetail';
 import Content from './Content';
-import { FILE_ITEM_SIZE } from './style';

 const useStyles = createStyles(({ css, token }) => ({
  actions: css`
@ -30,12 +29,15 @@ const useStyles = createStyles(({ css, token }) => ({
  container: css`
    position: relative;

-    width: ${FILE_ITEM_SIZE}px;
-    min-width: ${FILE_ITEM_SIZE}px;
-    height: ${FILE_ITEM_SIZE}px;
+    width: 180px;
+    height: 64px;
    border-radius: 8px;

    background: ${token.colorBgContainer};
+
+    :hover {
+      background: ${token.colorBgElevated};
+    }
  `,
  image: css`
    margin-block: 0 !important;
@ -50,8 +52,6 @@ const useStyles = createStyles(({ css, token }) => ({

 type FileItemProps = UploadFileItem;

-const spacing = 8;
-
 const FileItem = memo<FileItemProps>((props) => {
  const { file, uploadState, status, id, tasks } = props;
  const { t } = useTranslation(['chat', 'common']);
@ -59,12 +59,12 @@ const FileItem = memo<FileItemProps>((props) => {
  const [removeChatUploadFile] = useFileStore((s) => [s.removeChatUploadFile]);

  return (
-    <Flexbox className={styles.container} distribution={'space-between'}>
-      <Center flex={1} height={FILE_ITEM_SIZE - 46} padding={spacing}>
+    <Flexbox align={'center'} className={styles.container} horizontal>
+      <Center flex={1} height={64} padding={4} style={{ maxWidth: 64 }}>
        <Content {...props} />
      </Center>
-      <Flexbox gap={4} style={{ paddingBottom: 4, paddingInline: spacing }}>
-        <Typography.Text ellipsis={{ tooltip: true }} style={{ fontSize: 12 }}>
+      <Flexbox flex={1} gap={4} style={{ paddingBottom: 4, paddingInline: 4 }}>
+        <Typography.Text ellipsis={{ tooltip: true }} style={{ fontSize: 12, maxWidth: 100 }}>
          {file.name}
        </Typography.Text>

--- a/src/features/ChatInput/Desktop/FilePreview/FileItem/style.ts
+++ b/src/features/ChatInput/Desktop/FilePreview/FileItem/style.ts
@ -1,4 +0,0 @@
-export const FILE_ITEM_SIZE = 200;
-
-// 8px on each side
-export const IMAGE_FILE_SIZE = 200 - 2 * 8;
--- a/src/features/ChatInput/components/UploadDetail/UploadStatus.tsx
+++ b/src/features/ChatInput/components/UploadDetail/UploadStatus.tsx
@ -38,7 +38,7 @@ const UploadStatus = memo<UploadStateProps>(({ status, size, uploadState }) => {
        <Flexbox align={'center'} gap={4} horizontal>
          <Progress percent={uploadState?.progress} size={14} type="circle" />
          <Typography.Text style={{ fontSize: 12 }} type={'secondary'}>
-            {formatSize(size * ((uploadState?.progress || 0) / 100), 2)} / {formatSize(size)}
+            {formatSize(size * ((uploadState?.progress || 0) / 100), 0)} / {formatSize(size)}
          </Typography.Text>
        </Flexbox>
      );
@ -49,7 +49,7 @@ const UploadStatus = memo<UploadStateProps>(({ status, size, uploadState }) => {
        <Flexbox align={'center'} gap={4} horizontal>
          <Progress percent={uploadState?.progress} size={14} type="circle" />
          <Typography.Text style={{ fontSize: 12 }} type={'secondary'}>
-            {formatSize(size)} · {t('upload.preview.status.processing')}
+            {formatSize(size)}
          </Typography.Text>
        </Flexbox>
      );
--- a/src/features/Conversation/Actions/Error.tsx
+++ b/src/features/Conversation/Actions/Error.tsx
@ -5,13 +5,13 @@ import { memo } from 'react';
 import { useChatListActionsBar } from '../hooks/useChatListActionsBar';

 export const ErrorActionsBar = memo<ChatActionsBarProps>(({ onActionClick }) => {
-  const { regenerate, copy, edit, del } = useChatListActionsBar();
+  const { regenerate, copy, edit, del, divider } = useChatListActionsBar();

  return (
    <ActionIconGroup
      items={[regenerate, del]}
      menu={{
-        items: [edit, copy],
+        items: [edit, copy, divider, del],
      }}
      onActionClick={onActionClick}
    />
--- a/src/libs/trpc/lambda/context.ts
+++ b/src/libs/trpc/lambda/context.ts
@ -57,6 +57,13 @@ export type LambdaContext = Awaited<ReturnType<typeof createContextInner>>;
 * @link https://trpc.io/docs/v11/context
 */
 export const createLambdaContext = async (request: NextRequest): Promise<LambdaContext> => {
+  // A special request header lets us debug the API endpoint in development mode.
+  // It is never honored in production: the NODE_ENV check below gates it.
+  const isDebugApi = request.headers.get('lobe-auth-dev-backend-api') === '1';
+  if (process.env.NODE_ENV === 'development' && isDebugApi) {
+    return { userId: process.env.MOCK_DEV_USER_ID };
+  }
+
  log('createLambdaContext called for request');
  // for API-response caching see https://trpc.io/docs/v11/caching

--- a/src/prompts/files/file.ts
+++ b/src/prompts/files/file.ts
@ -1,9 +1,11 @@
 import { ChatFileItem } from '@/types/message';

-const filePrompt = (item: ChatFileItem, addUrl: boolean) =>
-  addUrl
-    ? `<file id="${item.id}" name="${item.name}" type="${item.fileType}" size="${item.size}" url="${item.url}"></file>`
-    : `<file id="${item.id}" name="${item.name}" type="${item.fileType}" size="${item.size}"></file>`;
+/**
+ * Renders one chat file as a `<file>` element whose body is the file's
+ * `content` (an empty string when `content` is falsy).
+ *
+ * @param item - File to render.
+ * @param addUrl - When true, a `url` attribute is emitted after `size`.
+ * @returns The rendered element as a single-line string.
+ */
+const filePrompt = (item: ChatFileItem, addUrl: boolean) => {
+  const urlAttr = addUrl ? ` url="${item.url}"` : '';
+
+  return `<file id="${item.id}" name="${item.name}" type="${item.fileType}" size="${item.size}"${urlAttr}>${item.content || ''}</file>`;
+};

 export const filePrompts = (fileList: ChatFileItem[], addUrl: boolean) => {
  if (fileList.length === 0) return '';
--- a/src/server/routers/lambda/document.ts
+++ b/src/server/routers/lambda/document.ts
@ -0,0 +1,36 @@
+import { z } from 'zod';
+
+import { ChunkModel } from '@/database/models/chunk';
+import { FileModel } from '@/database/models/file';
+import { MessageModel } from '@/database/models/message';
+import { authedProcedure, router } from '@/libs/trpc/lambda';
+import { serverDatabase } from '@/libs/trpc/lambda/middleware';
+import { DocumentService } from '@/server/services/document';
+
+// Base procedure for the document routes: builds on `authedProcedure` and the
+// `serverDatabase` middleware, then adds model/service instances to the context.
+// Every instance is constructed from `ctx.serverDB` and `ctx.userId`.
+const documentProcedure = authedProcedure.use(serverDatabase).use(async (opts) => {
+  const { ctx } = opts;
+
+  return opts.next({
+    ctx: {
+      chunkModel: new ChunkModel(ctx.serverDB, ctx.userId),
+      documentService: new DocumentService(ctx.serverDB, ctx.userId),
+      fileModel: new FileModel(ctx.serverDB, ctx.userId),
+      messageModel: new MessageModel(ctx.serverDB, ctx.userId),
+    },
+  });
+});
+
+/**
+ * tRPC router exposing document operations.
+ */
+export const documentRouter = router({
+  // Mutation: parse the file identified by `input.id` via `DocumentService.parseFile`
+  // and return the resulting document.
+  parseFileContent: documentProcedure
+    .input(
+      z.object({
+        id: z.string(),
+        // NOTE(review): accepted by the schema but not read by the handler below
+        skipExist: z.boolean().optional(),
+      }),
+    )
+    .mutation(async ({ ctx, input }) => {
+      const lobeDocument = await ctx.documentService.parseFile(input.id);
+
+      return lobeDocument;
+    }),
+});
--- a/src/server/routers/lambda/index.ts
+++ b/src/server/routers/lambda/index.ts
@ -7,6 +7,7 @@ import { agentRouter } from './agent';
 import { aiModelRouter } from './aiModel';
 import { aiProviderRouter } from './aiProvider';
 import { chunkRouter } from './chunk';
+import { documentRouter } from './document';
 import { exporterRouter } from './exporter';
 import { fileRouter } from './file';
 import { importerRouter } from './importer';
@ -25,6 +26,7 @@ export const lambdaRouter = router({
  aiModel: aiModelRouter,
  aiProvider: aiProviderRouter,
  chunk: chunkRouter,
+  document: documentRouter,
  exporter: exporterRouter,
  file: fileRouter,
  healthcheck: publicProcedure.query(() => "i'm live!"),
--- a/src/server/services/document/index.ts
+++ b/src/server/services/document/index.ts
@ -0,0 +1,66 @@
+import { loadFile } from '@lobechat/file-loaders';
+import debug from 'debug';
+
+import { DocumentModel } from '@/database/models/document';
+import { FileModel } from '@/database/models/file';
+import { LobeChatDatabase } from '@/database/type';
+import { LobeDocument } from '@/types/document';
+
+import { FileService } from '../file';
+
+const log = debug('lobe-chat:service:document');
+
+/**
+ * Server-side service that parses a stored file and saves the result as a document record.
+ */
+export class DocumentService {
+  userId: string;
+  // NOTE(review): assigned in the constructor but not read anywhere in this class
+  private fileModel: FileModel;
+  private documentModel: DocumentModel;
+  private fileService: FileService;
+
+  /**
+   * @param db - database handle passed to the models and to the file service
+   * @param userId - user id passed to the models and to the file service
+   */
+  constructor(db: LobeChatDatabase, userId: string) {
+    this.userId = userId;
+    this.fileModel = new FileModel(db, userId);
+    this.fileService = new FileService(db, userId);
+    this.documentModel = new DocumentModel(db, userId);
+  }
+
+  /**
+   * Parse the content of a file and save it as a document.
+   *
+   * Downloads the file to a local path, loads it with `loadFile`, then creates a document
+   * record from the loaded content, metadata and pages.
+   *
+   * @param fileId - id of the file to parse
+   * @returns the created document, cast to `LobeDocument`
+   * @throws rethrows any error raised while loading the file or creating the document,
+   *   after logging it with `console.error`
+   */
+  async parseFile(fileId: string): Promise<LobeDocument> {
+    // `cleanup` is invoked in the `finally` block below, whether parsing succeeds or fails
+    // (presumably it removes the local copy — confirm in FileService)
+    const { filePath, file, cleanup } = await this.fileService.downloadFileToLocal(fileId);
+
+    const logPrefix = `[${file.name}]`;
+    log(`${logPrefix} 开始解析文件, 路径: ${filePath}`);
+
+    try {
+      // Load the file content with loadFile
+      const fileDocument = await loadFile(filePath);
+
+      log(`${logPrefix} 文件解析成功 %O`, {
+        fileType: fileDocument.fileType,
+        size: fileDocument.content.length,
+      });
+
+      // `fileType` and `source` come from the stored file record; the remaining fields
+      // come from the loader result
+      const document = await this.documentModel.create({
+        content: fileDocument.content,
+        fileId,
+        fileType: file.fileType,
+        metadata: fileDocument.metadata,
+        pages: fileDocument.pages,
+        source: file.url,
+        sourceType: 'file',
+        title: fileDocument.metadata?.title,
+        totalCharCount: fileDocument.totalCharCount,
+        totalLineCount: fileDocument.totalLineCount,
+      });
+
+      return document as LobeDocument;
+    } catch (error) {
+      console.error(`${logPrefix} 文件解析失败:`, error);
+      throw error;
+    } finally {
+      cleanup();
+    }
+  }
+}
--- a/src/server/services/mcp/index.ts
+++ b/src/server/services/mcp/index.ts
@ -15,10 +15,6 @@ class MCPService {
  // Store instances of the custom MCPClient, keyed by serialized MCPClientParams
  private clients: Map<string, MCPClient> = new Map();

-  constructor() {
-    log('MCPService initialized');
-  }
-
  // --- MCP Interaction ---

  // listTools now accepts MCPClientParams
--- a/src/services/rag.ts
+++ b/src/services/rag.ts
@ -2,6 +2,10 @@ import { lambdaClient } from '@/libs/trpc/client';
 import { SemanticSearchSchemaType } from '@/types/rag';

 class RAGService {
+  /**
+   * Call the `document.parseFileContent` mutation on the lambda client.
+   *
+   * @param id - id of the file to parse
+   * @param skipExist - optional flag forwarded unchanged in the mutation input
+   * @returns the promise returned by the mutation
+   */
+  parseFileContent = async (id: string, skipExist?: boolean) => {
+    return lambdaClient.document.parseFileContent.mutate({ id, skipExist });
+  };
+
  createParseFileTask = async (id: string, skipExist?: boolean) => {
    return lambdaClient.chunk.createParseFileTask.mutate({ id, skipExist });
  };
--- a/src/store/chat/slices/aiChat/actions/tests/rag.test.ts
+++ b/src/store/chat/slices/aiChat/actions/tests/rag.test.ts
@ -197,13 +197,13 @@ describe('chatRAG actions', () => {
      expect(result.current.internal_shouldUseRAG()).toBe(true);
    });

-    it('should return true if has user files', () => {
+    it('should return false if has user files', () => {
      const { result } = renderHook(() => useChatStore());

      vi.spyOn(agentSelectors, 'hasEnabledKnowledge').mockReturnValue(false);
      vi.spyOn(chatSelectors, 'currentUserFiles').mockReturnValue([{ id: 'file-1' }] as any);

-      expect(result.current.internal_shouldUseRAG()).toBe(true);
+      expect(result.current.internal_shouldUseRAG()).toBeFalsy();
    });

    it('should return false if no knowledge or files', () => {
--- a/src/store/chat/slices/aiChat/actions/rag.ts
+++ b/src/store/chat/slices/aiChat/actions/rag.ts
@ -130,9 +130,8 @@ export const chatRag: StateCreator<ChatStore, [['zustand/devtools', never]], [],
    return rewriteQuery;
  },
  internal_shouldUseRAG: () => {
-    const userFiles = chatSelectors.currentUserFiles(get()).map((f) => f.id);
-    //  if there is relative files or enabled knowledge, try with ragQuery
-    return hasEnabledKnowledge() || userFiles.length > 0;
+    // Try with ragQuery only when hasEnabledKnowledge() is truthy;
+    // the current user files are no longer part of this decision
+    return hasEnabledKnowledge();
  },

  internal_toggleMessageRAGLoading: (loading, id) => {
--- a/src/store/file/slices/chat/action.ts
+++ b/src/store/file/slices/chat/action.ts
@ -7,14 +7,10 @@ import { fileService } from '@/services/file';
 import { ServerService } from '@/services/file/server';
 import { ragService } from '@/services/rag';
 import { UPLOAD_NETWORK_ERROR } from '@/services/upload';
-import { userService } from '@/services/user';
-import { useAgentStore } from '@/store/agent';
 import {
  UploadFileListDispatch,
  uploadFileListReducer,
 } from '@/store/file/reducers/uploadFileList';
-import { useUserStore } from '@/store/user';
-import { preferenceSelectors } from '@/store/user/selectors';
 import { FileListItem } from '@/types/files';
 import { UploadFileItem } from '@/types/files/upload';
 import { isChunkingUnsupported } from '@/utils/isChunkingUnsupported';
@ -97,7 +93,7 @@ export const createFileSlice: StateCreator<
  },

  uploadChatFiles: async (rawFiles) => {
-    const { dispatchChatUploadFileList, startAsyncTask } = get();
+    const { dispatchChatUploadFileList } = get();
    // 0. skip file in blacklist
    const files = rawFiles.filter((file) => !FILE_UPLOAD_BLACKLIST.includes(file.name));
    // 1. add files with base64
@ -154,52 +150,8 @@ export const createFileSlice: StateCreator<
      // image don't need to be chunked and embedding
      if (isChunkingUnsupported(file.type)) return;

-      // 3. auto chunk and embedding
-      dispatchChatUploadFileList({
-        id: fileResult.id,
-        type: 'updateFile',
-        // make the taks empty to hint the user that the task is starting but not triggered
-        value: { tasks: {} },
-      });
-
-      await startAsyncTask(
-        fileResult.id,
-        async (id) => {
-          const data = await ragService.createParseFileTask(id);
-          if (!data || !data.id) throw new Error('failed to createParseFileTask');
-
-          // run the assignment
-          useAgentStore
-            .getState()
-            .addFilesToAgent([id], false)
-            .then(() => {
-              // trigger the tip if it's the first time
-              if (!preferenceSelectors.shouldTriggerFileInKnowledgeBaseTip(useUserStore.getState()))
-                return;
-
-              userService.updateGuide({ uploadFileInKnowledgeBase: true });
-            });
-
-          return data.id;
-        },
-
-        (fileItem) => {
-          dispatchChatUploadFileList({
-            id: fileResult.id,
-            type: 'updateFile',
-            value: {
-              tasks: {
-                chunkCount: fileItem.chunkCount,
-                chunkingError: fileItem.chunkingError,
-                chunkingStatus: fileItem.chunkingStatus,
-                embeddingError: fileItem.embeddingError,
-                embeddingStatus: fileItem.embeddingStatus,
-                finishEmbedding: fileItem.finishEmbedding,
-              },
-            },
-          });
-        },
-      );
+      const data = await ragService.parseFileContent(fileResult.id);
+      console.log(data);
    });

    await Promise.all(pools);
--- a/src/types/document/index.ts
+++ b/src/types/document/index.ts
@ -0,0 +1,172 @@
+/**
+ * A document object in LobeChat.
+ */
+export interface LobeDocument {
+  /**
+   * File content.
+   */
+  content: string | null;
+  /**
+   * File creation timestamp.
+   */
+  createdAt: Date;
+
+  /**
+   * File type or extension.
+   */
+  fileType: string;
+
+  /**
+   * Original file name.
+   */
+  filename: string;
+
+  id: string;
+
+  /**
+   * File-level metadata.
+   * For example the title or author extracted from the file properties, or the error when loading the whole file failed.
+   */
+  metadata: {
+    /**
+     * Allows additional file-level metadata.
+     */
+    [key: string]: any;
+    /**
+     * Document author (if available).
+     */
+    author?: string;
+    /**
+     * Error message recorded when loading the whole file failed.
+     */
+    error?: string;
+  };
+
+  /**
+   * Array of all logical pages/chunks in the document.
+   * The order usually follows the natural order in the file.
+   */
+  pages?: LobeDocumentPage[];
+
+  /**
+   * Full path of the original file.
+   */
+  source: string;
+
+  /**
+   * Document title (if available).
+   */
+  title?: string;
+
+  /**
+   * Total character count of the whole document (sum of `charCount` over all pages).
+   * Only known after all pages have been loaded and counted.
+   */
+  totalCharCount: number;
+
+  /**
+   * Total line count of the whole document (sum of `lineCount` over all pages).
+   * Only known after all pages have been loaded and counted.
+   */
+  totalLineCount: number;
+
+  /**
+   * File last-modified timestamp.
+   */
+  updatedAt: Date;
+}
+
+/**
+ * Represents one logical unit / page / chunk of a file.
+ */
+export interface LobeDocumentPage {
+  /**
+   * Character count of this page/chunk's content.
+   */
+  charCount: number;
+
+  /**
+   * Line count of this page/chunk's content.
+   */
+  lineCount: number;
+
+  /**
+   * Metadata related to this page/chunk.
+   */
+  metadata: {
+    /**
+     * Allows additional page/chunk-specific metadata.
+     */
+    [key: string]: any;
+
+    /**
+     * Index of the current chunk, if the original file unit was further split into chunks.
+     */
+    chunkIndex?: number;
+
+    /**
+     * Error that occurred while processing this page/chunk.
+     */
+    error?: string;
+
+    /**
+     * End line number of this page/chunk in the original file.
+     */
+    lineNumberEnd?: number;
+
+    /**
+     * Start line number of this page/chunk in the original file.
+     */
+    lineNumberStart?: number;
+
+    /**
+     * Page number (applies to PDF, DOCX).
+     */
+    pageNumber?: number;
+
+    /**
+     * Section title related to this page/chunk.
+     */
+    sectionTitle?: string;
+
+    /**
+     * Sheet name (applies to XLSX).
+     */
+    sheetName?: string;
+
+    /**
+     * Slide number (applies to PPTX).
+     */
+    slideNumber?: number;
+
+    /**
+     * Total number of chunks of the unit, if the original file unit was further split into chunks.
+     */
+    totalChunks?: number;
+  };
+
+  /**
+   * Core text content of this page/chunk.
+   */
+  pageContent: string;
+}
+
+/**
+ * Document source type.
+ */
+export enum DocumentSourceType {
+  /**
+   * Content from an API.
+   */
+  API = 'api',
+
+  /**
+   * Local or uploaded file.
+   */
+  FILE = 'file',
+
+  /**
+   * Web page content.
+   */
+  WEB = 'web',
+}
--- a/src/types/message/chat.ts
+++ b/src/types/message/chat.ts
@ -31,6 +31,7 @@ export interface ChatTTS {
 }

 export interface ChatFileItem {
+  content?: string;
  fileType: string;
  id: string;
  name: string;