mirror of
https://github.com/lobehub/lobehub
synced 2026-04-21 17:47:27 +00:00
💄 style: support .doc file parse (#8182)
* ✨ feat: support .doc file parse
* ✨ feat: support .doc file parse

---------

Co-authored-by: admin <admin@punch.local>
This commit is contained in:
parent
cc67b5443d
commit
ed42753fe5
3 changed files with 8 additions and 1 deletion
|
|
@@ -279,6 +279,7 @@
|
|||
"url-join": "^5.0.0",
|
||||
"use-merge-value": "^1.2.0",
|
||||
"uuid": "^11.1.0",
|
||||
"word-extractor": "^1.0.4",
|
||||
"ws": "^8.18.3",
|
||||
"yaml": "^2.8.1",
|
||||
"zod": "^3.25.76",
|
||||
|
|
|
|||
|
|
@@ -37,6 +37,7 @@ const getFileType = (filePath: string): SupportedFileType | undefined => {
|
|||
log('File type identified as pdf');
|
||||
return 'pdf';
|
||||
}
|
||||
case 'doc':
|
||||
case 'docx': {
|
||||
log('File type identified as docx');
|
||||
return 'docx';
|
||||
|
|
|
|||
|
|
@@ -12,7 +12,12 @@ export class DocxLoader implements FileLoaderInterface {
|
|||
async loadPages(filePath: string): Promise<DocumentPage[]> {
|
||||
log('Loading DOCX file:', filePath);
|
||||
try {
|
||||
const loader = new LangchainDocxLoader(filePath);
|
||||
let loader: LangchainDocxLoader;
|
||||
if (filePath.endsWith('.doc')) {
|
||||
loader = new LangchainDocxLoader(filePath, { type: 'doc' });
|
||||
} else {
|
||||
loader = new LangchainDocxLoader(filePath, { type: 'docx' });
|
||||
}
|
||||
log('LangChain DocxLoader created');
|
||||
const docs = await loader.load(); // Langchain DocxLoader typically loads the whole doc as one
|
||||
log('DOCX document loaded, parts:', docs.length);
|
||||
|
|
|
|||
Loading…
Reference in a new issue