@langchain/community
Version:
Third-party integrations for LangChain.js
1 lines • 4.48 kB
Source Map (JSON)
{"version":3,"file":"epub.cjs","names":["BaseDocumentLoader","Document"],"sources":["../../../src/document_loaders/fs/epub.ts"],"sourcesContent":["import type { EPub } from \"epub2\";\nimport { Document } from \"@langchain/core/documents\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n\n/**\n * A class that extends the `BaseDocumentLoader` class. It represents a\n * document loader that loads documents from EPUB files.\n */\nexport class EPubLoader extends BaseDocumentLoader {\n private splitChapters: boolean;\n\n constructor(\n public filePath: string,\n { splitChapters = true } = {}\n ) {\n super();\n this.splitChapters = splitChapters;\n }\n\n /**\n * A protected method that takes an EPUB object as a parameter and returns\n * a promise that resolves to an array of objects representing the content\n * and metadata of each chapter.\n * @param epub The EPUB object to parse.\n * @returns A promise that resolves to an array of objects representing the content and metadata of each chapter.\n */\n protected async parse(\n epub: EPub\n ): Promise<{ pageContent: string; metadata?: object }[]> {\n const { htmlToText } = await HtmlToTextImport();\n const chapters = await Promise.all(\n epub.flow.map(async (chapter) => {\n if (!chapter.id) return null as never;\n const html: string = await epub.getChapterRawAsync(chapter.id);\n if (!html) return null as never;\n return {\n html,\n title: chapter.title,\n };\n })\n );\n return chapters.filter(Boolean).map((chapter) => ({\n pageContent: htmlToText(chapter.html),\n metadata: {\n ...(chapter.title && { chapter: chapter.title }),\n },\n }));\n }\n\n /**\n * A method that loads the EPUB file and returns a promise that resolves\n * to an array of `Document` instances.\n * @returns A promise that resolves to an array of `Document` instances.\n */\n public async load(): Promise<Document[]> {\n const { EPub } = await EpubImport();\n const epub = await EPub.createAsync(this.filePath);\n\n const parsed = await this.parse(epub);\n const metadata = { source: this.filePath };\n\n if (parsed.length === 0) return [];\n\n return this.splitChapters\n ? parsed.map(\n (chapter) =>\n new Document({\n pageContent: chapter.pageContent,\n metadata: {\n ...metadata,\n ...chapter.metadata,\n },\n })\n )\n : [\n new Document({\n pageContent: parsed\n .map((chapter) => chapter.pageContent)\n .join(\"\\n\\n\"),\n metadata,\n }),\n ];\n }\n}\n\nasync function EpubImport() {\n const { EPub } = await import(\"epub2\").catch(() => {\n throw new Error(\n \"Failed to load epub2. Please install it with eg. `npm install epub2`.\"\n );\n });\n return { EPub };\n}\n\nasync function HtmlToTextImport() {\n const { htmlToText } = await import(\"html-to-text\").catch(() => {\n throw new Error(\n \"Failed to load html-to-text. Please install it with eg. `npm install html-to-text`.\"\n );\n });\n return { htmlToText };\n}\n"],"mappings":";;;;;;;;;;AAQA,IAAa,aAAb,cAAgCA,sCAAAA,mBAAmB;CACjD;CAEA,YACE,UACA,EAAE,gBAAgB,SAAS,EAAE,EAC7B;AACA,SAAO;AAHA,OAAA,WAAA;AAIP,OAAK,gBAAgB;;;;;;;;;CAUvB,MAAgB,MACd,MACuD;EACvD,MAAM,EAAE,eAAe,MAAM,kBAAkB;AAY/C,UAXiB,MAAM,QAAQ,IAC7B,KAAK,KAAK,IAAI,OAAO,YAAY;AAC/B,OAAI,CAAC,QAAQ,GAAI,QAAO;GACxB,MAAM,OAAe,MAAM,KAAK,mBAAmB,QAAQ,GAAG;AAC9D,OAAI,CAAC,KAAM,QAAO;AAClB,UAAO;IACL;IACA,OAAO,QAAQ;IAChB;IACD,CACH,EACe,OAAO,QAAQ,CAAC,KAAK,aAAa;GAChD,aAAa,WAAW,QAAQ,KAAK;GACrC,UAAU,EACR,GAAI,QAAQ,SAAS,EAAE,SAAS,QAAQ,OAAO,EAChD;GACF,EAAE;;;;;;;CAQL,MAAa,OAA4B;EACvC,MAAM,EAAE,SAAS,MAAM,YAAY;EACnC,MAAM,OAAO,MAAM,KAAK,YAAY,KAAK,SAAS;EAElD,MAAM,SAAS,MAAM,KAAK,MAAM,KAAK;EACrC,MAAM,WAAW,EAAE,QAAQ,KAAK,UAAU;AAE1C,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE;AAElC,SAAO,KAAK,gBACR,OAAO,KACJ,YACC,IAAIC,0BAAAA,SAAS;GACX,aAAa,QAAQ;GACrB,UAAU;IACR,GAAG;IACH,GAAG,QAAQ;IACZ;GACF,CAAC,CACL,GACD,CACE,IAAIA,0BAAAA,SAAS;GACX,aAAa,OACV,KAAK,YAAY,QAAQ,YAAY,CACrC,KAAK,OAAO;GACf;GACD,CAAC,CACH;;;AAIT,eAAe,aAAa;CAC1B,MAAM,EAAE,SAAS,MAAM,OAAO,SAAS,YAAY;AACjD,QAAM,IAAI,MACR,wEACD;GACD;AACF,QAAO,EAAE,MAAM;;AAGjB,eAAe,mBAAmB;CAChC,MAAM,EAAE,eAAe,MAAM,OAAO,gBAAgB,YAAY;AAC9D,QAAM,IAAI,MACR,sFACD;GACD;AACF,QAAO,EAAE,YAAY"}