@langchain/community
Version:
Third-party integrations for LangChain.js
84 lines (83 loc) • 3.05 kB
JavaScript
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
const require_runtime = require("../../_virtual/_rolldown/runtime.cjs");
let _langchain_core_documents = require("@langchain/core/documents");
let _langchain_core_document_loaders_base = require("@langchain/core/document_loaders/base");
//#region src/document_loaders/fs/epub.ts
var epub_exports = /* @__PURE__ */ require_runtime.__exportAll({ EPubLoader: () => EPubLoader });
/**
* A class that extends the `BaseDocumentLoader` class. It represents a
* document loader that loads documents from EPUB files.
*/
var EPubLoader = class extends _langchain_core_document_loaders_base.BaseDocumentLoader {
splitChapters;
constructor(filePath, { splitChapters = true } = {}) {
super();
this.filePath = filePath;
this.splitChapters = splitChapters;
}
/**
* A protected method that takes an EPUB object as a parameter and returns
* a promise that resolves to an array of objects representing the content
* and metadata of each chapter.
* @param epub The EPUB object to parse.
* @returns A promise that resolves to an array of objects representing the content and metadata of each chapter.
*/
async parse(epub) {
const { htmlToText } = await HtmlToTextImport();
return (await Promise.all(epub.flow.map(async (chapter) => {
if (!chapter.id) return null;
const html = await epub.getChapterRawAsync(chapter.id);
if (!html) return null;
return {
html,
title: chapter.title
};
}))).filter(Boolean).map((chapter) => ({
pageContent: htmlToText(chapter.html),
metadata: { ...chapter.title && { chapter: chapter.title } }
}));
}
/**
* A method that loads the EPUB file and returns a promise that resolves
* to an array of `Document` instances.
* @returns A promise that resolves to an array of `Document` instances.
*/
async load() {
const { EPub } = await EpubImport();
const epub = await EPub.createAsync(this.filePath);
const parsed = await this.parse(epub);
const metadata = { source: this.filePath };
if (parsed.length === 0) return [];
return this.splitChapters ? parsed.map((chapter) => new _langchain_core_documents.Document({
pageContent: chapter.pageContent,
metadata: {
...metadata,
...chapter.metadata
}
})) : [new _langchain_core_documents.Document({
pageContent: parsed.map((chapter) => chapter.pageContent).join("\n\n"),
metadata
})];
}
};
async function EpubImport() {
const { EPub } = await import("epub2").catch(() => {
throw new Error("Failed to load epub2. Please install it with eg. `npm install epub2`.");
});
return { EPub };
}
async function HtmlToTextImport() {
const { htmlToText } = await import("html-to-text").catch(() => {
throw new Error("Failed to load html-to-text. Please install it with eg. `npm install html-to-text`.");
});
return { htmlToText };
}
//#endregion
exports.EPubLoader = EPubLoader;
Object.defineProperty(exports, "epub_exports", {
enumerable: true,
get: function() {
return epub_exports;
}
});
//# sourceMappingURL=epub.cjs.map