UNPKG

mongodb-rag-core

Version:

Common elements used by MongoDB Chatbot Framework components.

38 lines 1.68 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.makePdfToMarkdownDataSource = void 0; const pdf2md_1 = __importDefault(require("@opendocsg/pdf2md")); const extractMarkdownH1_1 = require("./extractMarkdownH1"); /** Loads PDF and converts content to Page */ function makePdfToMarkdownDataSource({ name, urls, getPdfBuffer, transformPageUrl = (url) => url, getTitleFromContent = extractMarkdownH1_1.extractMarkdownH1, sourceType, metadata, }) { return { name, fetchPages: async () => { const pages = await Promise.all(urls.map(async (url) => { try { const buffer = await getPdfBuffer(url); const mdContent = await (0, pdf2md_1.default)(buffer); const page = { url: transformPageUrl(url), title: getTitleFromContent(mdContent) ?? url, body: mdContent, format: "md", sourceName: name, sourceType, metadata, }; return page; } catch (error) { console.warn(`Failed to create PDF page for url '${url}',`, error); } })); return pages.filter((page) => page !== undefined); }, }; } exports.makePdfToMarkdownDataSource = makePdfToMarkdownDataSource; //# sourceMappingURL=PdfToMdDataSource.js.map