// mongodb-rag-core
// Version:
// Common elements used by MongoDB Chatbot Framework components.
// 38 lines • 1.68 kB
// JavaScript
;
// Interop helper for default imports: reuses an existing `this.__importDefault`
// when one is present, otherwise falls back to the function below.
// A module flagged with a truthy `__esModule` is returned unchanged; any other
// value is wrapped so it can be read through a `default` property.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.makePdfToMarkdownDataSource = void 0;
const pdf2md_1 = __importDefault(require("@opendocsg/pdf2md"));
const extractMarkdownH1_1 = require("./extractMarkdownH1");
/**
 * Builds a data source whose pages are produced by converting PDFs to Markdown.
 *
 * The returned object exposes `name` and an async `fetchPages()`. `fetchPages`
 * starts a conversion for every entry of `urls` at once (via `Promise.all`):
 * it obtains the PDF with `getPdfBuffer(url)`, converts it with pdf2md, and
 * assembles a page object with `format: "md"`. A URL whose conversion throws is
 * reported with `console.warn` and left out of the result instead of failing
 * the whole batch.
 *
 * @param {object} options
 * @param {string} options.name - Data source name; also stored as each page's `sourceName`.
 * @param {string[]} options.urls - PDF locations handed one by one to `getPdfBuffer`.
 * @param {Function} options.getPdfBuffer - Called with a URL; its awaited result is passed to pdf2md.
 * @param {Function} [options.transformPageUrl] - Maps a source URL to the page `url`. Defaults to identity.
 * @param {Function} [options.getTitleFromContent] - Derives a title from the Markdown. Defaults to `extractMarkdownH1`.
 * @param {*} [options.sourceType] - Copied verbatim onto every page.
 * @param {*} [options.metadata] - Copied verbatim (same reference) onto every page.
 * @returns {{name: string, fetchPages: Function}} The data source.
 */
function makePdfToMarkdownDataSource({ name, urls, getPdfBuffer, transformPageUrl = (url) => url, getTitleFromContent = extractMarkdownH1_1.extractMarkdownH1, sourceType, metadata, }) {
    // Converts one PDF into a page; resolves to undefined when anything fails.
    const convertPdf = async (pdfUrl) => {
        try {
            const pdfBuffer = await getPdfBuffer(pdfUrl);
            const markdown = await (0, pdf2md_1.default)(pdfBuffer);
            const pageUrl = transformPageUrl(pdfUrl);
            const extractedTitle = getTitleFromContent(markdown);
            return {
                url: pageUrl,
                // Fall back to the original (untransformed) URL when no title is found.
                title: extractedTitle ?? pdfUrl,
                body: markdown,
                format: "md",
                sourceName: name,
                sourceType,
                metadata,
            };
        }
        catch (error) {
            console.warn(`Failed to create PDF page for url '${pdfUrl}',`, error);
            return undefined;
        }
    };
    const fetchPages = async () => {
        const conversions = urls.map((pdfUrl) => convertPdf(pdfUrl));
        const results = await Promise.all(conversions);
        // Drop the slots left empty by failed conversions.
        return results.filter((result) => result !== undefined);
    };
    return { name, fetchPages };
}
exports.makePdfToMarkdownDataSource = makePdfToMarkdownDataSource;
//# sourceMappingURL=PdfToMdDataSource.js.map