UNPKG

mongodb-rag-core

Version:

Common elements used by MongoDB Chatbot Framework components.

50 lines 2.2 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.makeMarkdownUrlDataSource = void 0; /** Loads markdown pages from URLs. */ const _1 = require("."); const extractMarkdownH1_1 = require("./extractMarkdownH1"); function makeMarkdownUrlDataSource({ sourceName, markdownUrls, sourceType, metadata, markdownUrlToPageUrl, }) { return { name: sourceName, async fetchPages() { const settledPages = await Promise.all(markdownUrls.map(async (url) => { if (!url.endsWith(".md") && !url.endsWith(".markdown")) { console.warn(`URL must end in .md or .markdown: ${url}`); return; } try { const response = await fetch(url); if (!response.ok) { console.warn(`${response.status} response from ${url}`); } else if (!(response.headers.get("content-type")?.includes("text/plain") || response.headers.get("content-type")?.includes("text/markdown"))) { console.warn(`Content is not markdown: ${url}`); } else { const body = (0, _1.removeMarkdownImagesAndLinks)(await response.text()); const page = { url: markdownUrlToPageUrl ? markdownUrlToPageUrl(url) : url, title: (0, extractMarkdownH1_1.extractMarkdownH1)(body), format: "md", body, sourceName, sourceType, metadata, }; return page; } } catch (error) { console.warn(`Failed to create page from ${url},`, error); } })); return settledPages.filter((page) => page !== undefined); }, }; } exports.makeMarkdownUrlDataSource = makeMarkdownUrlDataSource; //# sourceMappingURL=MarkdownUrlDataSource.js.map