mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
50 lines • 2.2 kB
JavaScript
;
// Tag the CommonJS exports object with an `__esModule: true` property.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the named export up front as `undefined`; the real function is
// assigned to it after its definition further down in this file.
exports.makeMarkdownUrlDataSource = void 0;
/**
Loads markdown pages from URLs.
*/
const _1 = require(".");
const extractMarkdownH1_1 = require("./extractMarkdownH1");
/**
 * Builds a data source that loads markdown pages from a list of URLs.
 *
 * Loading is best-effort: a URL that is rejected or fails to load is reported
 * with `console.warn` and left out of the result instead of failing the batch.
 *
 * @param {object} options
 * @param {*} options.sourceName - Used as the data source `name` and copied onto every page.
 * @param {string[]} options.markdownUrls - URLs to fetch; each must end in `.md` or `.markdown`.
 * @param {*} [options.sourceType] - Copied unchanged onto every page.
 * @param {*} [options.metadata] - Copied unchanged onto every page (the same reference is shared).
 * @param {(url: string) => string} [options.markdownUrlToPageUrl] - Optional mapper from the
 *   fetched markdown URL to the URL stored on the page; when absent the markdown URL is used.
 * @returns {{ name: *, fetchPages: () => Promise<object[]> }} The data source object.
 */
function makeMarkdownUrlDataSource({ sourceName, markdownUrls, sourceType, metadata, markdownUrlToPageUrl, }) {
    const acceptedSuffixes = [".md", ".markdown"];
    const acceptedContentTypes = ["text/plain", "text/markdown"];

    // Resolves to a page object for one URL, or to undefined when the URL is skipped.
    const loadPage = async (markdownUrl) => {
        const hasMarkdownSuffix = acceptedSuffixes.some((suffix) => markdownUrl.endsWith(suffix));
        if (!hasMarkdownSuffix) {
            console.warn(`URL must end in .md or .markdown: ${markdownUrl}`);
            return undefined;
        }
        try {
            const res = await fetch(markdownUrl);
            if (!res.ok) {
                console.warn(`${res.status} response from ${markdownUrl}`);
                return undefined;
            }
            const contentType = res.headers.get("content-type");
            const looksLikeMarkdown = contentType !== null &&
                contentType !== undefined &&
                acceptedContentTypes.some((type) => contentType.includes(type));
            if (!looksLikeMarkdown) {
                console.warn(`Content is not markdown: ${markdownUrl}`);
                return undefined;
            }
            const body = (0, _1.removeMarkdownImagesAndLinks)(await res.text());
            const pageUrl = markdownUrlToPageUrl
                ? markdownUrlToPageUrl(markdownUrl)
                : markdownUrl;
            const title = (0, extractMarkdownH1_1.extractMarkdownH1)(body);
            return {
                url: pageUrl,
                title,
                format: "md",
                body,
                sourceName,
                sourceType,
                metadata,
            };
        }
        catch (error) {
            // Network and parsing failures only drop this one page.
            console.warn(`Failed to create page from ${markdownUrl},`, error);
            return undefined;
        }
    };

    return {
        name: sourceName,
        async fetchPages() {
            // All URLs are requested concurrently; results keep the input order.
            const results = await Promise.all(markdownUrls.map((markdownUrl) => loadPage(markdownUrl)));
            const pages = [];
            for (const result of results) {
                if (result !== undefined) {
                    pages.push(result);
                }
            }
            return pages;
        },
    };
}
// Expose the factory function as this module's named export.
exports.makeMarkdownUrlDataSource = makeMarkdownUrlDataSource;
//# sourceMappingURL=MarkdownUrlDataSource.js.map