UNPKG

mongodb-rag-core

Version:

Common elements used by MongoDB Chatbot Framework components.

61 lines 2.83 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.makeMdOnGithubDataSource = void 0; const GitHubDataSource_1 = require("./GitHubDataSource"); const removeMarkdownImagesAndLinks_1 = require("./removeMarkdownImagesAndLinks"); const extractMarkdownH1_1 = require("./extractMarkdownH1"); const frontMatter_1 = require("../frontMatter"); /** Loads an .md/.mdx docs site from a GitHub repo. */ const makeMdOnGithubDataSource = async ({ name, repoUrl, repoLoaderOptions, pathToPageUrl, filter, sourceType, metadata, frontMatter = { process: true, separator: "---", format: "yaml" }, extractMetadata, extractTitle = extractMarkdownH1_1.extractMarkdownH1, }) => { return (0, GitHubDataSource_1.makeGitHubDataSource)({ name, repoUrl, filter, repoLoaderOptions: { ...(repoLoaderOptions ?? {}), ignoreFiles: [ /^(?!.*\.(md|mdx)$).*/i, // Anything BUT .md OR .mdx extensions ...(repoLoaderOptions?.ignoreFiles ?? []), ], }, async handleDocumentInRepo(document) { // Process front matter from markdown file into a Record<string, unknown>. // Remove front matter from body. let frontMatterMetadata; let body = document.pageContent; if (frontMatter.process) { const extracted = (0, frontMatter_1.extractFrontMatter)(document.pageContent, frontMatter.format, frontMatter.separator); frontMatterMetadata = extracted.metadata; body = extracted.body; } const { source } = document.metadata; // Extract metadata to use in page from page content and frontmatter (if it exists) let extractedMetadata; try { extractedMetadata = extractMetadata && extractMetadata(body, frontMatterMetadata); } catch (err) { console.error(`The following error occurred when extracting metadata from the page '${source}: ${JSON.stringify(err)}'`); } const url = pathToPageUrl(source, frontMatterMetadata); const page = { body: (0, removeMarkdownImagesAndLinks_1.removeMarkdownImagesAndLinks)(body), format: "md", sourceName: name, url, sourceType, metadata: { ...(metadata ?? {}), ...(extractedMetadata ?? {}) }, }; const title = extractTitle(document.pageContent, frontMatterMetadata); if (title) { page.title = title; } return page; }, }); }; exports.makeMdOnGithubDataSource = makeMdOnGithubDataSource; //# sourceMappingURL=MdOnGithubDataSource.js.map