mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
61 lines • 2.83 kB
JavaScript
;
// Define an `__esModule: true` property on this module's exports object.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the export as undefined (`void 0`); it is assigned its real
// value at the end of this file, after the function has been defined.
exports.makeMdOnGithubDataSource = void 0;
const GitHubDataSource_1 = require("./GitHubDataSource");
const removeMarkdownImagesAndLinks_1 = require("./removeMarkdownImagesAndLinks");
const extractMarkdownH1_1 = require("./extractMarkdownH1");
const frontMatter_1 = require("../frontMatter");
/**
 Loads an .md/.mdx docs site from a GitHub repo.

 Wraps `makeGitHubDataSource`: every file whose path does not end in `.md` or
 `.mdx` (case-insensitive) is ignored, and each remaining document is
 converted into a page object with `format: "md"`.

 @param options
 @param options.name - Forwarded to `makeGitHubDataSource` and stored on every
   page as `sourceName`.
 @param options.repoUrl - Forwarded to `makeGitHubDataSource`.
 @param options.repoLoaderOptions - Optional loader options. They are spread
   into the options passed on; any `ignoreFiles` entries are appended after
   the built-in "not markdown" pattern.
 @param options.pathToPageUrl - Called as `(source, frontMatterMetadata)`; its
   return value becomes the page `url`.
 @param options.filter - Forwarded to `makeGitHubDataSource`.
 @param options.sourceType - Copied onto every page.
 @param options.metadata - Optional base metadata for every page. Keys
   returned by `extractMetadata` override keys of the same name.
 @param options.frontMatter - `{ process, separator, format }`. When `process`
   is truthy the front matter is parsed with `extractFrontMatter` and removed
   from the body. Defaults to YAML front matter delimited by `---`.
 @param options.extractMetadata - Optional. Called as
   `(body, frontMatterMetadata)`. If it throws, the error is logged with
   `console.error` and the page is still produced without extracted metadata.
 @param options.extractTitle - Called as `(pageContent, frontMatterMetadata)`
   with the original page content (front matter included). Defaults to
   `extractMarkdownH1`. The page only gets a `title` when the result is truthy.
 @returns A promise resolving to the value returned by `makeGitHubDataSource`.
 */
const makeMdOnGithubDataSource = async ({ name, repoUrl, repoLoaderOptions, pathToPageUrl, filter, sourceType, metadata, frontMatter = { process: true, separator: "---", format: "yaml" }, extractMetadata, extractTitle = extractMarkdownH1_1.extractMarkdownH1, }) => {
  // Render a caught value for logging. `JSON.stringify` on an Error produces
  // "{}" (message/stack are not enumerable) and can itself throw, e.g. on
  // circular structures, so Errors are handled separately and the JSON path
  // is guarded.
  const describeError = (err) => {
    if (err instanceof Error) {
      return err.stack ?? String(err);
    }
    try {
      return JSON.stringify(err) ?? String(err);
    } catch {
      return String(err);
    }
  };

  return (0, GitHubDataSource_1.makeGitHubDataSource)({
    name,
    repoUrl,
    filter,
    repoLoaderOptions: {
      ...(repoLoaderOptions ?? {}),
      ignoreFiles: [
        /^(?!.*\.(md|mdx)$).*/i, // Anything BUT .md OR .mdx extensions
        ...(repoLoaderOptions?.ignoreFiles ?? []),
      ],
    },
    async handleDocumentInRepo(document) {
      // Process front matter from markdown file into a Record<string, unknown>.
      // Remove front matter from body.
      let frontMatterMetadata;
      let body = document.pageContent;
      if (frontMatter.process) {
        const extracted = (0, frontMatter_1.extractFrontMatter)(document.pageContent, frontMatter.format, frontMatter.separator);
        frontMatterMetadata = extracted.metadata;
        body = extracted.body;
      }
      const { source } = document.metadata;

      // Extract metadata to use in page from page content and frontmatter (if it exists).
      // This step is best-effort: a failing extractor is logged, not rethrown.
      let extractedMetadata;
      try {
        extractedMetadata = extractMetadata
          ? extractMetadata(body, frontMatterMetadata)
          : extractMetadata;
      } catch (err) {
        console.error(`The following error occurred when extracting metadata from the page '${source}': ${describeError(err)}`);
      }

      const url = pathToPageUrl(source, frontMatterMetadata);
      const page = {
        body: (0, removeMarkdownImagesAndLinks_1.removeMarkdownImagesAndLinks)(body),
        format: "md",
        sourceName: name,
        url,
        sourceType,
        // Later spread wins: extracted metadata overrides the base metadata.
        metadata: { ...(metadata ?? {}), ...(extractedMetadata ?? {}) },
      };

      // The title extractor is given the original content, not the stripped body.
      const title = extractTitle(document.pageContent, frontMatterMetadata);
      if (title) {
        page.title = title;
      }
      return page;
    },
  });
};
// Expose the factory as a named export of this module.
exports.makeMdOnGithubDataSource = makeMdOnGithubDataSource;
//# sourceMappingURL=MdOnGithubDataSource.js.map