mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
75 lines • 2.96 kB
JavaScript
;
/**
  Interop helper for default imports: reuses an already-defined
  `this.__importDefault` when one exists, otherwise falls back to the local
  implementation below.

  A module flagged with `__esModule` is returned unchanged; anything else is
  wrapped as `{ default: mod }` so callers can always read `.default`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this CommonJS module with `__esModule` so interop helpers such as
// `__importDefault` return it as-is instead of wrapping it in `{ default }`.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the public exports as `undefined` (`void 0`); each one is
// assigned its real value further down in this file.
exports.standardChunkFrontMatterUpdater = exports.standardMetadataGetter = exports.makeChunkFrontMatterUpdater = void 0;
// Module dependencies.
const assert_1 = require("assert");
const front_matter_1 = __importDefault(require("front-matter"));
const updateFrontMatter_1 = require("../frontMatter/updateFrontMatter");
/**
  Build a chunk transformer that adds or updates front matter metadata in the
  chunk text.

  @param getMetadata - Function (may be async) called with
  `{ chunk, page, metadata, text }`, where `metadata` holds the attributes of
  any front matter already on the chunk (otherwise `undefined`) and `text` is
  the chunk text with that front matter removed. Keys it returns take
  precedence over existing front matter keys.
  @returns An async function `(chunk, { page })` that resolves to a shallow
  copy of `chunk` whose `text` has been rewritten with the merged metadata.
 */
const makeChunkFrontMatterUpdater = (getMetadata) => {
    return async (chunk, { page }) => {
        const parseFrontMatter = front_matter_1.default;
        // Only parse when the text actually starts with front matter.
        const hasFrontMatter = parseFrontMatter.test(chunk.text);
        const parsed = hasFrontMatter ? parseFrontMatter(chunk.text) : undefined;
        const existingAttributes = parsed?.attributes;
        const body = parsed?.body ?? chunk.text;
        // Snapshot the existing attributes before invoking the user callback,
        // so the merge base is fixed ahead of the await.
        const inherited = { ...(existingAttributes ?? {}) };
        const generated = await getMetadata({
            chunk,
            page,
            metadata: existingAttributes,
            text: body,
        });
        // Callback-provided keys win over keys already in the front matter.
        const merged = { ...inherited, ...generated };
        // Re-emit the body with the merged metadata as its front matter.
        const text = (0, updateFrontMatter_1.updateFrontMatter)(body, merged);
        return { ...chunk, text };
    };
};
exports.makeChunkFrontMatterUpdater = makeChunkFrontMatterUpdater;
/**
  Forms common metadata based on the chunk text, including info about any code
  examples in the text.

  @param page - Source page; `page.title` becomes `pageTitle`, and
  `page.metadata` (minus its `page` key) is merged in first so the fields
  computed here take precedence.
  @param text - Chunk text to scan for Markdown code fences.
  @returns Metadata object with `pageTitle`, `hasCodeBlock`, and, when at
  least one fence names a language, `codeBlockLanguages` (unique, in order of
  first appearance).
 */
const standardMetadataGetter = async ({ page, text }) => {
    // A fence is three backticks optionally followed by a language tag made
    // of letters, digits, `_` or `-`. (The class was previously `[A-z0-1-_]`,
    // which truncated tags containing digits 2-9, e.g. "python3", and also
    // accepted the punctuation that sits between `Z` and `a` in ASCII.)
    const mdCodeBlockToken = /```([A-Za-z0-9_-]*)/;
    // At most one fence is considered per line; a Set de-duplicates tags
    // while keeping first-seen order.
    const fenceTags = new Set();
    for (const line of text.split("\n")) {
        const match = mdCodeBlockToken.exec(line);
        if (match !== null) {
            fenceTags.add(match[1]);
        }
    }
    const metadata = {
        pageTitle: page.title,
        // Any fence counts, including ones with no language tag.
        hasCodeBlock: fenceTags.size !== 0,
    };
    // Fences without a tag are captured as "" and are not reported as
    // languages.
    const specifiedLanguages = [...fenceTags].filter((language) => language !== "");
    if (specifiedLanguages.length) {
        metadata["codeBlockLanguages"] = specifiedLanguages;
    }
    return { ...getPageChunkMetadata(page.metadata), ...metadata };
};
exports.standardMetadataGetter = standardMetadataGetter;
/**
  Return a shallow copy of the page metadata without its `page` key.

  @param pageMetadata - Page-level metadata object, or `undefined`/`null`
  when the page has none.
  @returns The copy, or `undefined` when no metadata was provided. `null` is
  treated like `undefined` rather than throwing during destructuring.
 */
function getPageChunkMetadata(pageMetadata) {
    // `== null` intentionally matches both `null` and `undefined`.
    if (pageMetadata == null) {
        return undefined;
    }
    // Drop the `page` key; everything else is carried over.
    const { page, ...copy } = pageMetadata;
    return copy;
}
// Ready-made front matter updater: the generic factory from this module wired
// to `standardMetadataGetter`.
exports.standardChunkFrontMatterUpdater = (0, exports.makeChunkFrontMatterUpdater)(exports.standardMetadataGetter);
//# sourceMappingURL=ChunkTransformer.js.map