mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
87 lines (85 loc) • 2.68 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.pageFormat = exports.isPageFormat = exports.asPageFormat = exports.pageFormats = void 0;
/**
 Canonical file formats supported for pages, grouped by category.
 Anything that does not appear in this table is treated as a "txt"
 file.

 Each entry is either a bare string (a canonical name that has no
 synonyms) or an array whose first element is the canonical name and
 whose remaining elements are synonyms for it (e.g. a file extension).
 */
// Text file formats
const textFileFormats = [
  ["txt", "text"],
  ["md", "markdown"],
  "mdx",
  ["restructuredtext", "rst"],
];
// Data file formats
const dataFileFormats = [
  "csv",
  "json",
  ["yaml", "yml"],
  "toml",
  "xml",
  "openapi-yaml",
  "openapi-json",
  "graphql",
];
// Code file formats
const codeFileFormats = [
  ["c", "h"],
  ["cpp", "hpp", "cxx", "hxx"],
  ["csharp", "cs"],
  ["go", "golang"],
  "html",
  "java",
  ["javascript", "js", "cjs", "mjs"],
  ["kotlin", "kt"],
  ["latex", "tex"],
  ["objective-c", "m"],
  "php",
  ["python", "py"],
  ["ruby", "rb"],
  ["rust", "rs"],
  ["scala", "sc"],
  ["shell", "sh"],
  "swift",
  ["typescript", "ts"],
];
// The categories are concatenated in this order so that the combined
// table keeps the same entry order as before the split.
const pageFormatsWithSynonyms = [
  ...textFileFormats,
  ...dataFileFormats,
  ...codeFileFormats,
];
/**
The list of canonical file formats that we support for pages.
*/
exports.pageFormats = pageFormatsWithSynonyms.map((type) => typeof type === "string" ? type : type[0]);
// Uniform view of pageFormatsWithSynonyms in which every entry is an
// array: bare strings are wrapped in a one-element array and array
// entries are passed through as-is. It is built once at module scope
// because it is derived from a constant, so there is no reason to
// rebuild it on every lookup.
const pageFormatsAndSynonymsAsTuples = pageFormatsWithSynonyms.map((entry) => {
  if (typeof entry === "string") {
    return [entry];
  }
  return entry;
});
/**
Maps a string to the canonical page format it represents.
@returns The canonical page format, or undefined if the string is not
a recognized page format.
*/
const asPageFormat = (str) => {
for (const pageFormatAndSynonyms of pageFormatsAndSynonymsAsTuples) {
if (pageFormatAndSynonyms.includes(str)) {
return pageFormatAndSynonyms[0];
}
}
};
exports.asPageFormat = asPageFormat;
/**
Type guard to check if a string is a canonical page format.
*/
function isPageFormat(str) {
return exports.pageFormats.includes(str);
}
exports.isPageFormat = isPageFormat;
/**
Converts a string to a canonical page format. If the string is not a
recognized page format or a synonym for one, this returns a default
page format.
*/
function pageFormat(str, defaultPageFormat = "txt") {
return (0, exports.asPageFormat)(str) ?? defaultPageFormat;
}
exports.pageFormat = pageFormat;
//# sourceMappingURL=PageFormat.js.map