UNPKG

mongodb-rag-core

Version:

Common elements used by MongoDB Chatbot Framework components.

87 lines (85 loc) 2.68 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.pageFormat = exports.isPageFormat = exports.asPageFormat = exports.pageFormats = void 0; /** This is the definition of the canonical file formats that we support for pages. If something is not in this list, we treat it as a "txt" file. If a format may be referenced by multiple terms (e.g. a file extension), we list it as an array where the first element is the canonical name and the rest are synonyms. */ const pageFormatsWithSynonyms = [ // Text file formats ["txt", "text"], ["md", "markdown"], "mdx", ["restructuredtext", "rst"], // Data file formats "csv", "json", ["yaml", "yml"], "toml", "xml", "openapi-yaml", "openapi-json", "graphql", // Code file formats ["c", "h"], ["cpp", "hpp", "cxx", "hxx"], ["csharp", "cs"], ["go", "golang"], "html", "java", ["javascript", "js", "cjs", "mjs"], ["kotlin", "kt"], ["latex", "tex"], ["objective-c", "m"], "php", ["python", "py"], ["ruby", "rb"], ["rust", "rs"], ["scala", "sc"], ["shell", "sh"], "swift", ["typescript", "ts"], ]; /** The list of canonical file formats that we support for pages. */ exports.pageFormats = pageFormatsWithSynonyms.map((type) => typeof type === "string" ? type : type[0]); // Helper that makes every element of pageFormatsWithSynonyms a tuple. // We define this outside of the function because it's derived from a // const so does not need to be redefined every time the function is // called. const pageFormatsAndSynonymsAsTuples = pageFormatsWithSynonyms.map((t) => typeof t === "string" ? [t] : t); /** Maps a string to the canonical page format it represents. @returns The canonical page format, or undefined if the string is not a recognized page format. */ const asPageFormat = (str) => { for (const pageFormatAndSynonyms of pageFormatsAndSynonymsAsTuples) { if (pageFormatAndSynonyms.includes(str)) { return pageFormatAndSynonyms[0]; } } }; exports.asPageFormat = asPageFormat; /** Type guard to check if a string is a canonical page format. */ function isPageFormat(str) { return exports.pageFormats.includes(str); } exports.isPageFormat = isPageFormat; /** Converts a string to a canonical page format. If the string is not a recognized page format or a synonym for one, this returns a default page format. */ function pageFormat(str, defaultPageFormat = "txt") { return (0, exports.asPageFormat)(str) ?? defaultPageFormat; } exports.pageFormat = pageFormat; //# sourceMappingURL=PageFormat.js.map