UNPKG

salve-annos

Version:

A fork with support for documentation of Salve, a Javascript library which implements a validator able to validate an XML document on the basis of a subset of RelaxNG.

github.com/raffazizzi/salve

raffazizzi/salve

248 lines • 8.92 kB

JavaScript

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.SalveParsingError = void 0;
exports.parse = parse;
/**
 * This is a module that converts XMLSchema regular expressions to
 * plain JavaScript regular expressions.
 *
 * @author Louis-Dominique Dubeau
 * @license MPL 2.0
 * @copyright Mangalam Research Center for Buddhist Languages
 */
// tslint:disable-next-line:import-name no-submodule-imports
const unicode_base_1 = __importDefault(require("xregexp/lib/addons/unicode-base"));
// tslint:disable-next-line:import-name no-submodule-imports
const unicode_blocks_1 = __importDefault(require("xregexp/lib/addons/unicode-blocks"));
// tslint:disable-next-line:import-name no-submodule-imports
const unicode_categories_1 = __importDefault(require("xregexp/lib/addons/unicode-categories"));
// tslint:disable-next-line:import-name no-submodule-imports
const xregexp_1 = __importDefault(require("xregexp/lib/xregexp"));
const tools_1 = require("../tools");
const xmlcharacters_1 = require("./xmlcharacters");
(0, unicode_base_1.default)(xregexp_1.default);
(0, unicode_blocks_1.default)(xregexp_1.default);
(0, unicode_categories_1.default)(xregexp_1.default);
//
// Terminology:
//
// - Character class: a square bracket construct in a regexp.
//
// - Positive character class: [...] without a leading caret.
//
// - Negative character class: [^...].
//
// - Positive match: a single character or range that a character must
//   match. (Negative character classes also contain positive
//   matches. In [^abc], "a", "b" and "c" are positive matches. The
//   character class is negative but the individual characters are
//   positive.) The only thing this excludes are multi-char escapes
//   that match negatively like \S and \D.
//
// - Positive version of a multi-char escape: defined only for
//   multi-char escapes that expand to a negative character class; the
//   same character class but with the negating caret removed. So the
//   positive version of \S is \s, etc.
//
// The main transformation performed by this code is to deal with XML
// Schema regexp's multi character classes that have no class
// equivalent in JavaScript.
//
// Some simple identities:
//
// - [xy]  <-> (?:[x]|[y])
// - [^xy] <-> (?:(?![y])[^x])
// - (?![^x]) <-> (?=[x])
//
// Positive multi-char escape in a positive character class:
//
// - [x\s] -> (?:[x]|[ \t\r\n]) -> [x \t\r\n]
//
// Just expand the character class to its constituents.
//
// Negative multi-char escape in a positive character class:
//
// - [x\S]   -> (?:[x]|[^ \t\r\n])
//
// - [x\S\D] -> (?:[x\D]|[^ \t\r\n])
//           -> (?:[x]|[^\p{Nd}]|[^ \t\r\n])
//
// So we output the positive matches in the class in one positive
// character class, and ``or`` it with one negative character class
// per negative multi-char escape.
//
// Positive multi-char escape in negative character class:
//
// - [^x\s] -> (?:(?![ \t\r\n])[^x])
//          -> [^x \t\r\n]
//
// Just expand the multi-char escape to its constituents.
//
// Negative multi-char escape in negative character class:
//
// - [^x\S]   -> (?:(?![^ \t\r\n])[^x])
//            -> (?:(?=[ \t\r\n])[^x])
//
// - [^x\S\D] -> (?:(?![\S\D])[^x])
//            -> (?:(?![\S])(?![\D])[^x])
//            -> (?:(?=[ \t\r\n])(?=[\p{Nd}])[^x])
//
// So we output the positive matches in the class in one negative
// character class, and put in front of it one positive lookahead per
// negative multi-char escape, each holding the positive version of
// that escape. The lookaheads must stay separate: a character is
// accepted only if it is excluded by *every* negative escape, which is
// an intersection, not a union.
//
// Subtractions:
//
// - [abcd-[bc]]     -> (?:(?![bc])[abcd])
// - [ad-[bc]]       -> (?:(?![bc])[ad])
// - [abcd-[bc-[c]]] -> (?:(?![bc-[c]])[abcd])
//                   -> (?:(?!(?:(?![c])[bc]))[abcd])
// - [abcd-[^a]]     -> (?:(?![^a])[abcd])
//
// We use the name ``Salve`` to help avoid potential clashes.
// ``ParsingError`` seems too risky.
/**
 * Error raised when an XML Schema regular expression cannot be converted.
 */
class SalveParsingError extends Error {
    /**
     * @param {string} msg - Description of the problem.
     */
    constructor(msg) {
        super(msg);
        (0, tools_1.fixPrototype)(this, SalveParsingError);
    }
}
exports.SalveParsingError = SalveParsingError;
/**
 * Tell whether a construct on the conversion stack is a character class.
 *
 * @param x - A stack entry (either the root or a character class).
 * @returns {boolean} ``true`` if ``x.kind`` is ``"group"``.
 */
function isGroup(x) {
    return x.kind === "group";
}
// Expansion of each multi-char escape when it appears *inside* a character
// class. An expansion starting with "^" is a negative escape: it cannot be
// inlined into the enclosing class and is handled when the class is closed.
const multiCharEscapesInGroup = {
    s: " \\t\\n\\r",
    S: "^ \\t\\n\\r",
    i: `${xmlcharacters_1.xmlLetter}_:`,
    I: `^${xmlcharacters_1.xmlLetter}_:`,
    c: xmlcharacters_1.xmlNameChar,
    C: `^${xmlcharacters_1.xmlNameChar}`,
    d: "\\p{Nd}",
    D: "^\\p{Nd}",
    w: "^\\p{P}\\p{Z}\\p{C}",
    W: "\\p{P}\\p{Z}\\p{C}",
};
// Expansion of each multi-char escape when it appears *outside* a character
// class: the in-class expansion wrapped in square brackets.
const multiCharEscapes = Object.create(null);
for (const k of Object.keys(multiCharEscapesInGroup)) {
    multiCharEscapes[k] = `[${multiCharEscapesInGroup[k]}]`;
}
// The letters that, following a backslash, form a multi-char escape.
const MULTI_CHAR_ESCAPE_NAMES = "sSiIcCdDwW";
/**
 * Convert an XML Schema regular expression to a JavaScript one.
 *
 * @param {string} input - The XML Schema regular expression.
 * @param {string} [outputType="re"] - With ``"re"`` a compiled regular
 * expression object is returned; with any other value the converted pattern
 * is returned as a string.
 * @returns The compiled expression or the pattern string. The expression is
 * built with XRegExp when the converted pattern contains ``\p`` or ``\P``,
 * and with the native ``RegExp`` otherwise.
 * @throws {SalveParsingError} If ``input`` has an unterminated or unopened
 * character class, or ends with a dangling backslash.
 */
function parse(input, outputType = "re") {
    const converted = convertString(input);
    if (outputType !== "re") {
        return converted;
    }
    const Ctor = /\\p/i.test(converted) ? xregexp_1.default : RegExp;
    return new Ctor(converted);
}
/**
 * Produce the JavaScript pattern fragment for a completed character class.
 *
 * @param group - The character class that was just closed.
 * @returns {string} The fragment to emit for the class.
 */
function closeGroup(group) {
    const { capturedMultiChar, negative } = group;
    const plain = `${group.output}]`;
    const toSubtract = group.toSubtract !== "" ?
        `(?!${group.toSubtract})` : "";
    if (capturedMultiChar.length === 0) {
        return toSubtract !== "" ? `(?:${toSubtract}${plain})` : plain;
    }
    if (negative) {
        // One lookahead per negative escape: the character must belong to
        // the positive version of each of them.
        const lookaheads = capturedMultiChar
            .map((name) => `(?=[${multiCharEscapesInGroup[name].slice(1)}])`)
            .join("");
        return `(?:${toSubtract}${lookaheads}${plain})`;
    }
    const alternatives = capturedMultiChar
        .map((name) => `[${multiCharEscapesInGroup[name]}]`)
        .concat(plain)
        .join("|");
    // When there is a subtraction, the alternation has to be wrapped so that
    // the negative lookahead guards every alternative and not only the first.
    return toSubtract !== "" ?
        `(?:${toSubtract}(?:${alternatives}))` :
        `(?:${alternatives})`;
}
/**
 * Convert an XML Schema regular expression into the source of an anchored
 * JavaScript regular expression.
 *
 * @param {string} input - The XML Schema regular expression.
 * @returns {string} The converted pattern, wrapped in ``^`` and ``$``.
 * @throws {SalveParsingError} If ``input`` has an unterminated or unopened
 * character class, or ends with a dangling backslash.
 */
// tslint:disable-next-line:max-func-body-length
function convertString(input) {
    const root = {
        kind: "root",
        output: "",
    };
    const stack = [root];
    for (let i = 0; i < input.length; ++i) {
        const top = stack[stack.length - 1];
        const c = input[i];
        switch (c) {
            case "(":
                // Parentheses only group outside of a character class; inside
                // one they are ordinary characters.
                top.output += isGroup(top) ? c : "(?:";
                break;
            case "[": {
                const negative = input[i + 1] === "^";
                stack.push({
                    kind: "group",
                    output: negative ? "[^" : "[",
                    negative,
                    capturedMultiChar: [],
                    subtraction: false,
                    toSubtract: "",
                });
                if (negative) {
                    i++;
                }
                break;
            }
            case "]": {
                if (!isGroup(top)) {
                    throw new SalveParsingError(`unmatched "]" at position ${i} in regular expression: ${input}`);
                }
                stack.pop();
                const combined = closeGroup(top);
                const parent = stack[stack.length - 1];
                if (isGroup(parent) && parent.subtraction) {
                    // This class is what gets subtracted from its parent.
                    parent.toSubtract = combined;
                }
                else {
                    parent.output += combined;
                }
                break;
            }
            case "-":
                if (isGroup(top) && input[i + 1] === "[") {
                    top.subtraction = true;
                }
                else {
                    top.output += c;
                }
                break;
            case "\\": {
                const next = input[i + 1];
                if (next === undefined) {
                    throw new SalveParsingError(`dangling backslash at the end of regular expression: ${input}`);
                }
                // Whatever follows the backslash belongs to the escape, so it
                // is always consumed here and never re-interpreted as syntax.
                i++;
                if (!MULTI_CHAR_ESCAPE_NAMES.includes(next)) {
                    top.output += c + next;
                }
                else if (!isGroup(top)) {
                    top.output += multiCharEscapes[next];
                }
                else {
                    const repl = multiCharEscapesInGroup[next];
                    if (repl[0] === "^") {
                        // Negative escapes are resolved when the class closes.
                        top.capturedMultiChar.push(next);
                    }
                    else {
                        top.output += repl;
                    }
                }
                break;
            }
            default:
                top.output += c;
        }
    }
    if (stack.length !== 1) {
        throw new SalveParsingError(`unterminated character class in regular expression: ${input}`);
    }
    return `^${root.output}$`;
}
//# sourceMappingURL=regexp.js.map