UNPKG

v-regexp

Version:

JavaScript Regular Expression Parser and Visualizer.

444 lines 14.1 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); var tslib_1 = require("tslib"); // @ts-nocheck var constants_1 = require("./constants"); var NFA_1 = tslib_1.__importDefault(require("./NFA")); var Kit_1 = tslib_1.__importDefault(require("./Kit")); var parse_1 = tslib_1.__importDefault(require("./parse")); /** Mock RegExp class */ RegExp.DEBUG = RegExp.D = 1; RegExp.MULTILINE = RegExp.M = 2; RegExp.GLOBAL = RegExp.G = 4; RegExp.IGNORECASE = RegExp.I = 8; function RegExp(re, options) { if (!(this instanceof RegExp)) return new RegExp(re, options); re += ''; var opts = {}; if (typeof options === 'string') { options = options.toLowerCase(); if (~options.indexOf('i')) opts.ignoreCase = true; if (~options.indexOf('m')) opts.multiline = true; if (~options.indexOf('g')) opts.global = true; if (~options.indexOf('d')) opts.debug = true; } else { opts = options; } var ast = (this.ast = parse_1.default(re)); this.source = re; this.multiline = !!opts.multiline; this.global = !!opts.global; this.ignoreCase = !!opts.ignoreCase; this.debug = !!opts.debug; this.flags = ''; if (this.multiline) this.flags += 'm'; if (this.ignoreCase) this.flags += 'i'; if (this.global) this.flags += 'g'; _readonly(this, ['source', 'options', 'multiline', 'global', 'ignoreCase', 'flags', 'debug']); var ignoreCase = this.ignoreCase; ast.traverse(function (node) { explainCharset(node, ignoreCase); }, constants_1.CHARSET_NODE); ast.traverse(function (node) { explainExact(node, ignoreCase); }, constants_1.EXACT_NODE); if (this.multiline) ast.traverse(multilineAssert, constants_1.ASSERT_NODE); } RegExp.prototype = { toString: function () { return "/" + this.source + "/" + this.flags; }, test: function (s) { return this.exec(s) !== null; }, exec: function (s) { var nfa = this.getNFA(); var ret; var startIndex = this.global ? this.lastIndex || 0 : 0; var max = s.length; for (; startIndex < max; startIndex++) { ret = nfa.input(s, startIndex); if (ret.acceptable) break; } if (!ret || !ret.acceptable) { this.lastIndex = 0; return null; } var groups = new Array(this.ast.groupCount + 1); groups[0] = s.slice(startIndex, ret.lastIndex + 1); var stack = ret.stack; for (var i = 1, l = groups.length; i < l; i++) { groups[i] = getGroupContent(stack, i, s); } this.lastIndex = ret.lastIndex + 1; groups.index = startIndex; groups.input = s; return groups; }, getNFA: function () { if (this._nfa) return this._nfa; var nfa; var ast = this.ast; stateGUID = 1; // reset state guid nfa = tree2NFA(ast.tree); nfa = NFA_1.default(nfa, this.debug); this._nfa = nfa; return nfa; }, }; function explainExact(node, ignoreCase) { // expand exact node to ignore case var ranges; ranges = node.chars.split(''); if (ignoreCase) { ranges = ranges.map(function (c) { if (/[a-z]/.test(c)) return [c, c.toUpperCase()]; if (/[A-Z]/.test(c)) return [c, c.toLowerCase()]; return [c]; }); } else { ranges = ranges.map(function (c) { return [c]; }); } node.explained = ranges; } function multilineAssert(node) { var at = node.assertionType; if (at === constants_1.AssertBegin || at === constants_1.AssertEnd) node.multiline = true; } // var anyChar='\0\uffff'; var anyCharButNewline = Kit_1.default.parseCharset('^\n\r\u2028\u2029'); // \n \r \u2028 \u2029.But what's "\u2028" and "\u2029" // Not used var charClass2ranges = { // e.g. \d\D\w\W\s\S d: ['09'], w: ['AZ', 'az', '09', '_'], s: ' \f\n\r\t\v\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000'.split(''), }; ['d', 'w', 's'].forEach(function (cls) { // D W S,negate ranges charClass2ranges[cls.toUpperCase()] = Kit_1.default.negate(charClass2ranges[cls]); }); function explainCharset(node, ignoreCase) { var ranges = node.chars.split(''); ranges = ranges.concat(Kit_1.default.flatten2(node.classes.map(function (cls) { return charClass2ranges[cls]; }))); ranges = ranges.concat(node.ranges); if (ignoreCase) ranges = expandRangeIgnoreCase(ranges); ranges = Kit_1.default.classify(ranges).ranges; if (node.exclude) ranges = Kit_1.default.negate(ranges); ranges = Kit_1.default.coalesce(ranges); // compress ranges node.explained = ranges; } // expand ['Aa'] to ['az','Aa'] function expandRangeIgnoreCase(ranges) { return Kit_1.default.flatten2(ranges.map(function (r) { var parts = Kit_1.default.classify([r, 'az', 'AZ']).map[r]; return Kit_1.default.flatten2(parts.map(function (p) { if (/[a-z]/.test(p)) { return [p, p.toUpperCase()]; } if (/[A-Z]/.test(p)) { return [p, p.toLowerCase()]; } return [p]; })); })); } function tree2NFA(stack, from) { var trans = []; var accepts; from = from || ['start']; accepts = stack.reduce(function (from, node) { var a = node2NFA(node, from); trans = trans.concat(a.trans); return a.accepts; }, from); return { accepts: accepts, trans: trans }; } /* return {trans:[Transition],accepts:[State]} */ function node2NFA(node, from) { if (node.repeat) { return repeatNFA(node, from); } return NFABuilders[node.type](node, from); } function getGroupContent(stack, num, s) { var start; var end; var match; for (var i = 0, l = stack.length, item; i < l; i++) { item = stack[i]; if (item.num === num) { if (item.type === GROUP_CAPTURE_END) { end = item.index; } else if (item.type === GROUP_CAPTURE_START) { start = item.index; break; } } } if (start === undefined || end === undefined) return; return s.slice(start, end); } var stateGUID = 0; function newState() { return "q" + stateGUID++; } var GROUP_CAPTURE_START = 'GroupCaptureStart'; var GROUP_CAPTURE_END = 'GroupCaptureEnd'; var NFABuilders = (function _() { function exact(node, from) { var ts = []; var to; var ranges = node.explained; ranges.forEach(function (range) { ts.push({ from: from, to: (to = [newState()]), charset: range }); from = to; }); return { accepts: to, trans: ts }; } function charset(node, from) { var to = [newState()]; return { accepts: to, trans: [{ from: from, to: to, charset: node.explained }] }; } function dot(node, from) { var to = [newState()]; return { accepts: to, trans: [{ from: from, to: to, charset: anyCharButNewline }] }; } function empty(node, from) { var to = [newState()]; return { accepts: to, trans: [{ from: from, to: to, charset: false }] }; } function group(node, from) { var groupStart = [newState()]; var ts = [ { from: from, to: groupStart, charset: false, action: !node.nonCapture && function _groupStart(stack, c, i) { stack.unshift({ type: GROUP_CAPTURE_START, num: node.num, index: i }); }, }, ]; from = groupStart; var a = tree2NFA(node.sub, from); ts = ts.concat(a.trans); var groupEnd = [newState()]; ts.push({ from: a.accepts, to: groupEnd, charset: false, action: !node.nonCapture && function _groupEnd(stack, c, i) { stack.unshift({ type: GROUP_CAPTURE_END, num: node.num, index: i }); }, }); return { accepts: groupEnd, trans: ts }; } function backref(node, from) { var to = [newState()]; var groupNum = node.num; return { accepts: to, trans: [ { from: from, to: to, charset: false, assert: function _aBackref(stack, c, i, state, s) { // static invalid backref will throw parse error // dynamic invalid backref will treat as empty string // e.g. /(?:(\d)|-)\1/ will match "-" var match = getGroupContent(stack, groupNum, s); if (match === undefined) { match = ''; } if (s.slice(i, i + match.length) === match) { return match.length; } return false; }, }, ], }; } function choice(node, from) { var ts = []; var to = []; node.branches.forEach(function (branch) { var a = tree2NFA(branch, from); ts = ts.concat(a.trans); to = to.concat(a.accepts); }); return { trans: ts, accepts: to }; } function assert(node, from) { var f; switch (node.assertionType) { case constants_1.AssertBegin: f = node.multiline ? _assertLineBegin : _assertStrBegin; break; case constants_1.AssertEnd: f = node.multiline ? _assertLineEnd : _assertStrEnd; break; case constants_1.AssertWordBoundary: f = function _WB(_, c, i, state, s) { return _isBoundary(i, s); }; break; case constants_1.AssertNonWordBoundary: f = function _NWB(_, c, i, state, s) { return !_isBoundary(i, s); }; break; case constants_1.AssertLookahead: f = _lookahead(node); break; case constants_1.AssertNegativeLookahead: f = _negativeLookahead(node); break; } return _newAssert(node, from, f); function _newAssert(node, from, assert) { var to = [newState()]; return { accepts: to, trans: [ { from: from, to: to, charset: false, assert: assert, }, ], }; } function _lookahead(node) { var m = NFA_1.default(tree2NFA(node.sub, ['start'])); return function _Lookahead(stack, c, i, state, s) { var ret = m.input(s, i, null, stack); return ret.acceptable; }; } function _negativeLookahead(node) { var f = _lookahead(node); return function _NLookahead() { return !f.apply(this, arguments); }; } function _isBoundary(i, s) { return !!(_isWordChar(i - 1, s) ^ _isWordChar(i, s)); } function _isWordChar(i, s) { return i !== -1 && i !== s.length && /\w/.test(s[i]); } function _assertLineBegin(_, c, i, state, s) { return i === 0 || s[i - 1] === '\n'; } function _assertStrBegin(_, c, i, state, s) { return i === 0; } function _assertLineEnd(_, c, i, state, s) { return i === s.length || c === '\n'; } function _assertStrEnd(_, c, i, state, s) { return i === s.length; } } return { assert: assert, choice: choice, backref: backref, group: group, empty: empty, charset: charset, dot: dot, exact: exact, }; })(); function repeatNFA(node, from) { var builder = NFABuilders[node.type]; var a; var i; var trans = []; var repeat = node.repeat; var min = repeat.min; var max = repeat.max; i = min; while (i--) { a = builder(node, from); trans = trans.concat(a.trans); from = a.accepts; } var moreTrans = []; var accepts = [].concat(from); if (isFinite(max)) { for (; max > min; max--) { a = builder(node, from); moreTrans = moreTrans.concat(a.trans); from = a.accepts; accepts = accepts.concat(a.accepts); } } else { var beforeStates = from.slice(); a = builder(node, from); moreTrans = moreTrans.concat(a.trans); accepts = accepts.concat(a.accepts); moreTrans.push({ from: a.accepts, to: beforeStates, charset: false, }); } var endState = [newState()]; if (repeat.nonGreedy) { trans.push({ from: accepts, to: endState, charset: false, }); trans = trans.concat(moreTrans); } else { trans = trans.concat(moreTrans); trans.push({ from: accepts, to: endState, charset: false, }); } return { accepts: endState, trans: trans }; } function _readonly(obj, attrs) { attrs.forEach(function (a) { Object.defineProperty(obj, a, { writable: false, enumerable: true }); }); } exports.default = RegExp; //# sourceMappingURL=RegExp.js.map