v-regexp
Version:
JavaScript Regular Expression Parser and Visualizer.
358 lines (351 loc) • 11.6 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
var tslib_1 = require("tslib");
var Kit_1 = tslib_1.__importDefault(require("./Kit"));
/**
A Naive NFA Implementation
Start state is always named 'start'
@param a  an NFAConfig or a CompactNFAConfig (both described below)
type NFAConfig = {compact:false,accepts:StateSet,trans:[Transition]}
type State = String
type StateSet = [State]
type Transition = {from:StateSet,to:StateSet,charset:Charset,action:Action,assert:Assert}
type Charset = String|[Range]
Charset is similar to a regex charset: it supports negation and ranges, but not metacharacters.
Examples:
includes: 'abc0-9','[^]'
excludes: '^c-z0-9','^a^' //excluded 'a' and '^' two chars
any char: '\0-\uffff'
Or set charset to processed disjoint ranges:['ac','d','eh']
Set `charset` to empty string to enable empty move(ε-moves).
Action:
Function(stack:Array,c:String,i:Int,state:String,inputs:String):Array
stack: storage stack
c: current char
i: current index
state: current state
inputs: whole input string
Optional return new stack
Only eMove transition allow `assert`
Actions and Asserts of eMove transition always execute before non-eMove transitions on current path.
Assert:
Function(stack:Array,c:String,i:Int,state:String,inputs:String):Boolean
Return true if the assertion succeeds, false if it fails.
If it succeeds and some chars need to be skipped,
return the Int count to add to `i` instead; this feature is designed for backref.
Stack modifications in action only allow shift,unshift and return new stack.
NFAConfig example used to recognize numbers:{
compact:false,accepts:['start'],
trans:[{from:['start'],to:['start'],charset:'0-9'}]
}
CompactNFAConfig example,see `structure` function.
An automaton used to recognize multiples of three (decimal digit strings):{
compact:true,accepts:'start',
trans:[
['start>start','0369'],['start>q1','147'],['start>q2','258'],
['q1>q1','0369'],['q1>q2','147'],['q1>start','258'],
['q2>q2','0369'],['q2>q1','258'],['q2>start','147'],
]
};
*/
function NFA(a) {
    // Compact configs are expanded by `structure`; regular configs are used as given.
    var config = a.compact ? structure(a) : a;
    var transitions = config.trans;
    // accept-state name -> true
    var accepts = {};
    /*
     router maps every source state to its outgoing moves:
       fromState : {
         eMove:    [{to,action,assert,eMove:true}]   ε-moves only
         charMove: {rangeOrChar:[{to,action,assert,eMove}]}
                   per range/char key; ε-moves are merged in and the
                   original order of the transition list is kept
         ranges:   the classified ranges that have a second char
                   (single-char keys are looked up in charMove directly)
       }
    */
    var router = {};
    var idx;
    var total;

    for (idx = 0, total = config.accepts.length; idx < total; idx++) {
        accepts[config.accepts[idx]] = true;
    }
    for (idx = 0, total = transitions.length; idx < total; idx++) {
        register(transitions[idx]);
    }
    Object.keys(router).forEach(finalize);

    return {
        accepts: accepts,
        router: router,
        input: input,
        assertDFA: assertDFA,
        accept: accept,
    };

    // Shape stored in the router for one (transition, destination) pair.
    function makeMove(transition, toState, isEMove) {
        return {
            to: toState,
            action: transition.action,
            assert: transition.assert,
            eMove: isEMove,
        };
    }

    // Pass 1: resolve the transition's ranges (or flag it as an ε-move) and
    // attach it to the working entry of each of its source states.
    function register(transition) {
        if (transition.charset) {
            transition.ranges =
                typeof transition.charset === 'string'
                    ? Kit_1.default.parseCharset(transition.charset)
                    : transition.charset;
        }
        else {
            // An empty charset means ε-move.
            transition.eMove = true;
        }
        transition.from.forEach(function (state) {
            var entry = router[state];
            if (!entry) {
                entry = router[state] = {
                    eMoveStates: [],
                    eMove: [],
                    charMove: {},
                    trans: [],
                    ranges: [],
                };
            }
            if (transition.eMove) {
                entry.eMoveStates = entry.eMoveStates.concat(transition.to);
            }
            else {
                entry.ranges = entry.ranges.concat(transition.ranges);
            }
            entry.trans.push(transition);
        });
    }

    // Pass 2: turn one state's working entry into its final routing tables.
    function finalize(state) {
        var entry = router[state];
        var pending = entry.trans;
        var byKey = entry.charMove;
        var classified = Kit_1.default.classify(entry.ranges);
        var pieces = classified.map;

        pending.forEach(function (transition) {
            if (!transition.eMove) {
                // Index the transition under every classified piece of its ranges.
                var keys = Kit_1.default.flatten2(transition.ranges.map(function (rg) {
                    return pieces[rg];
                }));
                keys.forEach(function (key) {
                    if (!byKey[key]) {
                        byKey[key] = [];
                    }
                    byKey[key].push(transition);
                });
                return;
            }
            transition.to.forEach(function (toState) {
                entry.eMove.push(makeMove(transition, toState, true));
            });
        });

        // Keep only ranges that have a second char.
        entry.ranges = Kit_1.default.Set(classified.ranges.filter(function (rg) {
            return Boolean(rg[1]);
        }));

        // Rebuild each charMove list so that it also carries the ε-moves,
        // in the order the transitions were declared.
        Object.keys(byKey).forEach(function (key) {
            var direct = byKey[key];
            var expanded = [];
            pending.forEach(function (transition) {
                var included = transition.eMove || direct.indexOf(transition) !== -1;
                transition.to.forEach(function (toState) {
                    if (included) {
                        expanded.push(makeMove(transition, toState, transition.eMove));
                    }
                });
            });
            byKey[key] = expanded;
        });

        // Working fields are not part of the finished router.
        delete entry.trans;
        delete entry.eMoveStates;
    }
}
/**
 * Tell whether `state` is one of this automaton's accepting states.
 * Reads `this.accepts`, a map keyed by state name.
 * @param {string} state  state name to look up
 * @returns {boolean} true when `state` is an own key of `this.accepts`
 */
function accept(state) {
    // Borrow the method from Object.prototype: the map's keys are state names,
    // so a state called "hasOwnProperty" would otherwise shadow it and make
    // the lookup throw.
    return Object.prototype.hasOwnProperty.call(this.accepts, state);
}
/**
 * Verify that the automaton in `this.router` is deterministic.
 * For every source state, in router key order, it checks that:
 *   - there is at most one ε-move,
 *   - every range/char key leads to exactly one move,
 *   - char moves and an ε-move do not coexist.
 * @returns {boolean} true when all checks pass
 * @throws {Error} describing the first violated rule
 */
function assertDFA() {
    var router = this.router;
    Object.keys(router).forEach(function (state) {
        var entry = router[state];
        if (entry.eMove.length > 1) {
            throw new Error("DFA Assertion Fail!\nFrom state `" + state + "` can goto to multi \u03B5-move states!");
        }
        var byKey = entry.charMove;
        var keys = Object.keys(byKey);
        keys.forEach(function (key) {
            if (byKey[key].length !== 1) {
                throw new Error("DFA Assertion Fail!\nFrom state `" + state + "` via charset `" + key + "` can goto to multi states!");
            }
        });
        if (keys.length && entry.eMove.length) {
            throw new Error("DFA Assertion Fail!\nFrom state `" + state + "` can goto extra \u03B5-move state!");
        }
    });
    return true;
}
/**
 * Feed the string `s` to the automaton held in `this.router`, beginning in
 * state 'start'. The moves available from a state are tried in order; every
 * move but the last is explored recursively (backtracking), the last one
 * continues in the same loop.
 *
 * @param {string} s  whole input string
 * @param {number} [startIndex=0]  index in `s` at which to start
 * @param {*} [_debug]  unused
 * @returns {{stack:Array, acceptable:boolean, lastIndex:number, lastState:string}}
 *   stack      - storage stack passed to (and possibly replaced by) actions
 *   acceptable - whether the state where the run stopped is an accept state
 *   lastIndex  - index of the last char consumed by a non-ε move, advanced by
 *                any assert skip counts; `startIndex - 1` if nothing was consumed
 *   lastState  - the state where the run stopped
 */
function input(s, startIndex, _debug) {
    startIndex = startIndex || 0;
    var _this = this;
    return _input(s, startIndex, 'start', [], startIndex - 1);
    function _input(s, startIndex, fromState, stack, lastIndex) {
        recur: do {
            // NOTE: `c` is only assigned while startIndex < s.length, so at end
            // of input asserts/actions receive the value it held before.
            var c;
            var range;
            var advanceIndex;
            // `var` is function-scoped: without this reset, a failed branch
            // recorded in an earlier `continue recur` round would still be
            // returned below and could hide an accepting final state.
            var lastResult = undefined;
            var path = _this.router[fromState];
            if (!path)
                break;
            var eMove = path.eMove;
            var charMove = path.charMove;
            var trans;
            if (startIndex < s.length) {
                c = s[startIndex];
                if (charMove.hasOwnProperty(c)) {
                    trans = charMove[c];
                }
                else if ((range = findRange(path.ranges, c))) {
                    trans = charMove[range];
                }
                else {
                    trans = eMove;
                }
            }
            else {
                // End of input: only ε-moves can still be taken.
                trans = eMove;
            }
            var sp = stack.length;
            var t;
            var skip;
            var ret;
            var oldLastIndex = lastIndex;
            var oldStartIndex = startIndex;
            for (var j = 0, n = trans.length; j < n; j++) {
                t = trans[j];
                advanceIndex = t.eMove ? 0 : 1;
                // Every alternative starts from the same position: undo the
                // index changes made by a previously tried assert skip.
                lastIndex = oldLastIndex;
                startIndex = oldStartIndex;
                // Drop what the previous alternative put at the front of the stack.
                stack.splice(0, stack.length - sp);
                sp = stack.length; // backup stack length
                if (t.assert) {
                    if ((skip = t.assert(stack, c, startIndex, fromState, s)) === false)
                        continue;
                    // For backref skip num chars
                    if (typeof skip === 'number') {
                        startIndex += skip;
                        lastIndex += skip;
                    }
                }
                if (t.action)
                    stack = t.action(stack, c, startIndex, fromState, s) || stack;
                lastIndex = t.eMove ? lastIndex : startIndex;
                if (j === n - 1) {
                    // Last alternative: loop instead of recursing (manual tail call).
                    startIndex += advanceIndex;
                    fromState = t.to;
                    continue recur;
                }
                else {
                    ret = _input(s, startIndex + advanceIndex, t.to, stack, lastIndex);
                }
                if (ret.acceptable)
                    return ret;
                lastResult = ret;
            }
            // Reached when there was no move to take or the last move's assert failed.
            if (lastResult)
                return lastResult;
            break;
        } while (true);
        return {
            stack: stack,
            lastIndex: lastIndex,
            lastState: fromState,
            acceptable: _this.accept(fromState),
        };
    }
}
/**
 * ε-closure.
 * For each state in `eMoves`, and for every state reached on the way that has
 * an entry in `eMoveMap`, collect the states reachable through ε-moves only.
 *
 * @param {Array} eMoves  states to compute the closure for
 * @param {Object} eMoveMap  {fromState:{to:[State]}}
 * @returns {Object} closureMap {fromState:[toState]}; each list starts with
 *   the state itself and may list a state more than once
 * @throws {Error} "Recursive ε-move" when the ε-moves form a cycle
 */
function eClosure(eMoves, eMoveMap) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var closureMap = {};
    eMoves.forEach(function (state) {
        // Call through a wrapper so forEach's extra (index, array) arguments
        // are not mistaken for `_chain`.
        closure(state);
    });
    return closureMap;
    // `_chain` is the path of states currently being expanded, ending in `state`.
    function closure(state, _chain) {
        if (hasOwn.call(closureMap, state))
            return closureMap[state];
        if (!hasOwn.call(eMoveMap, state))
            return false;
        _chain = _chain || [state];
        var dest = eMoveMap[state];
        var queue = dest.to.slice();
        var toStates = [state];
        var s;
        var clos;
        while (queue.length) {
            s = queue.shift();
            if (~_chain.indexOf(s)) {
                throw new Error("Recursive \u03B5-move:" + _chain.join('>') + ">" + s + "!");
            }
            // Extend the chain with `s`; otherwise only cycles back to the very
            // first state are detected and any other cycle recurses forever.
            clos = closure(s, _chain.concat([s]));
            if (clos)
                queue = clos.slice(1).concat(queue);
            toStates.push(s);
        }
        return (closureMap[state] = toStates);
    }
}
/**
 * Look up the range that contains char `c`.
 * `ranges` must provide an `indexOf(value, comparator)` method; it is called
 * with `cmpRange` as the comparator.
 * @param ranges  collection of ranges to search
 * @param c  single char
 * @returns the matching range, or false when `indexOf` reports -1
 */
function findRange(ranges, c /*: Char */) {
    var position = ranges.indexOf(c, cmpRange);
    return ~position ? ranges[position] : false;
}
/**
 * Comparator placing char `c` relative to the range `rg`, where `rg[0]` is the
 * first char of the range and `rg[1]` the last.
 * @param c  single char
 * @param rg  range, indexable at 0 and 1
 * @returns {number} 1 if `c` is above the range, -1 if below, 0 otherwise
 */
function cmpRange(c, rg) {
    var lower = rg[0];
    var upper = rg[1];
    // The upper bound is tested first.
    return c > upper ? 1 : c < lower ? -1 : 0;
}
/**
Convert a CompactNFAConfig to an NFAConfig, in place.
@param a  the CompactNFAConfig; it is modified and returned
type CompactNFAConfig={compact:true,accepts:CompactStateSet,trans:[CompactTransition]}
type CompactStateSet = StateSet.join(",")
type CompactTransition = [CompactStateMap,Charset,Action,Assert]
type CompactStateMap = FromStateSet.join(",")+">"+ToStateSet.join(",")
@returns the same object `a`, with `accepts` split into an array, every
entry of `trans` replaced by a {from,to,charset,action,assert} object
and `compact` set to false
*/
function structure(a) {
    a.accepts = a.accepts.split(',');
    var compactTrans = a.trans;
    // Entries are rewritten from the last one to the first.
    for (var idx = compactTrans.length - 1; idx >= 0; idx--) {
        var entry = compactTrans[idx];
        // "from1,from2>to1,to2"
        var ends = entry[0].split('>');
        compactTrans[idx] = {
            from: ends[0].split(','),
            to: ends[1].split(','),
            charset: entry[1],
            action: entry[2],
            assert: entry[3],
        };
    }
    a.compact = false;
    return a;
}
// Expose the NFA factory as this module's default export.
exports.default = NFA;
//# sourceMappingURL=NFA.js.map