UNPKG

regulex

Version:

JavaScript Regular Expression Parser and Visualizer.

309 lines (279 loc) 9.25 kB
if (typeof define !== 'function') var define = require('amdefine')(module); define(['./Kit'],function (K) { /** A Naive NFA Implementation Start state is always named 'start' @param {NFAConfig|CompactNFAConfig} a type NFAConfig = {compact:false,accepts:StateSet,trans:[Transition]} type State = String type StateSet = [State] type Tranisition = {from:StateSet,to:StateSet,charset:Charset,action:Action,assert:Assert} type Charset = String|[Range] Charset is similar to regex charset,supports negation and range but metacharacters Examples: includes: 'abc0-9','[^]' excludes: '^c-z0-9','^a^' //excluded 'a' and '^' two chars any char: '\0-\uffff' Or set charset to processed disjoint ranges:['ac','d','eh'] Set `charset` to empty string to enable empty move(ε-moves). Action: Function(stack:Array,c:String,i:Int,state:String,inputs:String):Array stack: storage stack c: current char i: current index state: current state inputs: whole input string Optional return new stack Only eMove transition allow `assert` Actions and Asserts of eMove transition always execute before non-eMove transitions on current path. Assert: Function(stack:Array,c:String,i:Int,state:String,inputs:String):Boolean Return True if assertion just success,if fail return false If success and need skip num chars, return the Int count to increase `i`,this feature is designed for backref. Stack modifications in action only allow shift,unshift and return new stack. NFAConfig example used to recognize numbers:{ compact:false,accepts:'start'. trans:[{from:'start',to:'start',charset:'0-9'}] } CompactNFAConfig example,see `structure` function. An automaton used to recognize triples:{ compact:true,accepts:'start', trans:[ ['start>start','0369'],['start>q1','147'],['start>q2','258'], ['q1>q1','0369'],['q1>q2','147'],['q1>start','258'], ['q2>q2','0369'],['q2>q1','258'],['q2>start','147'], ] }; @return { input:Function } */ function NFA(a) { a=a.compact?structure(a):a; var accepts={},i,trans=a.trans, // FMap={toState:Function} router={/* fromState : { eMove:[{to:State,action:Function,assert:Function,eMove:Bool}], eMoveStates:[State],// ε-move dest states charMove:{ // expanded to include eMove Range:[{to:State,action:Function,assert:Function,eMove:Bool}], Char:[{to:State,action:Function,assert:Function,eMove:Bool}] }, ranges:Set([Range]), // all trans keep original order in transitions list trans:[Transition] } */}; for (i=0,n=a.accepts.length;i<n;i++) accepts[a.accepts[i]]=true; //add accept states var t; for (i=0,n=trans.length;i<n;i++) {//collect charsets t=trans[i]; if (t.charset) t.ranges= typeof t.charset==='string'?K.parseCharset(t.charset):t.charset; else t.eMove=true; t.from.forEach(function(from) { var path=(router[from]=router[from] || { eMoveStates:[],eMove:[],charMove:{},trans:[],ranges:[] }); if (t.eMove) path.eMoveStates=path.eMoveStates.concat(t.to); else path.ranges=path.ranges.concat(t.ranges); path.trans.push(t); }); } var fromStates=Object.keys(router); fromStates.forEach(function (from) { var path=router[from],trans=path.trans, charMove=path.charMove,eMove=path.eMove, ranges=path.ranges; var cls=K.classify(ranges),rangeMap=cls.map; trans.forEach(function (t) { if (t.eMove) { t.to.forEach(function (toState) { eMove.push({to:toState,action:t.action,assert:t.assert,eMove:true}); }); } else { K.flatten2(t.ranges.map(function (r) {return rangeMap[r]})).forEach(function (r) { (charMove[r]=charMove[r] || []).push(t); }); } }); ranges=K.Set(cls.ranges.filter(function (rg) {return !!rg[1]}));//exclude single char path.ranges=ranges; // expand charMove to includes ε-move Object.keys(charMove).forEach(function (r) { var transChar=charMove[r]; var transAll=[]; trans.forEach(function (t) { t.to.forEach(function (toState) { if (t.eMove || ~transChar.indexOf(t)) transAll.push({to:toState,action:t.action,assert:t.assert,eMove:t.eMove}); }); }); charMove[r]=transAll; }); delete path.trans; delete path.eMoveStates; }); return { accepts:accepts, router:router, input:input, assertDFA:assertDFA, accept:accept }; } function accept(state) { return this.accepts.hasOwnProperty(state); } function assertDFA() { var router=this.router; var fromStates=Object.keys(router),path; for (var i=0,l=fromStates.length;i<l;i++) { path=router[fromStates[i]]; if (path.eMove.length>1) { throw new Error("DFA Assertion Fail!\nFrom state `"+fromStates[i]+"` can goto to multi ε-move states!"); } var charMove=path.charMove; var ranges=Object.keys(charMove); for (var k=0,n=ranges.length;k<n;k++) { var t=charMove[ranges[k]]; if (t.length!==1) { K.log(charMove); throw new Error("DFA Assertion Fail!\nFrom state `"+fromStates[i]+"` via charset `"+ranges[k]+"` can goto to multi states!"); } } if (ranges.length && path.eMove.length) { throw new Error("DFA Assertion Fail!\nFrom state `"+fromStates[i]+"` can goto extra ε-move state!"); } } return true; } /** return { stack:Array, acceptable:Boolean, lastIndex:Int, lastState:String } */ function input(s,startIndex,_debug) { startIndex=startIndex || 0; var _this=this; return _input(s,startIndex,'start',[],startIndex-1); function _input(s,startIndex,fromState,stack,lastIndex) { recur:do { var c,range,advanceIndex,lastResult; var path=_this.router[fromState]; if (!path) break; var eMove=path.eMove,charMove=path.charMove,trans; if (startIndex<s.length) { c=s[startIndex]; if (charMove.hasOwnProperty(c)) { trans=charMove[c]; } else if (range=findRange(path.ranges,c)) { trans=charMove[range]; } else { trans=eMove; } } else { trans=eMove; } var sp=stack.length,t,skip,ret,oldLastIndex=lastIndex; for (var j=0,n=trans.length;j<n;j++) { t=trans[j]; advanceIndex=t.eMove?0:1; lastIndex=oldLastIndex; stack.splice(0,stack.length-sp); sp=stack.length; // backup stack length if (t.assert) { if ((skip=t.assert(stack,c,startIndex,fromState,s))===false) continue; // For backref skip num chars if (typeof skip==='number') {startIndex+=skip;lastIndex+=skip;} } if (t.action) stack=t.action(stack,c,startIndex,fromState,s) || stack; lastIndex=t.eMove?lastIndex:startIndex; _debug && K.log(c+":"+fromState+">"+t.to); if (j===n-1) { startIndex+=advanceIndex; fromState=t.to; continue recur; // Human flesh tail call optimize? } else { ret=_input(s,startIndex+advanceIndex,t.to,stack,lastIndex); } if (ret.acceptable) return ret; lastResult=ret; } if (lastResult) return lastResult; break; } while (true); return { stack:stack,lastIndex:lastIndex,lastState:fromState, acceptable:_this.accept(fromState) }; } } /** ε-closure return closureMap {fromState:[toState]} eMoveMap = {fromState:{to:[State]}} */ function eClosure(eMoves,eMoveMap) { var closureMap={}; eMoves.forEach(function (state) { // FK forEach pass extra args closure(state); }); return closureMap; function closure(state,_chain) { if (closureMap.hasOwnProperty(state)) return closureMap[state]; if (!eMoveMap.hasOwnProperty(state)) return false; _chain=_chain||[state]; var dest=eMoveMap[state], queue=dest.to.slice(), toStates=[state],s,clos; while (queue.length) { s=queue.shift(); if (~_chain.indexOf(s)) { throw new Error("Recursive ε-move:"+_chain.join(">")+">"+s+"!"); } clos=closure(s,_chain); if (clos) queue=clos.slice(1).concat(queue); toStates.push(s); } return closureMap[state]=toStates; } } function findRange(ranges,c/*:Char*/) { var i=ranges.indexOf(c,cmpRange); if (!~i) return false; return ranges[i]; } function cmpRange(c,rg) { var head=rg[0],tail=rg[1]; if (c>tail) return 1; if (c<head) return -1; return 0; } /** Convert CompactNFAConfig to NFAConfig @param {CompactNFAConfig} a type CompactNFAConfig={compact:true,accepts:CompactStateSet,trans:[CompactTransition]} type CompactStateSet = StateSet.join(",") type CompactTransition = [CompactStateMap,Charset,Action,Assert] type CompactStateMap = FromStateSet.join(",")+">"+ToStateSet.join(",") */ function structure(a) { a.accepts=a.accepts.split(','); var ts=a.trans, i=ts.length,t,s,from,to; while (i--) { t=ts[i]; s=t[0].split('>'); from=s[0].split(','); to=s[1].split(','); ts[i]={from:from,to:to,charset:t[1],action:t[2],assert:t[3]}; } a.compact=false; return a; } return NFA; });