incr-regex-package
Version:
An incremental regular expression parser in JavaScript; useful for input validation, RegExp
384 lines (324 loc) • 11.6 kB
JavaScript
import {rprefix, sRightMerge,n_cons,
flatten, ID, StackDedup, arr_uniq, arr_push} from "../utils";
import { matchable,boundary,dot,or,zero_or_one,zero_or_more, DONE, MORE, FAILED,
MAYBE, HOLDER_ZERO_OR_MORE, HOLDER_ANY, HOLDER_ZERO_OR_ONE } from "../rxtree";
import makeRxInfo from './makeRxInfo';
import {RxParser} from '../regexp-parser';
/**
 * Translate a user-written mask into the internal placeholder alphabet.
 *
 * "*" becomes HOLDER_ZERO_OR_MORE, "?" becomes HOLDER_ZERO_OR_ONE and
 * "_" becomes HOLDER_ANY; every other character is copied unchanged.
 *
 * @param {string} s - mask text using "*", "?" and "_" as wildcards.
 * @returns {string} the mask with wildcards replaced by holder markers.
 */
export function convertMask(s) {
  const converted = [];
  for (const ch of s.split('')) {
    switch (ch) {
      case "*":
        converted.push(HOLDER_ZERO_OR_MORE);
        break;
      case "?":
        converted.push(HOLDER_ZERO_OR_ONE);
        break;
      case "_":
        converted.push(HOLDER_ANY);
        break;
      default:
        converted.push(ch);
    }
  }
  return converted.join('');
}
/**
 * Tell whether a character is any of the holder markers: the "any"
 * holder or one of the two optional holders.
 *
 * @param {string} ch - character to classify.
 * @returns {boolean} true for HOLDER_ANY or any character accepted by isOptional.
 */
export function isMeta(ch) {
  if (ch === HOLDER_ANY) {
    return true;
  }
  return isOptional(ch);
}
/**
 * Tell whether a character is the HOLDER_ANY marker.
 *
 * @param {string} ch - character to classify.
 * @returns {boolean} true only when ch is strictly equal to HOLDER_ANY.
 */
export function isHolder(ch) {
  const matchesAnyHolder = (HOLDER_ANY === ch);
  return matchesAnyHolder;
}
/**
 * Tell whether a character is one of the optional holder markers
 * (HOLDER_ZERO_OR_ONE or HOLDER_ZERO_OR_MORE).
 *
 * @param {string} ch - character to classify.
 * @returns {boolean} true when ch is strictly equal to either optional marker.
 */
export function isOptional(ch) {
  switch (ch) {
    case HOLDER_ZERO_OR_ONE:
    case HOLDER_ZERO_OR_MORE:
      return true;
    default:
      return false;
  }
}
/**
 * Collapse runs of the same optional marker into a single occurrence.
 *
 * A character is dropped only when it is an optional marker (per isOptional)
 * and is identical to the character immediately before it; everything else
 * is kept in order.
 *
 * @param {string} str - mask text to clean.
 * @returns {string} the mask without consecutive duplicate optional markers.
 */
function cleanMask(str) {
  let cleaned = '';
  let previous; // undefined until the first character has been seen
  for (let pos = 0; pos < str.length; pos += 1) {
    const ch = str.charAt(pos);
    const repeatsOptional = isOptional(ch) && previous === ch;
    if (!repeatsOptional) {
      cleaned += ch;
    }
    previous = ch;
  }
  return cleaned;
}
/**
 * Skip the leading characters of `s` for which `notAllowed` returns true and
 * return what follows the first character that fails the predicate.
 *
 * NOTE(review): the first character that fails the predicate is dropped as
 * well (the result starts one position after it), and a string made up
 * entirely of rejected characters is returned unchanged. Confirm both are
 * intended before relying on this helper elsewhere.
 *
 * @param {string} s - text to scan from the left.
 * @param {function(string): boolean} notAllowed - predicate applied to each character in turn.
 * @returns {string} the remainder described above, or `s` itself when every character is rejected.
 */
function strip(s, notAllowed) {
  let pos = 0;
  while (pos < s.length && notAllowed(s.charAt(pos))) {
    pos += 1;
  }
  if (pos < s.length) {
    return s.substring(pos + 1, s.length);
  }
  return s;
}
/**
 * Merge two mask strings into one mask that covers both.
 *
 * The masks are walked in parallel, one cursor per string:
 *  - equal characters, or two optional markers, emit one character and
 *    advance both cursors (the first mask's character is used only when it
 *    is HOLDER_ZERO_OR_MORE, otherwise the second mask's character);
 *  - two different non-optional characters emit HOLDER_ANY and advance both;
 *  - when exactly one side is an optional marker, that marker is emitted and
 *    only that side advances.
 * The walk stops as soon as either mask is exhausted. The result is then
 * combined with a tail derived from `rprefix(s1, s2)` via `sRightMerge`
 * (both imported helpers; their exact semantics are not visible here).
 *
 * @param {string} s1 - first mask.
 * @param {string} s2 - second mask.
 * @returns {*} whatever `sRightMerge(merged, tail)` returns — presumably a string.
 */
function rationalize(s1, s2) {
  const tail = strip(rprefix(s1, s2), isOptional);
  let merged = '';
  let i1 = 0;
  let i2 = 0;
  while (i1 < s1.length && i2 < s2.length) {
    const c1 = s1.charAt(i1);
    const c2 = s2.charAt(i2);
    const opt1 = isOptional(c1);
    const opt2 = isOptional(c2);
    if (c1 === c2 || (opt1 && opt2)) {
      // Same slot on both sides; keep the wider marker when the first is "zero or more".
      merged += (c1 === HOLDER_ZERO_OR_MORE ? c1 : c2);
      i1 += 1;
      i2 += 1;
    } else if (!opt1 && !opt2) {
      // Two different required characters: any character may appear here.
      merged += HOLDER_ANY;
      i1 += 1;
      i2 += 1;
    } else if (opt1) {
      merged += c1;
      i1 += 1;
    } else {
      merged += c2;
      i2 += 1;
    }
  }
  return sRightMerge(merged, tail);
}
/**
 * Tell whether every entry of a result collection is the DONE marker.
 *
 * @param {*} res - collection exposing `filter` and `length`, or undefined.
 * @returns {boolean} false for undefined; otherwise true when the number of
 *   entries equal to DONE matches `res.length` (so an empty collection is true).
 */
function __isDoneN(res) {
  if (res === undefined) {
    return false;
  }
  const doneEntries = res.filter((entry) => entry === DONE);
  return doneEntries.length === res.length;
}
/**
 * Lift a per-node function to one that works on a collection of nodes.
 *
 * The returned function applies `func(node, prefix)` to every entry of `arr`
 * and folds the results together with `merge`, starting from `base()`.
 * For example, with base = () => [] and merge(a, b) = flatten([a, b]) the
 * per-node results are gathered into one flat list.
 *
 * @param {function(*, *): *} func - computes the info for a single node and prefix.
 * @param {function(*, *): *} merge - combines the accumulated value with one node's info.
 * @param {function(): *} base - produces the initial accumulator; called once per invocation.
 * @returns {function(*, *): *} function `(arr, prefix)`; `arr` must provide `reduce`.
 */
function makeArrayRxInfo(func, merge, base) {
  const getArrayRxInfo = (arr, prefix) =>
    arr.reduce((acc, node) => merge(acc, func(node, prefix)), base());
  return getArrayRxInfo;
}
// Mask symbol shown for each recognised character-class source text. Every
// symbol is a sample character followed by U+0332 (combining low line).
const MASK_SYMBOL_BY_CLASS = new Map([
  ["[0-9]", "9\u0332"],
  ["\\d", "9\u0332"],
  ["[A-Za-z]", "a\u0332"],
  ["[a-zA-Z]", "a\u0332"],
  ["[a-z]", "a\u0332"],
  ["[A-Z]", "A\u0332"],
  ["[0-9A-Za-z]", "z\u0332"],
  ["[A-Z0-9a-z]", "z\u0332"],
  ["[A-Za-z0-9]", "z\u0332"],
  ["[0-9a-zA-Z]", "z\u0332"],
  ["[a-z0-9A-Z]", "z\u0332"],
  ["[a-zA-Z0-9]", "z\u0332"],
]);

/**
 * Pick the mask symbol for a regexp node based on its `val` text.
 *
 * @param {{val: *}|undefined|null} rxn - regexp node; only `rxn.val` is read.
 * @param {*} deflt - value returned when the node is missing or its `val`
 *   is not one of the recognised character classes.
 * @returns {*} the symbol for digit, letter, upper-case or alphanumeric
 *   classes, otherwise `deflt`.
 */
function mapper(rxn, deflt) {
  if (!rxn) {
    return deflt;
  }
  const key = rxn.val;
  return MASK_SYMBOL_BY_CLASS.has(key) ? MASK_SYMBOL_BY_CLASS.get(key) : deflt;
}
/**
 * Build a function that lists the candidate masks for a collection of
 * regexp nodes.
 *
 * Per-node results are kept as arrays: a single value is wrapped in a
 * one-element array, two results are combined with `flatten`, and per-node
 * arrays are accumulated through `arr_push` + `flatten`.
 *
 * @param {function} mapper - forwarded to `makeRxInfo` as its last argument.
 * @param {boolean} useopt - when truthy, the handler for optional / repeating
 *   nodes defined below is forwarded to `makeRxInfo`; otherwise `undefined` is.
 * @returns {function} the `(arr, prefix)` function produced by `makeArrayRxInfo`.
 */
const arrayMaskListBuilder = (mapper, useopt) => {
  const unit = (a) => (a === undefined ? [] : [a]);
  const addElem = (a, b) => a + b;
  const merge = (a, b) => flatten([a, b]);
  const aMerge = (a, b) => flatten(arr_push(a, b));

  // Handles optional and repeating nodes: gathers the results of skipping the
  // node (continuing at rxn.nextNode) and of entering its body (rxn.left).
  const optfn = (rxn, prefix, getRxInfo, optStop) => {
    if (!rxn.left) {
      return undefined;
    }
    const skipped = getRxInfo(rxn.nextNode, prefix, optStop);
    // A zero-or-more node can loop back onto itself, so it is pushed onto the
    // stop list; if we come back to this node and find it there, we do not
    // loop again. A plain optional node cannot loop and reuses the list as is.
    const bodyStop = zero_or_more(rxn) ? n_cons(rxn, optStop) : optStop;
    const entered = getRxInfo(rxn.left, prefix, bodyStop);
    return merge(skipped, entered);
  };

  const optional = useopt ? optfn : undefined;
  const perNode = makeRxInfo(unit, addElem, merge, optional, mapper);
  return makeArrayRxInfo(perNode, aMerge, unit);
};
/**
 * Compute a single mask string for a collection of regexp nodes.
 *
 * Per-node masks are built through `makeRxInfo` using string concatenation,
 * alternative masks are merged with `rationalize`, and the final text is
 * passed through `cleanMask`. The collection is processed with an empty
 * prefix.
 *
 * @param {*} rx - collection of regexp nodes accepted by the function that
 *   `makeArrayRxInfo` returns.
 * @returns {string} the cleaned mask.
 */
export const getArrayMask = (() => {
  const identity = (a) => a;
  const concat = (a, b) => a + b;
  const mergeMasks = (a, b) => rationalize(a, b);
  // The accumulator may still be empty; in that case the incoming mask is
  // merged with itself.
  const foldMasks = (acc, next) => rationalize(acc || next, next);
  const perNode = makeRxInfo(identity, concat, mergeMasks);
  const collect = makeArrayRxInfo(perNode, foldMasks, identity);
  return (rx) => {
    const rawMask = collect(rx, '');
    return cleanMask(rawMask);
  };
})();
/**
 * Add two sizes, treating -1 as a marker that must win over any sum.
 *
 * @param {number} a - first size, or -1.
 * @param {number} b - second size, or -1.
 * @returns {number} -1 when either argument is -1, otherwise `a + b`.
 */
function combine(a, b) {
  if (a === -1 || b === -1) {
    return -1;
  }
  return a + b;
}
/**
 * Compute the size of a regexp tree when it has one fixed size.
 *
 * - missing node or DONE: 0
 * - concatenation (dot): sum of both sides, or -1 if either side is -1
 * - alternation (or): the left size when it is >= 0 and both sides agree, else -1
 * - zero-or-one / zero-or-more: -1
 * - matchable node: element [1] of the matcher's result for `undefined`
 *   (presumably the number of characters it consumes — confirm in rxtree)
 * - boundary node: size of its `left` child
 * - anything else: 0
 *
 * @param {*} rxNode - root of the regexp (sub)tree.
 * @returns {number} the fixed size, or -1 when the size is not fixed.
 */
function fixedSizePattern(rxNode) {
  if (!rxNode || rxNode === DONE) {
    return 0;
  }
  if (dot(rxNode)) {
    // Concatenation /AB/: both parts must be fixed for the whole to be fixed.
    const leftSize = fixedSizePattern(rxNode.left);
    const rightSize = fixedSizePattern(rxNode.right);
    return (leftSize === -1 || rightSize === -1) ? -1 : leftSize + rightSize;
  }
  if (or(rxNode)) {
    // Alternation /A|B/: the right side is only measured when the left is fixed.
    const leftSize = fixedSizePattern(rxNode.left);
    if (leftSize >= 0 && leftSize === fixedSizePattern(rxNode.right)) {
      return leftSize;
    }
    return -1;
  }
  if (zero_or_one(rxNode) || zero_or_more(rxNode)) {
    return -1;
  }
  if (matchable(rxNode)) {
    const outcome = matchable(rxNode)(undefined);
    return outcome[1];
  }
  if (boundary(rxNode)) {
    return fixedSizePattern(rxNode.left);
  }
  return 0;
}
// Two mask-list functions built from the same `mapper`:
//  - getArrayMaskListFull is built with useopt = true,
//  - getArrayMaskList is built with useopt = false.
const [getArrayMaskListFull, getArrayMaskList] =
  [true, false].map((useopt) => arrayMaskListBuilder(mapper, useopt));
/**
 * Core state machine of the incremental regular-expression matcher.
 *
 * The matcher keeps two stacks of regexp nodes (`one` and `two`): `current`
 * points at the one holding the active alternatives, the other is reused as
 * scratch space when the next character is tried. `tracker` records every
 * accepted character as a `[char, fixed]` pair.
 *
 * Some members used here (`this.length`, `this.inputStr()`) are not defined
 * in this class; they are presumably supplied by subclasses.
 */
export class BaseIncRegEx {
  /**
   * @param {string} [str] - regexp source; parsed with RxParser when `v` is not given.
   * @param {*} [v] - already parsed regexp tree; used as is when supplied.
   *
   * Called without arguments it creates an empty shell, which is what
   * clone() relies on before copying state into the new instance.
   */
  constructor(str, v) {
    if (!str && !v) {
      this.str = "";
      this.base = undefined;
      this.tracker = undefined;
      this.current = undefined;
      this.one = this.current;
      this.two = undefined;
      this.lastCh = undefined;
      this.maxLen = 0;
      this._mask = undefined;
      // NOTE(review): `base` is always undefined on this path, so the size
      // computation falls back to the default of 30 — confirm whether the
      // computation was meant for the parsing branch instead.
      let len = fixedSizePattern(this.base);
      if (len <= 0) len = 30;
      this._len = len;
    } else {
      if (!v && str) v = RxParser.parse(str);
      this.str = str;
      this.base = v;
      this.tracker = [];
      this.current = new StackDedup(v);
      this.one = this.current;
      this.two = new StackDedup();
      this.lastCh = undefined;
      this.maxLen = 0;
      // Cached mask; set back to undefined every time the tracker changes.
      // (Previously written to `this.mask`, unlike every other use of the cache.)
      this._mask = undefined;
      this._len = 30;
    }
  }

  /**
   * Forget all consumed input and restart from the root of the expression.
   * @returns {BaseIncRegEx} this instance, for chaining.
   */
  reset() { /* public */
    this.tracker = [];
    this.current.reset();
    this.current.push(this.base);
    this.lastCh = undefined;
    this._state = undefined;
    this._mask = undefined;
    return this;
  }

  /**
   * Create an independent copy of the matcher in its present state.
   * The tracker and both stacks are copied; the mask cache is left empty.
   * @returns {BaseIncRegEx} a new instance of the same class.
   */
  clone() { /* public */
    const t = new this.constructor();
    t.str = this.str;
    t.base = this.base;
    t.tracker = this.tracker.slice(0); // copy
    t.one = this.one.map(ID);
    t.two = this.two.map(ID);
    t.current = (this.current === this.one ? t.one : t.two);
    t.lastCh = this.lastCh;
    t.maxLen = this.maxLen;
    t._state = this._state;
    t._mask = undefined;
    // Prefer a `length` accessor when one exists (e.g. on a subclass); this
    // class defines none, so fall back to the stored value instead of
    // leaving `_len` undefined on the copy.
    const len = this.length;
    t._len = len === undefined ? this._len : len;
    return t;
  }

  /**
   * Try a character against a set of alternatives without committing it.
   *
   * @param {*} ch - character to try; callers in this class also pass
   *   `undefined` and `DONE` as probes.
   * @param {*} [curr] - alternatives to test; defaults to `this.current`.
   * @returns {*} the scratch stack filled by `action`, or undefined when the
   *   folded result is FAILED or nothing was pushed.
   */
  test(ch, curr) {
    const alternatives = curr || this.current;
    let res = FAILED;
    const next = this._getArr();
    alternatives.forEach((e) => { res = this._result(this.action(e, ch, next), res); });
    if (res === FAILED || next.length === 0) {
      return undefined;
    }
    return next;
  }

  /**
   * @returns {Array} a shallow copy of the tracker entries.
   */
  getInputTracker() { return this.tracker.map(ID); }

  /**
   * List the distinct candidate masks for the current alternatives.
   *
   * @param {boolean} [flag] - truthy selects the builder created with
   *   optional handling enabled (getArrayMaskListFull).
   * @returns {Array} de-duplicated list produced by `arr_uniq`.
   */
  minCharsList(flag) {
    const fn = flag ? getArrayMaskListFull : getArrayMaskList;
    return arr_uniq(fn(this.current, this.inputStr()));
  }

  // Private methods

  /**
   * Get the input matched so far, optionally only from index `ix` onwards.
   *
   * @param {boolean} all - when truthy every tracked character is included;
   *   otherwise only entries whose `fixed` slot is undefined.
   * @param {number} [ix] - first tracker index to include; falsy means from the start.
   * @returns {string} the selected characters joined together.
   */
  _after(all, ix) { /* public */
    if (!ix) {
      const al = all ? this.tracker : this.tracker.filter((e) => e[1] === undefined);
      return al.map((e) => e[0]).join('');
    }
    const al = this.tracker.filter((e, i) => i >= ix && (all || e[1] === undefined));
    return al.map((e) => e[0]).join('');
  }

  /**
   * @returns {*} the result of calling `reset()` on whichever stack is not
   *   `current` (presumably that stack itself, emptied).
   */
  _getArr() {
    if (this.current === this.one) return this.two.reset();
    return this.one.reset();
  }

  /**
   * Advance one regexp node with one character, pushing the nodes that may
   * follow onto `newStack`.
   *
   * @param {*} e - regexp node (or DONE) to advance.
   * @param {*} ch - character being tried; `DONE` and `undefined` are probes.
   * @param {*} newStack - receives the successor nodes.
   * @param {boolean} [ignoreBoundary] - when truthy a boundary node fails outright.
   * @returns {*} DONE, MORE or FAILED, or whatever `_result` yields when two
   *   branches are combined.
   */
  action(e, ch, newStack, ignoreBoundary) {
    if (e === DONE) {
      // The end of the expression only accepts the DONE probe.
      if (ch === DONE) {
        newStack.push(DONE);
        return DONE;
      }
      return FAILED;
    }
    else if (dot(e)) {
      return this.action(e.left, ch, newStack, ignoreBoundary);
    }
    else if (or(e)) {
      const rl = this.action(e.left, ch, newStack, ignoreBoundary);
      const rr = this.action(e.right, ch, newStack, ignoreBoundary);
      return this._result(rl, rr);
    }
    else if (zero_or_one(e) || zero_or_more(e)) {
      // Either enter the optional body (boundaries inside it are ignored) or skip it.
      const rl = boundary(e.left) ? DONE : this.action(e.left, ch, newStack, true);
      const rr = this.action(e.nextNode, ch, newStack, ignoreBoundary);
      return this._result(rl, rr);
    }
    else if (matchable(e)) {
      const res = e.match(ch);
      if (res[0]) {
        newStack.push(e.nextNode);
      }
      return res[0] ? (e.nextNode === DONE ? DONE : MORE) : FAILED;
    }
    else if (boundary(e)) {
      if (ignoreBoundary) return FAILED;
      if (ch === DONE) return this.action(e.nextNode, ch, newStack); // ignore the boundary
      const res = e.match(this.lastCh, ch);
      if (res[0] || ch === undefined) {
        return this.action(e.nextNode, ch, newStack);
      }
      return FAILED;
    }
    return FAILED;
  }

  /**
   * Combine the outcomes of two branches.
   *
   * @returns {*} the shared value when both are equal, MORE when either is
   *   MORE, and undefined for any other mix (for example DONE with FAILED).
   *   test() only rejects a result that is exactly FAILED, so undefined
   *   counts as "not failed" there.
   */
  _result(l, r) {
    if (l === r) return l;
    if (l === MORE || r === MORE) return MORE;
    return undefined;
  }

  /**
   * Commit the outcome of a successful test().
   *
   * @param {*} res - stack returned by test(), or undefined when the test failed.
   * @param {*} ch - accepted character; undefined is recorded as HOLDER_ANY.
   * @param {*} fixed - stored as the second slot of the tracker entry.
   * @returns {boolean} true when `res` was defined and the state was updated.
   */
  _update(res, ch, fixed) {
    if (res !== undefined) {
      this.tracker.push([ch === undefined ? HOLDER_ANY : ch, fixed]);
      if (res.maxLen > this.maxLen) this.maxLen = res.maxLen;
      this.current = res;
      this.lastCh = ch;
      // Derived values are stale once the tracker has changed.
      this._state = undefined;
      this._mask = undefined;
    }
    return res !== undefined;
  }

  /**
   * Work out the matcher state from the current alternatives.
   *
   * @returns {*} DONE when the `undefined` probe is rejected, MAYBE when the
   *   DONE probe leaves only DONE entries, and MORE otherwise.
   */
  _stateCompute() {
    const res = this.test(undefined);
    if (res === undefined) return DONE;
    const isdone = this.test(DONE);
    if (__isDoneN(isdone)) return MAYBE;
    return MORE;
  }
}