UNPKG

tokenizr

Version:

String Tokenization Library for JavaScript

5 lines (4 loc) 11.5 kB
/**
 * Tokenizr -- string tokenization library (UMD bundle).
 *
 * Exposes the `Tokenizr` class through CommonJS (`module.exports`), AMD
 * (`define`) or, as a fallback, as the `Tokenizr` property of the global
 * object. The helper classes are reachable as `Tokenizr.Token`,
 * `Tokenizr.ParsingError` and `Tokenizr.ActionContext`.
 */
(function (root, factory) {
  if (typeof exports === "object" && typeof module !== "undefined") {
    module.exports = factory();
  } else if (typeof define === "function" && define.amd) {
    define(factory);
  } else {
    const target = typeof globalThis !== "undefined" ? globalThis : root || self;
    target.Tokenizr = factory();
  }
})(this, function () {
  "use strict";

  /** Upper-case hexadecimal code of the first UTF-16 unit of `ch`. */
  const hexCode = (ch) => ch.charCodeAt(0).toString(16).toUpperCase();

  /**
   * Build a printable excerpt of `txt` around offset `offset`: up to 20
   * characters before it, the character at it, and the characters after it
   * up to offset + 20. Backslashes, control characters and non-ASCII
   * characters are rendered as escape sequences.
   *
   * @param {string} txt - full input text
   * @param {number} offset - position of the character of interest
   * @returns {{prologTrunc: boolean, prologText: string, tokenText: string,
   *            epilogText: string, epilogTrunc: boolean}}
   */
  const excerpt = (txt, offset) => {
    const total = txt.length;
    const begin = Math.max(offset - 20, 0);
    const end = Math.min(offset + 20, total);

    const extract = (source, pos, len) =>
      source
        .substring(pos, pos + len)
        .replaceAll(/\\/g, "\\\\")
        .replaceAll(/\x08/g, "\\b")
        .replaceAll(/\t/g, "\\t")
        .replaceAll(/\n/g, "\\n")
        .replaceAll(/\f/g, "\\f")
        .replaceAll(/\r/g, "\\r")
        .replaceAll(/[\x00-\x07\x0B\x0E\x0F]/g, (ch) => "\\x0" + hexCode(ch))
        .replaceAll(/[\x10-\x1F\x80-\xFF]/g, (ch) => "\\x" + hexCode(ch))
        .replaceAll(/[\u0100-\u0FFF]/g, (ch) => "\\u0" + hexCode(ch))
        .replaceAll(/[\u1000-\uFFFF]/g, (ch) => "\\u" + hexCode(ch));

    return {
      prologTrunc: begin > 0,
      prologText: extract(txt, begin, offset - begin),
      tokenText: extract(txt, offset, 1),
      epilogText: extract(txt, offset + 1, end - (offset + 1)),
      epilogTrunc: end < total,
    };
  };

  /** A single token: type, value, matched text and source location. */
  class Token {
    /**
     * @param {string} type
     * @param {*} value
     * @param {string} text
     * @param {number} [pos=0]
     * @param {number} [line=0]
     * @param {number} [column=0]
     */
    constructor(type, value, text, pos = 0, line = 0, column = 0) {
      this.type = type;
      this.value = value;
      this.text = text;
      this.pos = pos;
      this.line = line;
      this.column = column;
    }

    /**
     * Render the token as a single line of text.
     *
     * @param {(kind: string, text: string) => string} [colorize] - called
     *   with the field name and its rendered text; defaults to identity.
     * @returns {string}
     */
    toString(colorize = (kind, text) => text) {
      return (
        `${colorize("type", this.type)} ` +
        `(value: ${colorize("value", JSON.stringify(this.value))}, ` +
        `text: ${colorize("text", JSON.stringify(this.text))}, ` +
        `pos: ${colorize("pos", this.pos.toString())}, ` +
        `line: ${colorize("line", this.line.toString())}, ` +
        `column: ${colorize("column", this.column.toString())})`
      );
    }

    /**
     * Check the token's type and, when `value` is given, also its value
     * (both by strict equality).
     *
     * @param {string} type
     * @param {*} [value]
     * @returns {boolean}
     */
    isA(type, value) {
      if (type !== this.type) {
        return false;
      }
      return value === undefined || value === this.value;
    }
  }

  /** Error carrying the input and the location at which parsing failed. */
  class ParsingError extends Error {
    /**
     * @param {string} message
     * @param {number} pos
     * @param {number} line
     * @param {number} column
     * @param {string} input
     */
    constructor(message, pos, line, column, input) {
      super(message);
      this.name = "ParsingError";
      this.message = message;
      this.pos = pos;
      this.line = line;
      this.column = column;
      this.input = input;
    }

    /**
     * Three-line report: the message, an excerpt of the input prefixed with
     * the location, and a caret placed under the offending character.
     *
     * @returns {string}
     */
    toString() {
      const ex = excerpt(this.input, this.pos);
      const location = `line ${this.line} (column ${this.column}): `;
      const indent = " ".repeat(location.length + ex.prologText.length);
      return (
        "Parsing Error: " + this.message + "\n" +
        location + ex.prologText + ex.tokenText + ex.epilogText + "\n" +
        indent + "^"
      );
    }
  }

  /**
   * Context object handed to rule actions and to the before/after/finish
   * callbacks; most methods forward to the owning tokenizer.
   */
  class ActionContext {
    /** @param {Tokenizr} tokenizr - owning tokenizer */
    constructor(tokenizr) {
      this._tokenizr = tokenizr;
      this._data = {};
      this._repeat = false;
      this._reject = false;
      this._ignore = false;
      this._match = null;
    }

    /**
     * Read the value stored under `key`; when called with two arguments,
     * also store `value`. Always returns the previously stored value.
     */
    data(key, value) {
      const previous = this._data[key];
      // arguments.length (not `value !== undefined`) so that an explicit
      // `undefined` can be stored.
      if (arguments.length === 2) {
        this._data[key] = value;
      }
      return previous;
    }

    /** Current line, column, position and length of the current match. */
    info() {
      return {
        line: this._tokenizr._line,
        column: this._tokenizr._column,
        pos: this._tokenizr._pos,
        len: this._match?.[0]?.length ?? 0,
      };
    }

    /** Push a state onto the tokenizer's state stack. @returns {this} */
    push(state) {
      this._tokenizr.push(state);
      return this;
    }

    /** Pop the tokenizer's state stack; returns the popped state. */
    pop() {
      return this._tokenizr.pop();
    }

    /** Set the current state (returns `this`) or, without argument, get it. */
    state(state) {
      if (state !== undefined) {
        this._tokenizr.state(state);
        return this;
      }
      return this._tokenizr.state();
    }

    /** Set a tag on the tokenizer. @returns {this} */
    tag(tag) {
      this._tokenizr.tag(tag);
      return this;
    }

    /** Whether the tag is set on the tokenizer. @returns {boolean} */
    tagged(tag) {
      return this._tokenizr.tagged(tag);
    }

    /** Remove a tag from the tokenizer. @returns {this} */
    untag(tag) {
      this._tokenizr.untag(tag);
      return this;
    }

    /** Ask the tokenizer to restart rule matching at the same position. */
    repeat() {
      this._tokenizr._log(" REPEAT");
      this._repeat = true;
      return this;
    }

    /** Reject the current match so that subsequent rules are tried. */
    reject() {
      this._tokenizr._log(" REJECT");
      this._reject = true;
      return this;
    }

    /** Skip the matched text without producing a token. */
    ignore() {
      this._tokenizr._log(" IGNORE");
      this._ignore = true;
      return this;
    }

    /**
     * Queue a token of the given type. A nullish `value` falls back to the
     * matched text.
     *
     * @param {string} type
     * @param {*} [value]
     * @returns {this}
     */
    accept(type, value) {
      const matchedText = this._match?.[0];
      const tokenValue = value ?? matchedText;
      this._tokenizr._log(
        ` ACCEPT: type: ${type}, value: ${JSON.stringify(tokenValue)} ` +
          `(${typeof tokenValue}), text: "${matchedText ?? ""}"`
      );
      this._tokenizr._pending.push(
        new Token(
          type,
          tokenValue,
          matchedText ?? "",
          this._tokenizr._pos,
          this._tokenizr._line,
          this._tokenizr._column
        )
      );
      return this;
    }

    /** Mark the tokenizer as stopped. @returns {this} */
    stop() {
      this._tokenizr._stopped = true;
      return this;
    }
  }

  /** Rule-based, state-aware string tokenizer. */
  class Tokenizr {
    constructor() {
      this._before = null;
      this._after = null;
      this._finish = null;
      this._rules = [];
      this._debug = false;
      this._input = "";
      this._len = 0;
      this._eof = false;
      this._pos = 0;
      this._line = 1;
      this._column = 1;
      this._state = ["default"];
      this._tag = {};
      this._transaction = [];
      this._pending = [];
      this._stopped = false;
      this._ctx = new ActionContext(this);
    }

    /**
     * Reset input, position, states, tags, transactions and pending tokens.
     * Rules, callbacks and the debug flag are kept.
     *
     * @returns {this}
     */
    reset() {
      this._input = "";
      this._len = 0;
      this._eof = false;
      this._pos = 0;
      this._line = 1;
      this._column = 1;
      this._state = ["default"];
      this._tag = {};
      this._transaction = [];
      this._pending = [];
      this._stopped = false;
      this._ctx = new ActionContext(this);
      return this;
    }

    /** Create (not throw) a ParsingError for the current location. */
    error(message) {
      return new ParsingError(message, this._pos, this._line, this._column, this._input);
    }

    /** Enable or disable debug logging. @returns {this} */
    debug(debug) {
      this._debug = debug;
      return this;
    }

    /** Write a debug message to the console when debugging is enabled. */
    _log(msg) {
      if (this._debug) {
        console.log(`tokenizr: ${msg}`);
      }
    }

    /**
     * Reset the tokenizer and set a new input string.
     *
     * @param {string} input
     * @returns {this}
     * @throws {Error} when `input` is not a string
     */
    input(input) {
      if (typeof input !== "string") {
        throw new Error('parameter "input" not a String');
      }
      this.reset();
      this._input = input;
      this._len = input.length;
      return this;
    }

    /** Push a state onto the state stack. @returns {this} */
    push(state) {
      if (arguments.length !== 1) {
        throw new Error("invalid number of arguments");
      }
      if (typeof state !== "string") {
        throw new Error('parameter "state" not a String');
      }
      this._log(` STATE (PUSH): old: <${this._state[this._state.length - 1]}>, new: <${state}>`);
      this._state.push(state);
      return this;
    }

    /**
     * Pop the top state off the state stack and return it.
     *
     * @throws {Error} when only the bottom state is left
     */
    pop() {
      if (arguments.length !== 0) {
        throw new Error("invalid number of arguments");
      }
      if (this._state.length < 2) {
        throw new Error("no more custom states to pop");
      }
      this._log(
        ` STATE (POP): old: <${this._state[this._state.length - 1]}>, ` +
          `new: <${this._state[this._state.length - 2]}>`
      );
      return this._state.pop();
    }

    /** Replace the top state (returns `this`) or, without argument, get it. */
    state(state) {
      if (arguments.length === 1) {
        if (typeof state !== "string") {
          throw new Error('parameter "state" not a String');
        }
        this._log(` STATE (SET): old: <${this._state[this._state.length - 1]}>, new: <${state}>`);
        this._state[this._state.length - 1] = state;
        return this;
      }
      if (arguments.length === 0) {
        return this._state[this._state.length - 1];
      }
      throw new Error("invalid number of arguments");
    }

    /** Set a tag. @returns {this} */
    tag(tag) {
      if (arguments.length !== 1) {
        throw new Error("invalid number of arguments");
      }
      if (typeof tag !== "string") {
        throw new Error('parameter "tag" not a String');
      }
      this._log(` TAG (ADD): ${tag}`);
      this._tag[tag] = true;
      return this;
    }

    /** Whether a tag is set. @returns {boolean} */
    tagged(tag) {
      if (arguments.length !== 1) {
        throw new Error("invalid number of arguments");
      }
      if (typeof tag !== "string") {
        throw new Error('parameter "tag" not a String');
      }
      return this._tag[tag] === true;
    }

    /** Remove a tag. @returns {this} */
    untag(tag) {
      if (arguments.length !== 1) {
        throw new Error("invalid number of arguments");
      }
      if (typeof tag !== "string") {
        throw new Error('parameter "tag" not a String');
      }
      this._log(` TAG (DEL): ${tag}`);
      delete this._tag[tag];
      return this;
    }

    /** Register a callback invoked before every rule action. */
    before(action) {
      this._before = action;
      return this;
    }

    /** Register a callback invoked after every rule action. */
    after(action) {
      this._after = action;
      return this;
    }

    /** Register a callback invoked once, right before the EOF token is queued. */
    finish(action) {
      this._finish = action;
      return this;
    }

    /**
     * Register a tokenization rule.
     *
     * Call forms: `rule(pattern, action)`, `rule(pattern, action, name)`,
     * `rule(state, pattern, action)` and `rule(state, pattern, action, name)`.
     * `state` is a comma-separated list of entries, each consisting of
     * exactly one state name plus optional `#tag` items; it defaults to "*".
     *
     * @returns {this}
     * @throws {Error} on invalid parameter types or malformed state spec
     */
    rule(state, pattern, action, name = "unknown") {
      // Shift the arguments when the leading state was omitted.
      if (arguments.length === 2 && typeof pattern === "function") {
        [pattern, action] = [state, pattern];
        state = "*";
      } else if (arguments.length === 3 && typeof pattern === "function") {
        [pattern, action, name] = [state, pattern, action];
        state = "*";
      }

      if (typeof state !== "string") {
        throw new Error('parameter "state" not a String');
      }
      if (!(typeof pattern === "object" && pattern instanceof RegExp)) {
        throw new Error('parameter "pattern" not a RegExp');
      }
      if (typeof action !== "function") {
        throw new Error('parameter "action" not a Function');
      }
      if (typeof name !== "string") {
        throw new Error('parameter "name" not a String');
      }

      const stateSpecs = state.split(/\s*,\s*/g).map((entry) => {
        const items = entry.split(/\s+/g);
        const states = items.filter((item) => item.match(/^#/) === null);
        const tags = items
          .filter((item) => item.match(/^#/) !== null)
          .map((item) => item.replace(/^#/, ""));
        if (states.length !== 1) {
          throw new Error("exactly one state required");
        }
        return { state: states[0], tags };
      });

      // Prefer sticky matching; fall back to global matching where the
      // "y" flag is rejected by the RegExp constructor.
      let flags = "g";
      try {
        if (typeof new RegExp("", "y").sticky === "boolean") {
          flags = "y";
        }
      } catch {
        // feature detection only: keep the "g" fallback
      }
      if (typeof pattern.multiline === "boolean" && pattern.multiline) {
        flags += "m";
      }
      if (typeof pattern.dotAll === "boolean" && pattern.dotAll) {
        flags += "s";
      }
      if (typeof pattern.ignoreCase === "boolean" && pattern.ignoreCase) {
        flags += "i";
      }
      if (typeof pattern.unicode === "boolean" && pattern.unicode) {
        flags += "u";
      }
      const compiled = new RegExp(pattern.source, flags);

      this._log(`rule: configure rule (state: ${state}, pattern: ${compiled.source})`);
      this._rules.push({ state: stateSpecs, pattern: compiled, action, name });
      return this;
    }

    /**
     * Advance the line/column counters over input[from, until).
     * "\r" resets the column, "\n" starts a new line, "\t" adds
     * 8 - (column % 8) to the column, anything else adds one.
     */
    _progress(from, until) {
      const startLine = this._line;
      const startColumn = this._column;
      const input = this._input;
      for (let i = from; i < until; i++) {
        const ch = input.charAt(i);
        if (ch === "\r") {
          this._column = 1;
        } else if (ch === "\n") {
          this._line++;
          this._column = 1;
        } else if (ch === "\t") {
          this._column += 8 - (this._column % 8);
        } else {
          this._column++;
        }
      }
      this._log(
        ` PROGRESS: characters: ${until - from}, ` +
          `from: <line ${startLine}, column ${startColumn}>, ` +
          `to: <line ${this._line}, column ${this._column}>`
      );
    }

    /**
     * Try the rules at the current position until an action queues at least
     * one token or the end of input is reached (which queues the EOF token).
     *
     * @throws {ParsingError} when no rule matches at the current position
     * @throws {Error} when a matching action neither accepts, rejects,
     *   repeats nor ignores
     */
    _tokenize() {
      // Queue the EOF token exactly once, after the finish callback ran.
      const finish = () => {
        if (this._eof) {
          return;
        }
        if (this._finish !== null) {
          this._finish.call(this._ctx, this._ctx);
        }
        this._eof = true;
        this._pending.push(new Token("EOF", "", "", this._pos, this._line, this._column));
      };

      if (this._stopped || this._pos >= this._len) {
        finish();
        return;
      }

      let rescan = true;
      while (rescan) {
        rescan = false;

        if (this._debug) {
          const ex = excerpt(this._input, this._pos);
          const tags = Object.keys(this._tag)
            .map((tag) => `#${tag}`)
            .join(" ");
          this._log(
            `INPUT: state: <${this._state[this._state.length - 1]}>, tags: <${tags}>, text: ` +
              (ex.prologTrunc ? "..." : '"') +
              `${ex.prologText}<${ex.tokenText}>${ex.epilogText}` +
              (ex.epilogTrunc ? "..." : '"') +
              `, at: <line ${this._line}, column ${this._column}>`
          );
        }

        for (let i = 0; i < this._rules.length; i++) {
          const rule = this._rules[i];

          if (this._debug) {
            const described = rule.state
              .map((spec) => {
                let text = spec.state;
                if (spec.tags.length > 0) {
                  text += " " + spec.tags.map((tag) => `#${tag}`).join(" ");
                }
                return text;
              })
              .join(", ");
            this._log(` RULE: state(s): <${described}>, pattern: ${rule.pattern.source}`);
          }

          // A rule applies when it lists "*" or the current state and all
          // tags of that entry are currently set.
          const stateNames = rule.state.map((spec) => spec.state);
          let idx = stateNames.indexOf("*");
          if (idx < 0) {
            idx = stateNames.indexOf(this._state[this._state.length - 1]);
          }
          const applies = idx >= 0 && rule.state[idx].tags.every((tag) => this._tag[tag]);
          if (!applies) {
            continue;
          }

          rule.pattern.lastIndex = this._pos;
          const found = rule.pattern.exec(this._input);
          // The index check matters for the non-sticky ("g") fallback.
          if (found === null || found.index !== this._pos) {
            continue;
          }

          if (this._debug) {
            this._log(" MATCHED: " + JSON.stringify(found));
          }

          this._ctx._match = found;
          this._ctx._repeat = false;
          this._ctx._reject = false;
          this._ctx._ignore = false;
          if (this._before !== null) {
            this._before.call(this._ctx, this._ctx, found, rule);
          }
          rule.action.call(this._ctx, this._ctx, found);
          if (this._after !== null) {
            this._after.call(this._ctx, this._ctx, found, rule);
          }

          if (this._ctx._reject) {
            continue;
          }
          if (this._ctx._repeat) {
            rescan = true;
            break;
          }
          if (this._ctx._ignore) {
            this._progress(this._pos, rule.pattern.lastIndex);
            this._pos = rule.pattern.lastIndex;
            if (this._pos >= this._len) {
              finish();
              return;
            }
            rescan = true;
            break;
          }
          if (this._pending.length > 0) {
            this._progress(this._pos, rule.pattern.lastIndex);
            this._pos = rule.pattern.lastIndex;
            if (this._pos >= this._len) {
              finish();
            }
            return;
          }
          throw new Error(
            'action of pattern "' +
              rule.pattern.source +
              '" neither rejected nor accepted any token(s)'
          );
        }
      }

      throw this.error("token not recognized");
    }

    /**
     * Return the next token, or `null` when none is left. Tokens fetched
     * inside a transaction are recorded so they can be rolled back.
     *
     * @returns {Token|null}
     */
    token() {
      if (this._pending.length === 0) {
        this._tokenize();
      }
      if (this._pending.length === 0) {
        return null;
      }
      const token = this._pending.shift();
      if (this._transaction.length > 0) {
        this._transaction[0].push(token);
      }
      this._log(`TOKEN: ${token.toString()}`);
      return token;
    }

    /** Return all remaining tokens. @returns {Token[]} */
    tokens() {
      const result = [];
      let token;
      while ((token = this.token()) !== null) {
        result.push(token);
      }
      return result;
    }

    /**
     * Return the pending token at `offset` without consuming it.
     *
     * @param {number} [offset=0]
     * @returns {Token}
     * @throws {Error} on an invalid offset or when fewer than offset + 1
     *   tokens are available
     */
    peek(offset) {
      if (offset === undefined) {
        offset = 0;
      }
      if (typeof offset !== "number" || offset < 0) {
        throw new Error('parameter "offset" not a positive Number');
      }
      while (offset >= this._pending.length) {
        this._tokenize();
        // Once EOF has been queued, _tokenize() cannot add tokens any more;
        // stop here so an out-of-range offset raises the error below instead
        // of looping forever.
        if (this._pending.length === 0 || this._eof) {
          break;
        }
      }
      if (offset >= this._pending.length) {
        throw new Error("not enough tokens available for peek operation");
      }
      this._log(`PEEK: ${this._pending[offset].toString()}`);
      return this._pending[offset];
    }

    /**
     * Consume and discard `len` tokens.
     *
     * @param {number} [len=1]
     * @returns {this}
     * @throws {Error} when fewer than `len` tokens are available
     */
    skip(len) {
      if (len === undefined) {
        len = 1;
      }
      for (let i = 0; i < len; i++) {
        this._tokenize();
      }
      if (len > this._pending.length) {
        throw new Error("not enough tokens available for skip operation");
      }
      for (let remaining = len; remaining > 0; remaining--) {
        this.token();
      }
      return this;
    }

    /**
     * Fetch the next token and require it to have the given type and, when
     * a second argument is passed, the given value.
     *
     * @param {string} type
     * @param {*} [value]
     * @returns {Token}
     * @throws {ParsingError} when the token does not match
     * @throws {Error} when no token is available
     */
    consume(type, value) {
      // Loop bound is re-evaluated each round, so this keeps tokenizing
      // for as long as the pending queue keeps growing.
      for (let i = 0; i < this._pending.length + 1; i++) {
        this._tokenize();
      }
      if (this._pending.length === 0) {
        throw new Error("not enough tokens available for consume operation");
      }
      const token = this.token();
      this._log(`CONSUME: ${token.toString()}`);

      const matches = arguments.length === 2 ? token.isA(type, value) : token.isA(type);
      if (!matches) {
        throw new ParsingError(
          `expected: <type: ${type}, value: ${JSON.stringify(value)} (${typeof value})>, ` +
            `found: <type: ${token.type}, value: ${JSON.stringify(token.value)} (${typeof token.value})>`,
          token.pos,
          token.line,
          token.column,
          this._input
        );
      }
      return token;
    }

    /** Open a (possibly nested) transaction. @returns {this} */
    begin() {
      this._log(`BEGIN: level ${this._transaction.length}`);
      this._transaction.unshift([]);
      return this;
    }

    /**
     * Number of tokens fetched in the innermost transaction.
     *
     * @throws {Error} when no transaction is active
     */
    depth() {
      if (this._transaction.length === 0) {
        throw new Error("cannot determine depth -- no active transaction");
      }
      return this._transaction[0].length;
    }

    /**
     * Close the innermost transaction, handing its tokens to the enclosing
     * transaction if there is one.
     *
     * @returns {this}
     * @throws {Error} when no transaction is active
     */
    commit() {
      if (this._transaction.length === 0) {
        throw new Error("cannot commit transaction -- no active transaction");
      }
      const committed = this._transaction.shift();
      if (this._transaction.length > 0) {
        this._transaction[0] = this._transaction[0].concat(committed);
      }
      this._log(`COMMIT: level ${this._transaction.length}`);
      return this;
    }

    /**
     * Close the innermost transaction and put its tokens back in front of
     * the pending queue.
     *
     * @returns {this}
     * @throws {Error} when no transaction is active
     */
    rollback() {
      if (this._transaction.length === 0) {
        throw new Error("cannot rollback transaction -- no active transaction");
      }
      const undone = this._transaction.shift();
      this._pending = undone.concat(this._pending);
      this._log(`ROLLBACK: level ${this._transaction.length}`);
      return this;
    }

    /**
     * Run the given functions (with `this` bound to the tokenizer), each in
     * its own transaction, and return the result of the first one that does
     * not throw. Failed alternatives are rolled back.
     *
     * @param {...Function} alternatives
     * @returns {*} result of the first successful alternative
     * @throws the recorded exception with the smallest transaction depth
     *   when the result is `null` and at least one alternative threw
     */
    alternatives(...alternatives) {
      let result = null;
      const failures = [];
      for (const alternative of alternatives) {
        try {
          this.begin();
          result = alternative.call(this);
          this.commit();
          break;
        } catch (ex) {
          if (ex instanceof Error) {
            this._log(`EXCEPTION: ${ex.message}`);
            failures.push({ ex, depth: this.depth() });
          } else {
            this._log("EXCEPTION: alternative failed");
            failures.push({ ex: new Error("alternative failed"), depth: this.depth() });
          }
          this.rollback();
        }
      }
      if (result === null && failures.length > 0) {
        failures.sort((a, b) => a.depth - b.depth);
        throw failures[0].ex;
      }
      return result;
    }

    static Token = Token;

    static ParsingError = ParsingError;

    static ActionContext = ActionContext;
  }

  return Tokenizr;
});