UNPKG

parser-transform

Version:

Streaming+Async lexer and parser

gitlab.com/shimaore/symptomatic-glue

672 lines (558 loc) • 21.4 kB

JavaScript

/**
 * LR parser generated by the Syntax tool.
 *
 * https://www.npmjs.com/package/syntax-cli
 *
 *   npm install -g syntax-cli
 *
 *   syntax-cli --help
 *
 * To regenerate run:
 *
 *   syntax-cli \
 *     --grammar ~/path-to-grammar-file \
 *     --mode <parsing-mode> \
 *     --output ~/path-to-output-parser-file.js
 */

'use strict';

/**
 * Matched token text.
 */
let yytext;

/**
 * Length of the matched token text.
 */
let yyleng;

/**
 * Storage object.
 */
let yy = {};

/**
 * Result of semantic action.
 */
let __;

/**
 * Result location object.
 */
let __loc;

/**
 * Builds a location object that starts where `start` starts and ends
 * where `end` ends.
 *
 * @param {Object|null} start - location of the first symbol.
 * @param {Object|null} end - location of the last symbol.
 * @returns {Object|null} `null` when `yy.options.captureLocations` is
 *   falsy; whichever of `start`/`end` is set when the other is missing;
 *   otherwise the merged location.
 */
function yyloc(start, end) {
  if (!yy.options.captureLocations) {
    return null;
  }

  // Epsilon doesn't produce location.
  if (!start || !end) {
    return start || end;
  }

  return {
    startOffset: start.startOffset,
    endOffset: end.endOffset,
    startLine: start.startLine,
    endLine: end.endLine,
    startColumn: start.startColumn,
    endColumn: end.endColumn,
  };
}

const EOF = '$';

/**
 * List of productions (generated by Syntax tool).
 *
 * Each entry is `[lhsSymbol, rhsLength, semanticAction]`: `parse` pops
 * `rhsLength` symbols, calls `semanticAction` with their semantic values
 * and reads the result back from `__`.
 */
const productions = [
  [-1, 1, (_1) => { __ = _1 }],
  [0, 3, (_1, _2, _3) => { __ = convert_to_dfas(_3(new Map(_1), new Map())) }],
  [1, 0, () => { __ = [] }],
  [1, 2, (_1, _2) => { __ = _2.concat(_1) }],
  [2, 3, (_1, _2, _3) => { __ = [[_1, _2]] }],
  [3, 1, (_1) => { __ = _1 }],
  [3, 2, (_1, _2) => {
    __ = function (macros, nfas) {
      _1(macros, nfas);
      _2(macros, nfas);
      return nfas;
    };
  }],
  [4, 3, (_1, _2, _3) => {
    __ = function (macros, nfas) {
      let code = build_acceptable(_3);
      let pattern = _2(macros).toLastAcceptable(code);
      _1.forEach(
        (qualifier) => nfa_by_name(nfas, qualifier).appendFragment(pattern, START_STATE)
      );
      return nfas;
    };
  }],
  [5, 0, () => { __ = ['INITIAL'] }],
  [5, 1, (_1) => { __ = _1 }],
  [6, 1, (_1) => { __ = _1 }],
  // `_2` is the text of the ',' token; the qualifier array to append is
  // the third symbol (the original appended `_2`, i.e. the comma itself).
  [6, 3, (_1, _2, _3) => { __ = _1.concat(_3) }],
  [7, 1, (_1) => { __ = [_1] }],
  [8, 3, (_1, _2, _3) => { __ = (macros) => Fragment.mergeAll([_1(macros), _3(macros)]) }],
  [8, 1, (_1) => { __ = _1 }],
  [9, 2, (_1, _2) => { __ = (macros) => Fragment.concatAll([_1(macros), _2(macros)]) }],
  [9, 1, (_1) => { __ = _1 }],
  [10, 3, (_1, _2, _3) => { __ = _2 }],
  [10, 3, (_1, _2, _3) => { __ = (macros) => simple(new CharLabel(_2)) }],
  [10, 4, (_1, _2, _3, _4) => { __ = (macros) => simple(new CharLabel(_3.not())) }],
  [10, 1, (_1) => { __ = (macros) => simple(new CharLabel(new BitSet().not())) }],
  [10, 1, (_1) => { __ = (macros) => single(_1) }],
  [10, 1, (_1) => {
    __ = (macros) => {
      if (!macros.has(_1)) {
        throw new Error(`Undefined macro ${_1}`);
      }
      return macros.get(_1)(macros);
    };
  }],
  [10, 2, (_1, _2) => { __ = (macros) => _1(macros).kleeneStar() }],
  [10, 2, (_1, _2) => { __ = (macros) => _1(macros).kleenePlus() }],
  [10, 2, (_1, _2) => { __ = (macros) => _1(macros).optional() }],
  [11, 1, (_1) => { __ = _1 }],
  [11, 1, (_1) => { __ = new BitSet().set(CP('-')) }],
  [11, 2, (_1, _2) => { __ = _2.or(new BitSet().set(CP('-'))) }],
  [12, 1, (_1) => { __ = _1 }],
  [12, 2, (_1, _2) => { __ = _1.or(_2) }],
  [13, 1, (_1) => { __ = new BitSet().set(CP(_1)) }],
  [13, 3, (_1, _2, _3) => { __ = new BitSet().setRange(CP(_1), CP(_3)) }],
];

/**
 * Encoded tokens map.
 */
const tokens = {"NAME":"14","END_OF_LINE":"15","END_OF_TOKEN":"16","QUALIFIER_NAME":"17","START_SET":"18","END_SET":"19","INVERT_SET":"20","PATTERN_TOKEN":"21","QUOTED_NAME":"22","RANGE":"23","SET_TOKEN":"24","'%%'":"25","','":"26","'|'":"27","'('":"28","')'":"29","'.'":"30","'*'":"31","'+'":"32","'?'":"33","$":"34"};

/**
 * Parsing table (generated by Syntax tool).
 *
 * One row per parser state. Keys are symbol numbers; values are either a
 * goto state (number), a shift (`"sN"`), a reduce (`"rN"`) or `"acc"`.
 */
const table = [
  {"0":1,"1":2,"2":3,"14":"s4","25":"r2"},
  {"34":"acc"},
  {"25":"s5"},
  {"1":45,"2":3,"14":"s4","25":"r2"},
  {"8":46,"9":14,"10":15,"18":"s17","21":"s19","22":"s20","28":"s16","30":"s18"},
  {"3":6,"4":7,"5":8,"6":9,"7":10,"17":"s11","18":"r8","21":"r8","22":"r8","28":"r8","30":"r8"},
  {"4":12,"5":8,"6":9,"7":10,"17":"s11","18":"r8","21":"r8","22":"r8","28":"r8","30":"r8","34":"r1"},
  {"17":"r5","18":"r5","21":"r5","22":"r5","28":"r5","30":"r5","34":"r5"},
  {"8":13,"9":14,"10":15,"18":"s17","21":"s19","22":"s20","28":"s16","30":"s18"},
  {"18":"r9","21":"r9","22":"r9","26":"s43","28":"r9","30":"r9"},
  {"18":"r10","21":"r10","22":"r10","26":"r10","28":"r10","30":"r10"},
  {"18":"r12","21":"r12","22":"r12","26":"r12","28":"r12","30":"r12"},
  {"17":"r6","18":"r6","21":"r6","22":"r6","28":"r6","30":"r6","34":"r6"},
  {"16":"s21"},
  {"15":"r14","16":"r14","27":"s22","29":"r14"},
  {"9":24,"10":15,"15":"r16","16":"r16","18":"s17","21":"s19","22":"s20","27":"r16","28":"s16","29":"r16","30":"s18","31":"s25","32":"s26","33":"s27"},
  {"8":28,"9":14,"10":15,"18":"s17","21":"s19","22":"s20","28":"s16","30":"s18"},
  {"11":30,"12":32,"13":34,"20":"s31","23":"s33","24":"s35"},
  {"15":"r20","16":"r20","18":"r20","21":"r20","22":"r20","27":"r20","28":"r20","29":"r20","30":"r20","31":"r20","32":"r20","33":"r20"},
  {"15":"r21","16":"r21","18":"r21","21":"r21","22":"r21","27":"r21","28":"r21","29":"r21","30":"r21","31":"r21","32":"r21","33":"r21"},
  {"15":"r22","16":"r22","18":"r22","21":"r22","22":"r22","27":"r22","28":"r22","29":"r22","30":"r22","31":"r22","32":"r22","33":"r22"},
  {"17":"r7","18":"r7","21":"r7","22":"r7","28":"r7","30":"r7","34":"r7"},
  {"8":23,"9":14,"10":15,"18":"s17","21":"s19","22":"s20","28":"s16","30":"s18"},
  {"15":"r13","16":"r13","29":"r13"},
  {"15":"r15","16":"r15","27":"r15","29":"r15"},
  {"15":"r23","16":"r23","18":"r23","21":"r23","22":"r23","27":"r23","28":"r23","29":"r23","30":"r23","31":"r23","32":"r23","33":"r23"},
  {"15":"r24","16":"r24","18":"r24","21":"r24","22":"r24","27":"r24","28":"r24","29":"r24","30":"r24","31":"r24","32":"r24","33":"r24"},
  {"15":"r25","16":"r25","18":"r25","21":"r25","22":"r25","27":"r25","28":"r25","29":"r25","30":"r25","31":"r25","32":"r25","33":"r25"},
  {"29":"s29"},
  {"15":"r17","16":"r17","18":"r17","21":"r17","22":"r17","27":"r17","28":"r17","29":"r17","30":"r17","31":"r17","32":"r17","33":"r17"},
  {"19":"s36"},
  {"11":37,"12":32,"13":34,"23":"s33","24":"s35"},
  {"19":"r26"},
  {"12":39,"13":34,"19":"r27","24":"s35"},
  {"12":40,"13":34,"19":"r29","24":"s35"},
  {"19":"r31","23":"s41","24":"r31"},
  {"15":"r18","16":"r18","18":"r18","21":"r18","22":"r18","27":"r18","28":"r18","29":"r18","30":"r18","31":"r18","32":"r18","33":"r18"},
  {"19":"s38"},
  {"15":"r19","16":"r19","18":"r19","21":"r19","22":"r19","27":"r19","28":"r19","29":"r19","30":"r19","31":"r19","32":"r19","33":"r19"},
  {"19":"r28"},
  {"19":"r30"},
  {"24":"s42"},
  {"19":"r32","24":"r32"},
  {"7":44,"17":"s11"},
  {"18":"r11","21":"r11","22":"r11","26":"r11","28":"r11","30":"r11"},
  {"25":"r3"},
  {"15":"s47"},
  {"14":"r4","25":"r4"},
];

/**
 * Parsing stack.
 */
const stack = [];

/**
 * Tokenizer instance.
 */
let tokenizer;

/**
 * Generic tokenizer used by the parser in the Syntax tool.
 *
 * https://www.npmjs.com/package/syntax-cli
 *
 * See `--custom-tokenizer` to skip this generation, and use a custom one.
 *
 * Each rule is `[regexp, action]`. The action runs with `this` bound to
 * the tokenizer, may rewrite `yytext`, and returns the token type (or
 * nothing to skip the match).
 */
const lexRules = [
  [/^[a-zA-Z]+[a-zA-Z0-9]*\s+/, function() { this.popState(); this.begin('pattern'); yytext = yytext.trim(); return 'NAME' }],
  [/^%%.*\n/, function() { this.popState(); this.begin('tokenizer'); return "'%%'" }],
  [/^[\t ]+\S+\n/, function() { this.popState(); yytext = yytext.slice(0,-1).trim(); return 'END_OF_LINE' }],
  [/^[\t ]*\n/, function() { this.popState(); yytext = ''; return 'END_OF_LINE' }],
  [/^[\t ]+.*\n/, function() { this.popState(); this.begin('tokenizer'); yytext = yytext.slice(0,-1).trim(); return 'END_OF_TOKEN' }],
  [/^</, function() { this.popState(); this.begin('tokenizer'); this.begin('qualifier') }],
  [/^[()+*?.|]/, function() { return `'${yytext[0]}'` }],
  [/^\[/, function() { this.begin('set'); return 'START_SET' }],
  [/^\{[a-zA-Z]+[a-zA-Z0-9]*\}/, function() { yytext = yytext.slice(1,-1); return 'QUOTED_NAME' }],
  [/^\\t/, function() { yytext = '\t'; return 'PATTERN_TOKEN' }],
  [/^\\r/, function() { yytext = '\r'; return 'PATTERN_TOKEN' }],
  [/^\\n/, function() { yytext = '\n'; return 'PATTERN_TOKEN' }],
  [/^\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]/, function() { yytext = String.fromCodePoint(parseInt(yytext.slice(2),16)); return 'PATTERN_TOKEN' }],
  [/^\\./, function() { yytext = yytext.slice(1); return 'PATTERN_TOKEN' }],
  [/^./, function() { return 'PATTERN_TOKEN' }],
  [/^\]/, function() { this.popState(); return 'END_SET' }],
  [/^\^/, function() { return `INVERT_SET` }],
  [/^-/, function() { return `RANGE` }],
  [/^\\t/, function() { yytext = '\t'; return 'SET_TOKEN' }],
  [/^\\r/, function() { yytext = '\r'; return 'SET_TOKEN' }],
  [/^\\n/, function() { yytext = '\n'; return 'SET_TOKEN' }],
  [/^\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]/, function() { yytext = String.fromCodePoint(parseInt(yytext.slice(2),16)); return 'SET_TOKEN' }],
  [/^\\./, function() { yytext = yytext.slice(1); return 'SET_TOKEN' }],
  [/^./, function() { return 'SET_TOKEN' }],
  [/^[a-zA-Z]+[a-zA-Z0-9]*/, function() { return 'QUALIFIER_NAME' }],
  // NOTE(review): commas inside `<...>` are dropped here, so the parser
  // never receives a ',' token between qualifier names from this
  // tokenizer — confirm whether this rule should return "','" instead.
  [/^[,]/, function() { /* ignore */ }],
  [/^>/, function() { this.popState() }],
  [/^%%/, function() { return "'%%'"; }],
  [/^,/, function() { return "','"; }],
  [/^\|/, function() { return "'|'"; }],
  [/^\(/, function() { return "'('"; }],
  [/^\)/, function() { return "')'"; }],
  [/^\./, function() { return "'.'"; }],
  [/^\*/, function() { return "'*'"; }],
  [/^\+/, function() { return "'+'"; }],
  [/^\?/, function() { return "'?'"; }],
];

/**
 * Indices into `lexRules` that are tried, in order, for each tokenizer
 * state.
 */
const lexRulesByConditions = {"INITIAL":[0,1,27,28,29,30,31,32,33,34,35],"pattern":[2,3,6,7,8,9,10,11,12,13,14],"set":[15,16,17,18,19,20,21,22,23],"tokenizer":[4,5,6,7,8,9,10,11,12,13,14],"qualifier":[24,25,26]};

const EOF_TOKEN = {
  type: EOF,
  value: '',
};

tokenizer = {
  /**
   * Resets the tokenizer to read `string` from the beginning.
   */
  initString(string) {
    this._string = string;
    this._cursor = 0;

    this._states = ['INITIAL'];
    this._tokensQueue = [];

    this._currentLine = 1;
    this._currentColumn = 0;
    this._currentLineBeginOffset = 0;

    /**
     * Matched token location data.
     */
    this._tokenStartOffset = 0;
    this._tokenEndOffset = 0;
    this._tokenStartLine = 1;
    this._tokenEndLine = 1;
    this._tokenStartColumn = 0;
    this._tokenEndColumn = 0;

    return this;
  },

  /**
   * Returns tokenizer states.
   */
  getStates() {
    return this._states;
  },

  getCurrentState() {
    return this._states[this._states.length - 1];
  },

  pushState(state) {
    this._states.push(state);
  },

  begin(state) {
    this.pushState(state);
  },

  /**
   * Pops the current state; the bottom state is never removed.
   */
  popState() {
    if (this._states.length > 1) {
      return this._states.pop();
    }
    return this._states[0];
  },

  /**
   * Returns the next token, `EOF_TOKEN` at the end of the input, or
   * throws a `SyntaxError` when no rule of the current state matches.
   */
  getNextToken() {
    // Something was queued, return it.
    if (this._tokensQueue.length > 0) {
      return this.onToken(this._toToken(this._tokensQueue.shift()));
    }

    if (!this.hasMoreTokens()) {
      return this.onToken(EOF_TOKEN);
    }

    let string = this._string.slice(this._cursor);
    let lexRulesForState = lexRulesByConditions[this.getCurrentState()];

    for (let i = 0; i < lexRulesForState.length; i++) {
      let lexRuleIndex = lexRulesForState[i];
      let lexRule = lexRules[lexRuleIndex];

      let matched = this._match(string, lexRule[0]);

      // Manual handling of EOF token (the end of string). Return it
      // as `EOF` symbol.
      if (string === '' && matched === '') {
        this._cursor++;
      }

      if (matched !== null) {
        yytext = matched;
        yyleng = yytext.length;
        let token = lexRule[1].call(this);
        if (!token) {
          return this.getNextToken();
        }

        // If multiple tokens are returned, save them to return
        // on next `getNextToken` call.
        if (Array.isArray(token)) {
          const tokensToQueue = token.slice(1);
          token = token[0];
          if (tokensToQueue.length > 0) {
            this._tokensQueue.unshift(...tokensToQueue);
          }
        }

        return this.onToken(this._toToken(token, yytext));
      }
    }

    if (this.isEOF()) {
      this._cursor++;
      return EOF_TOKEN;
    }

    this.throwUnexpectedToken(
      string[0],
      this._currentLine,
      this._currentColumn
    );
  },

  /**
   * Throws default "Unexpected token" exception, showing the actual
   * line from the source, pointing with the ^ marker to the bad token.
   * In addition, shows `line:column` location.
   */
  throwUnexpectedToken(symbol, line, column) {
    const lineSource = this._string.split('\n')[line - 1];
    let lineData = '';

    if (lineSource) {
      const pad = ' '.repeat(column);
      lineData = '\n\n' + lineSource + '\n' + pad + '^\n';
    }

    throw new SyntaxError(
      `${lineData}Unexpected token: "${symbol}" ` +
      `at ${line}:${column}.`
    );
  },

  getCursor() {
    return this._cursor;
  },

  getCurrentLine() {
    return this._currentLine;
  },

  getCurrentColumn() {
    return this._currentColumn;
  },

  /**
   * Records start/end offsets, lines and columns for `matched`, which
   * begins at the current cursor, and advances the line counters past
   * any `\n` it contains.
   */
  _captureLocation(matched) {
    const nlRe = /\n/g;

    // Absolute offsets.
    this._tokenStartOffset = this._cursor;

    // Line-based locations, start.
    this._tokenStartLine = this._currentLine;
    this._tokenStartColumn =
      this._tokenStartOffset - this._currentLineBeginOffset;

    // Extract `\n` in the matched token.
    let nlMatch;
    while ((nlMatch = nlRe.exec(matched)) !== null) {
      this._currentLine++;
      this._currentLineBeginOffset = this._tokenStartOffset + nlMatch.index + 1;
    }

    this._tokenEndOffset = this._cursor + matched.length;

    // Line-based locations, end.
    this._tokenEndLine = this._currentLine;
    this._tokenEndColumn = this._currentColumn =
      (this._tokenEndOffset - this._currentLineBeginOffset);
  },

  /**
   * Wraps a token type and text together with the last captured location.
   */
  _toToken(tokenType, yytext = '') {
    return {
      // Basic data.
      type: tokenType,
      value: yytext,

      // Location data.
      startOffset: this._tokenStartOffset,
      endOffset: this._tokenEndOffset,
      startLine: this._tokenStartLine,
      endLine: this._tokenEndLine,
      startColumn: this._tokenStartColumn,
      endColumn: this._tokenEndColumn,
    };
  },

  isEOF() {
    return this._cursor === this._string.length;
  },

  hasMoreTokens() {
    return this._cursor <= this._string.length;
  },

  /**
   * Matches `regexp` against `string`; on success captures the location,
   * advances the cursor and returns the matched text, otherwise `null`.
   */
  _match(string, regexp) {
    let matched = string.match(regexp);
    if (matched) {
      // Handle `\n` in the matched token to track line numbers.
      this._captureLocation(matched[0]);
      this._cursor += matched[0].length;
      return matched[0];
    }
    return null;
  },

  /**
   * Allows analyzing, and transforming token. Default implementation
   * just passes the token through.
   */
  onToken(token) {
    return token;
  },
};

/**
 * Expose tokenizer so it can be accessed in semantic actions.
 */
yy.lexer = tokenizer;
yy.tokenizer = tokenizer;

/**
 * Global parsing options. Some options can be shadowed per
 * each `parse` call, if the options are passed.
 *
 * Initialized to the `captureLocations` which is passed
 * from the generator. Other options can be added at runtime.
 */
yy.options = {
  captureLocations: false,
};

/**
 * Parsing module.
 */
const yyparse = {
  /**
   * Sets global parsing options.
   */
  setOptions(options) {
    yy.options = options;
    return this;
  },

  /**
   * Returns parsing options.
   */
  getOptions() {
    return yy.options;
  },

  /**
   * Parses a string.
   *
   * @param {string} string - text to parse.
   * @param {Object} [parseOptions] - options merged over the global
   *   options for this call only.
   * @returns {*} the semantic value of the start symbol, or `true` when
   *   the accepted entry carries no semantic value.
   * @throws {SyntaxError} on an unexpected token or end of input.
   */
  parse(string, parseOptions) {
    if (!tokenizer) {
      throw new Error(`Tokenizer instance wasn't specified.`);
    }

    tokenizer.initString(string);

    /**
     * If parse options are passed, override global parse options for
     * this call, and later restore global options.
     */
    let globalOptions = yy.options;
    if (parseOptions) {
      yy.options = Object.assign({}, yy.options, parseOptions);
    }

    /**
     * Allow callers to do setup work based on the
     * parsing string, and passed options.
     */
    yyparse.onParseBegin(string, tokenizer, yy.options);

    stack.length = 0;
    stack.push(0);

    let token = tokenizer.getNextToken();
    let shiftedToken = null;

    do {
      if (!token) {
        // Restore options.
        yy.options = globalOptions;
        unexpectedEndOfInput();
      }

      let state = stack[stack.length - 1];
      let column = tokens[token.type];

      if (!table[state].hasOwnProperty(column)) {
        yy.options = globalOptions;
        unexpectedToken(token);
      }

      let entry = table[state][column];

      // Shift action.
      if (entry[0] === 's') {
        let loc = null;

        if (yy.options.captureLocations) {
          loc = {
            startOffset: token.startOffset,
            endOffset: token.endOffset,
            startLine: token.startLine,
            endLine: token.endLine,
            startColumn: token.startColumn,
            endColumn: token.endColumn,
          };
        }

        shiftedToken = this.onShift(token);

        // The stack alternates symbol entries and state numbers.
        stack.push(
          {symbol: tokens[shiftedToken.type], semanticValue: shiftedToken.value, loc},
          Number(entry.slice(1))
        );

        token = tokenizer.getNextToken();
      }

      // Reduce action.
      else if (entry[0] === 'r') {
        let productionNumber = entry.slice(1);
        let production = productions[productionNumber];
        let hasSemanticAction = typeof production[2] === 'function';
        let semanticValueArgs = hasSemanticAction ? [] : null;

        const locationArgs = (
          hasSemanticAction && yy.options.captureLocations
            ? []
            : null
        );

        if (production[1] !== 0) {
          let rhsLength = production[1];
          while (rhsLength-- > 0) {
            // Drop the state number, then take the symbol entry below it.
            stack.pop();
            let stackEntry = stack.pop();

            if (hasSemanticAction) {
              semanticValueArgs.unshift(stackEntry.semanticValue);

              if (locationArgs) {
                locationArgs.unshift(stackEntry.loc);
              }
            }
          }
        }

        const reduceStackEntry = {symbol: production[0]};

        if (hasSemanticAction) {
          yytext = shiftedToken ? shiftedToken.value : null;
          yyleng = shiftedToken ? shiftedToken.value.length : null;

          const semanticActionArgs = (
            locationArgs !== null
              ? semanticValueArgs.concat(locationArgs)
              : semanticValueArgs
          );

          // The action stores its result in `__` (and `__loc`).
          production[2](...semanticActionArgs);

          reduceStackEntry.semanticValue = __;

          if (locationArgs) {
            reduceStackEntry.loc = __loc;
          }
        }

        const nextState = stack[stack.length - 1];
        const symbolToReduceWith = production[0];

        stack.push(
          reduceStackEntry,
          table[nextState][symbolToReduceWith]
        );
      }

      // Accept.
      else if (entry === 'acc') {
        stack.pop();
        let parsed = stack.pop();

        if (stack.length !== 1 ||
            stack[0] !== 0 ||
            tokenizer.hasMoreTokens()) {
          // Restore options.
          yy.options = globalOptions;
          unexpectedToken(token);
        }

        if (parsed.hasOwnProperty('semanticValue')) {
          yy.options = globalOptions;
          yyparse.onParseEnd(parsed.semanticValue);
          return parsed.semanticValue;
        }

        yyparse.onParseEnd();

        // Restore options.
        yy.options = globalOptions;
        return true;
      }

    } while (tokenizer.hasMoreTokens() || stack.length > 1);
  },

  setTokenizer(customTokenizer) {
    tokenizer = customTokenizer;
    return yyparse;
  },

  getTokenizer() {
    return tokenizer;
  },

  onParseBegin(string, tokenizer, options) {},
  onParseEnd(parsed) {},

  /**
   * Allows analyzing, and transforming shifted token. Default implementation
   * just passes the token through.
   */
  onShift(token) {
    return token;
  },
};

const {CharLabel,State,Edge,NFA,Fragment} = require('@shimaore/dfa.js');
const BitSet = require('bitset');
const {Single} = CharLabel;

const START_STATE = 0;

/**
 * Returns the NFA registered under `name` in the `nfas` map, creating it
 * (with a start state numbered `START_STATE`) on first use.
 */
function nfa_by_name(nfas,name) {
  if (!nfas.has(name)) {
    let nfa = new NFA();
    nfa.addStartState(new State(START_STATE));
    nfas.set(name,nfa);
  }
  return nfas.get(name);
}

/**
 * Returns a new Map with the same keys as `nfas`, each value replaced by
 * the result of its `toDFA()`.
 */
function convert_to_dfas(nfas) {
  let dfas = new Map();
  nfas.forEach(function (nfa,name) {
    dfas.set(name,nfa.toDFA());
  });
  return dfas;
}

// Counter used to number the states of generated fragments; starts at
// 1000 and is shared by every `simple` call for the module's lifetime.
let state = 1000;

/**
 * Builds a two-state Fragment: a fresh state with a single edge labelled
 * `c` leading to a second fresh state that carries `attrs`.
 */
let simple = function (c,attrs = {}) {
  let state1 = state++;
  let state2 = state++;
  return new Fragment([
    new State(state1, [new Edge(c, state2)]),
    new State(state2, [], attrs)
  ]);
};

/**
 * Builds the fragment for one pattern token by wrapping it in `Single`.
 */
let single = function (c) {
  return simple(new Single(c));
};

/**
 * Returns the code point of the first character of `c`.
 */
let CP = function (c) {
  return c.codePointAt(0);
};

/**
 * Compiles the action text of a tokenizer rule into a function.
 *
 * SECURITY: `new Function` executes whatever source text the grammar
 * contains, so only parse lexer definitions from trusted sources.
 */
let build_acceptable = function (f) {
  return new Function(f);
};

/**
 * Reports `token` as unexpected; an EOF token is reported as an
 * unexpected end of input instead. Always throws a `SyntaxError`.
 */
function unexpectedToken(token) {
  if (token.type === EOF) {
    unexpectedEndOfInput();
  }

  tokenizer.throwUnexpectedToken(
    token.value,
    token.startLine,
    token.startColumn
  );
}

function unexpectedEndOfInput() {
  parseError(`Unexpected end of input.`);
}

function parseError(message) {
  throw new SyntaxError(message);
}

module.exports = yyparse;