// parser-transform — Streaming+Async lexer and parser
// (package-page metadata: Version: —, 672 lines (558 loc) • 21.4 kB • JavaScript)
/**
* LR parser generated by the Syntax tool.
*
* https://www.npmjs.com/package/syntax-cli
*
* npm install -g syntax-cli
*
* syntax-cli --help
*
* To regenerate run:
*
* syntax-cli \
* --grammar ~/path-to-grammar-file \
* --mode <parsing-mode> \
* --output ~/path-to-output-parser-file.js
*/
;
/**
 * Matched token text. Set by the tokenizer on each match; lex-rule
 * handlers may reassign it (e.g. to trim or decode the lexeme).
 */
let yytext;
/**
 * Length of the matched token text.
 */
let yyleng;
/**
 * Storage object shared between the parser and semantic actions
 * (holds `lexer`, `tokenizer` and the global parse `options`).
 */
let yy = {};
/**
 * Result of semantic action. Each production handler writes the
 * reduced semantic value here.
 */
let __;
/**
 * Result location object. Read when `captureLocations` is enabled;
 * NOTE(review): never assigned in this file — presumably set by
 * generated location-tracking actions in other configurations.
 */
let __loc;
/**
 * Combines the locations of the first and last RHS elements of a
 * production into the location of the reduced node. Returns `null`
 * when location capturing is disabled.
 */
function yyloc(start, end) {
  // Location tracking may be switched off globally.
  if (!yy.options.captureLocations) {
    return null;
  }
  // An epsilon production contributes no span of its own; propagate
  // whichever boundary exists (possibly none).
  if (!start || !end) {
    return start || end;
  }
  const { startOffset, startLine, startColumn } = start;
  const { endOffset, endLine, endColumn } = end;
  return {
    startOffset,
    endOffset,
    startLine,
    endLine,
    startColumn,
    endColumn,
  };
}
/**
 * End-of-input marker used by the tokenizer and the parse table.
 */
const EOF = '$';
/**
 * List of productions (generated by Syntax tool).
 *
 * Each entry is `[lhsNonTerminalId, rhsLength, semanticAction]`. The
 * action receives the semantic values of the right-hand side as
 * `_1 .. _N` and stores its result in the module-level `__`. The
 * actions here build lexer NFAs lazily: most return closures taking
 * `(macros)` or `(macros, nfas)` that are composed and only executed
 * when the whole grammar has been reduced (production 1 runs them and
 * converts the resulting NFAs to DFAs).
 */
const productions = [[-1,1,(_1) => { __ = _1 }],
[0,3,(_1,_2,_3) => { __ = convert_to_dfas(_3(new Map(_1),new Map())) }],
[1,0,() => { __ = [] }],
[1,2,(_1,_2) => { __ = _2.concat(_1) }],
[2,3,(_1,_2,_3) => { __ = [[_1, _2]] }],
[3,1,(_1) => { __ = _1 }],
[3,2,(_1,_2) => { __ = function (macros,nfas) { _1(macros,nfas); _2(macros,nfas); return nfas } }],
[4,3,(_1,_2,_3) => { __ = function (macros,nfas) { let code = build_acceptable(_3); let pattern = _2(macros).toLastAcceptable(code); _1.forEach( (qualifier) => nfa_by_name(nfas,qualifier).appendFragment(pattern,START_STATE) ); return nfas } }],
[5,0,() => { __ = ['INITIAL'] }],
[5,1,(_1) => { __ = _1 }],
[6,1,(_1) => { __ = _1 }],
[6,3,(_1,_2,_3) => { __ = _1.concat(_2) }],
[7,1,(_1) => { __ = [_1] }],
[8,3,(_1,_2,_3) => { __ = (macros) => Fragment.mergeAll([_1(macros),_3(macros)]) }],
[8,1,(_1) => { __ = _1 }],
[9,2,(_1,_2) => { __ = (macros) => Fragment.concatAll([_1(macros),_2(macros)]) }],
[9,1,(_1) => { __ = _1 }],
[10,3,(_1,_2,_3) => { __ = _2 }],
[10,3,(_1,_2,_3) => { __ = (macros) => simple(new CharLabel(_2)) }],
[10,4,(_1,_2,_3,_4) => { __ = (macros) => simple(new CharLabel(_3.not())) }],
[10,1,(_1) => { __ = (macros) => simple(new CharLabel(new BitSet().not())) }],
[10,1,(_1) => { __ = (macros) => single(_1) }],
[10,1,(_1) => { __ = (macros) => { if(!macros.has(_1)) { throw new Error(`Undefined macro ${_1}`) }; return macros.get(_1)(macros) } }],
[10,2,(_1,_2) => { __ = (macros) => _1(macros).kleeneStar() }],
[10,2,(_1,_2) => { __ = (macros) => _1(macros).kleenePlus() }],
[10,2,(_1,_2) => { __ = (macros) => _1(macros).optional() }],
[11,1,(_1) => { __ = _1 }],
[11,1,(_1) => { __ = new BitSet().set(CP('-')) }],
[11,2,(_1,_2) => { __ = _2.or(new BitSet().set(CP('-'))) }],
[12,1,(_1) => { __ = _1 }],
[12,2,(_1,_2) => { __ = _1.or(_2) }],
[13,1,(_1) => { __ = new BitSet().set(CP(_1)) }],
[13,3,(_1,_2,_3) => { __ = new BitSet().setRange(CP(_1),CP(_3)) }]];
/**
 * Encoded tokens map.
 *
 * Maps token type names (as returned by the lex-rule handlers) to the
 * numeric column ids used in the parsing table below.
 */
const tokens = {"NAME":"14","END_OF_LINE":"15","END_OF_TOKEN":"16","QUALIFIER_NAME":"17","START_SET":"18","END_SET":"19","INVERT_SET":"20","PATTERN_TOKEN":"21","QUOTED_NAME":"22","RANGE":"23","SET_TOKEN":"24","'%%'":"25","','":"26","'|'":"27","'('":"28","')'":"29","'.'":"30","'*'":"31","'+'":"32","'?'":"33","$":"34"};
/**
 * Parsing table (generated by Syntax tool).
 *
 * One object per LR state; keys are encoded token/non-terminal ids,
 * values are "sN" (shift and go to state N), "rN" (reduce by
 * production N), "acc" (accept), or a bare number (goto state after a
 * reduce).
 */
const table = [{"0":1,"1":2,"2":3,"14":"s4","25":"r2"},{"34":"acc"},{"25":"s5"},{"1":45,"2":3,"14":"s4","25":"r2"},{"8":46,"9":14,"10":15,"18":"s17","21":"s19","22":"s20","28":"s16","30":"s18"},{"3":6,"4":7,"5":8,"6":9,"7":10,"17":"s11","18":"r8","21":"r8","22":"r8","28":"r8","30":"r8"},{"4":12,"5":8,"6":9,"7":10,"17":"s11","18":"r8","21":"r8","22":"r8","28":"r8","30":"r8","34":"r1"},{"17":"r5","18":"r5","21":"r5","22":"r5","28":"r5","30":"r5","34":"r5"},{"8":13,"9":14,"10":15,"18":"s17","21":"s19","22":"s20","28":"s16","30":"s18"},{"18":"r9","21":"r9","22":"r9","26":"s43","28":"r9","30":"r9"},{"18":"r10","21":"r10","22":"r10","26":"r10","28":"r10","30":"r10"},{"18":"r12","21":"r12","22":"r12","26":"r12","28":"r12","30":"r12"},{"17":"r6","18":"r6","21":"r6","22":"r6","28":"r6","30":"r6","34":"r6"},{"16":"s21"},{"15":"r14","16":"r14","27":"s22","29":"r14"},{"9":24,"10":15,"15":"r16","16":"r16","18":"s17","21":"s19","22":"s20","27":"r16","28":"s16","29":"r16","30":"s18","31":"s25","32":"s26","33":"s27"},{"8":28,"9":14,"10":15,"18":"s17","21":"s19","22":"s20","28":"s16","30":"s18"},{"11":30,"12":32,"13":34,"20":"s31","23":"s33","24":"s35"},{"15":"r20","16":"r20","18":"r20","21":"r20","22":"r20","27":"r20","28":"r20","29":"r20","30":"r20","31":"r20","32":"r20","33":"r20"},{"15":"r21","16":"r21","18":"r21","21":"r21","22":"r21","27":"r21","28":"r21","29":"r21","30":"r21","31":"r21","32":"r21","33":"r21"},{"15":"r22","16":"r22","18":"r22","21":"r22","22":"r22","27":"r22","28":"r22","29":"r22","30":"r22","31":"r22","32":"r22","33":"r22"},{"17":"r7","18":"r7","21":"r7","22":"r7","28":"r7","30":"r7","34":"r7"},{"8":23,"9":14,"10":15,"18":"s17","21":"s19","22":"s20","28":"s16","30":"s18"},{"15":"r13","16":"r13","29":"r13"},{"15":"r15","16":"r15","27":"r15","29":"r15"},{"15":"r23","16":"r23","18":"r23","21":"r23","22":"r23","27":"r23","28":"r23","29":"r23","30":"r23","31":"r23","32":"r23","33":"r23"},{"15":"r24","16":"r24","18":"r24","21":"r24","22":"r24","27":"r24","28":"r24"
,"29":"r24","30":"r24","31":"r24","32":"r24","33":"r24"},{"15":"r25","16":"r25","18":"r25","21":"r25","22":"r25","27":"r25","28":"r25","29":"r25","30":"r25","31":"r25","32":"r25","33":"r25"},{"29":"s29"},{"15":"r17","16":"r17","18":"r17","21":"r17","22":"r17","27":"r17","28":"r17","29":"r17","30":"r17","31":"r17","32":"r17","33":"r17"},{"19":"s36"},{"11":37,"12":32,"13":34,"23":"s33","24":"s35"},{"19":"r26"},{"12":39,"13":34,"19":"r27","24":"s35"},{"12":40,"13":34,"19":"r29","24":"s35"},{"19":"r31","23":"s41","24":"r31"},{"15":"r18","16":"r18","18":"r18","21":"r18","22":"r18","27":"r18","28":"r18","29":"r18","30":"r18","31":"r18","32":"r18","33":"r18"},{"19":"s38"},{"15":"r19","16":"r19","18":"r19","21":"r19","22":"r19","27":"r19","28":"r19","29":"r19","30":"r19","31":"r19","32":"r19","33":"r19"},{"19":"r28"},{"19":"r30"},{"24":"s42"},{"19":"r32","24":"r32"},{"7":44,"17":"s11"},{"18":"r11","21":"r11","22":"r11","26":"r11","28":"r11","30":"r11"},{"25":"r3"},{"15":"s47"},{"14":"r4","25":"r4"}];
/**
 * Parsing stack.
 *
 * Interleaves state numbers with `{symbol, semanticValue, loc}`
 * entries pushed by the shift/reduce actions in `yyparse.parse`.
 */
const stack = [];
/**
 * Tokenizer instance.
 *
 * Defaults to the generated tokenizer defined below; replaceable via
 * `yyparse.setTokenizer`.
 */
let tokenizer;
/**
 * Generic tokenizer used by the parser in the Syntax tool.
 *
 * https://www.npmjs.com/package/syntax-cli
 *
 * See `--custom-tokenizer` to skip this generation, and use a custom one.
 */
// Each rule is [regexp, handler]. The handler runs with the tokenizer
// as `this`, may switch start conditions and rewrite `yytext`, and
// returns the token type (or nothing to skip the match). Which rules
// are active depends on the current start condition — see
// `lexRulesByConditions` below, which maps conditions to indices into
// this array. Rules 0-1 (INITIAL): a macro-definition name, or the
// "%%" section separator.
const lexRules = [[/^[a-zA-Z]+[a-zA-Z0-9]*\s+/, function() { this.popState(); this.begin('pattern'); yytext = yytext.trim(); return 'NAME' }],
[/^%%.*\n/, function() { this.popState(); this.begin('tokenizer'); return "'%%'" }],
// 2-3: "pattern" — end of a macro definition line (with or without a
// trailing payload).
[/^[\t ]+\S+\n/, function() { this.popState(); yytext = yytext.slice(0,-1).trim(); return 'END_OF_LINE' }],
[/^[\t ]*\n/, function() { this.popState(); yytext = ''; return 'END_OF_LINE' }],
// 4-5: "tokenizer" — end of a token rule (action text), or start of a
// <qualifier,...> list.
[/^[\t ]+.*\n/, function() { this.popState(); this.begin('tokenizer'); yytext = yytext.slice(0,-1).trim(); return 'END_OF_TOKEN' }],
[/^</, function() { this.popState(); this.begin('tokenizer'); this.begin('qualifier') }],
// 6-14: shared by "pattern" and "tokenizer" — regex operators,
// character-set opener, {macro} references, escapes and literal chars.
[/^[()+*?.|]/, function() { return `'${yytext[0]}'` }],
[/^\[/, function() { this.begin('set'); return 'START_SET' }],
[/^\{[a-zA-Z]+[a-zA-Z0-9]*\}/, function() { yytext = yytext.slice(1,-1); return 'QUOTED_NAME' }],
[/^\\t/, function() { yytext = '\t'; return 'PATTERN_TOKEN' }],
[/^\\r/, function() { yytext = '\r'; return 'PATTERN_TOKEN' }],
[/^\\n/, function() { yytext = '\n'; return 'PATTERN_TOKEN' }],
[/^\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]/, function() { yytext = String.fromCodePoint(parseInt(yytext.slice(2),16)); return 'PATTERN_TOKEN' }],
[/^\\./, function() { yytext = yytext.slice(1); return 'PATTERN_TOKEN' }],
[/^./, function() { return 'PATTERN_TOKEN' }],
// 15-23: "set" — contents of a [...] character set: closer, inversion
// marker, range dash, escapes and literal set members.
[/^\]/, function() { this.popState(); return 'END_SET' }],
[/^\^/, function() { return `INVERT_SET` }],
[/^-/, function() { return `RANGE` }],
[/^\\t/, function() { yytext = '\t'; return 'SET_TOKEN' }],
[/^\\r/, function() { yytext = '\r'; return 'SET_TOKEN' }],
[/^\\n/, function() { yytext = '\n'; return 'SET_TOKEN' }],
[/^\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]/, function() { yytext = String.fromCodePoint(parseInt(yytext.slice(2),16)); return 'SET_TOKEN' }],
[/^\\./, function() { yytext = yytext.slice(1); return 'SET_TOKEN' }],
[/^./, function() { return 'SET_TOKEN' }],
// 24-26: "qualifier" — names inside a <...> qualifier list.
[/^[a-zA-Z]+[a-zA-Z0-9]*/, function() { return 'QUALIFIER_NAME' }],
[/^[,]/, function() { /* ignore */ }],
[/^>/, function() { this.popState() }],
// 27-35: INITIAL — punctuation returned as quoted literal tokens.
[/^%%/, function() { return "'%%'"; }],
[/^,/, function() { return "','"; }],
[/^\|/, function() { return "'|'"; }],
[/^\(/, function() { return "'('"; }],
[/^\)/, function() { return "')'"; }],
[/^\./, function() { return "'.'"; }],
[/^\*/, function() { return "'*'"; }],
[/^\+/, function() { return "'+'"; }],
[/^\?/, function() { return "'?'"; }]];
// Maps each start condition to the indices of the rules (above) that
// are active while that condition is on top of the state stack.
const lexRulesByConditions = {"INITIAL":[0,1,27,28,29,30,31,32,33,34,35],"pattern":[2,3,6,7,8,9,10,11,12,13,14],"set":[15,16,17,18,19,20,21,22,23],"tokenizer":[4,5,6,7,8,9,10,11,12,13,14],"qualifier":[24,25,26]};
/**
 * Singleton token returned once the whole input has been consumed.
 */
const EOF_TOKEN = {
type: EOF,
value: '',
};
/**
 * Generated tokenizer: yields one token per `getNextToken()` call,
 * maintaining a stack of start conditions, a queue of pending tokens,
 * and line/column location counters.
 */
tokenizer = {
/**
 * Resets the tokenizer onto a fresh input string: cursor, start
 * conditions, token queue and all location counters.
 */
initString(string) {
this._string = string;
this._cursor = 0;
this._states = ['INITIAL'];
this._tokensQueue = [];
this._currentLine = 1;
this._currentColumn = 0;
this._currentLineBeginOffset = 0;
/**
 * Matched token location data.
 */
this._tokenStartOffset = 0;
this._tokenEndOffset = 0;
this._tokenStartLine = 1;
this._tokenEndLine = 1;
this._tokenStartColumn = 0;
this._tokenEndColumn = 0;
return this;
},
/**
 * Returns tokenizer states.
 */
getStates() {
return this._states;
},
// The active start condition is the top of the states stack.
getCurrentState() {
return this._states[this._states.length - 1];
},
// Enters a new start condition.
pushState(state) {
this._states.push(state);
},
// Classic lex-style alias for `pushState`.
begin(state) {
this.pushState(state);
},
// Leaves the current start condition; the bottom (initial) state is
// never popped.
popState() {
if (this._states.length > 1) {
return this._states.pop();
}
return this._states[0];
},
/**
 * Returns the next token: drains the pending queue first, then tries
 * each lex rule of the current start condition against the remaining
 * input. A handler returning nothing skips the match (e.g. ignored
 * separators); a handler returning an array yields the first token
 * now and queues the rest.
 */
getNextToken() {
// Something was queued, return it.
if (this._tokensQueue.length > 0) {
return this.onToken(this._toToken(this._tokensQueue.shift()));
}
if (!this.hasMoreTokens()) {
return this.onToken(EOF_TOKEN);
}
let string = this._string.slice(this._cursor);
let lexRulesForState = lexRulesByConditions[this.getCurrentState()];
for (let i = 0; i < lexRulesForState.length; i++) {
let lexRuleIndex = lexRulesForState[i];
let lexRule = lexRules[lexRuleIndex];
let matched = this._match(string, lexRule[0]);
// Manual handling of EOF token (the end of string). Return it
// as `EOF` symbol.
if (string === '' && matched === '') {
this._cursor++;
}
if (matched !== null) {
yytext = matched;
yyleng = yytext.length;
let token = lexRule[1].call(this);
if (!token) {
return this.getNextToken();
}
// If multiple tokens are returned, save them to return
// on next `getNextToken` call.
if (Array.isArray(token)) {
const tokensToQueue = token.slice(1);
token = token[0];
if (tokensToQueue.length > 0) {
this._tokensQueue.unshift(...tokensToQueue);
}
}
return this.onToken(this._toToken(token, yytext));
}
}
if (this.isEOF()) {
this._cursor++;
return EOF_TOKEN;
}
this.throwUnexpectedToken(
string[0],
this._currentLine,
this._currentColumn
);
},
/**
 * Throws default "Unexpected token" exception, showing the actual
 * line from the source, pointing with the ^ marker to the bad token.
 * In addition, shows `line:column` location.
 */
throwUnexpectedToken(symbol, line, column) {
const lineSource = this._string.split('\n')[line - 1];
let lineData = '';
if (lineSource) {
const pad = ' '.repeat(column);
lineData = '\n\n' + lineSource + '\n' + pad + '^\n';
}
throw new SyntaxError(
`${lineData}Unexpected token: "${symbol}" ` +
`at ${line}:${column}.`
);
},
// Current absolute offset into the input string.
getCursor() {
return this._cursor;
},
// Current 1-based line number.
getCurrentLine() {
return this._currentLine;
},
// Current 0-based column number.
getCurrentColumn() {
return this._currentColumn;
},
/**
 * Records start/end offset, line and column for `matched`, advancing
 * the line counter and line-begin offset past any newlines the match
 * contains.
 */
_captureLocation(matched) {
const nlRe = /\n/g;
// Absolute offsets.
this._tokenStartOffset = this._cursor;
// Line-based locations, start.
this._tokenStartLine = this._currentLine;
this._tokenStartColumn =
this._tokenStartOffset - this._currentLineBeginOffset;
// Extract `\n` in the matched token.
let nlMatch;
while ((nlMatch = nlRe.exec(matched)) !== null) {
this._currentLine++;
this._currentLineBeginOffset = this._tokenStartOffset + nlMatch.index + 1;
}
this._tokenEndOffset = this._cursor + matched.length;
// Line-based locations, end.
this._tokenEndLine = this._currentLine;
this._tokenEndColumn = this._currentColumn =
(this._tokenEndOffset - this._currentLineBeginOffset);
},
// Wraps a token type/text pair into a token object carrying the most
// recently captured location data.
_toToken(tokenType, yytext = '') {
return {
// Basic data.
type: tokenType,
value: yytext,
// Location data.
startOffset: this._tokenStartOffset,
endOffset: this._tokenEndOffset,
startLine: this._tokenStartLine,
endLine: this._tokenEndLine,
startColumn: this._tokenStartColumn,
endColumn: this._tokenEndColumn,
};
},
// True when the cursor sits exactly at the end of the input.
isEOF() {
return this._cursor === this._string.length;
},
// True until the cursor has moved *past* the end of input; the final
// position still yields the EOF token (see `getNextToken`).
hasMoreTokens() {
return this._cursor <= this._string.length;
},
// Applies `regexp` (all rules are ^-anchored) to `string`; on a match
// records its location, advances the cursor and returns the matched
// text, otherwise returns null.
_match(string, regexp) {
let matched = string.match(regexp);
if (matched) {
// Handle `\n` in the matched token to track line numbers.
this._captureLocation(matched[0]);
this._cursor += matched[0].length;
return matched[0];
}
return null;
},
/**
 * Allows analyzing, and transforming token. Default implementation
 * just passes the token through.
 */
onToken(token) {
return token;
},
};
/**
 * Expose tokenizer so it can be accessed in semantic actions.
 */
yy.lexer = tokenizer;
yy.tokenizer = tokenizer;
/**
 * Global parsing options. Some options can be shadowed per
 * each `parse` call, if the options are passed.
 *
 * Initialized to the `captureLocations` which is passed
 * from the generator. Other options can be added at runtime.
 */
yy.options = {
captureLocations: false,
};
/**
 * Parsing module: the LR shift/reduce driver over `table` and
 * `productions` above.
 */
const yyparse = {
/**
 * Sets global parsing options.
 */
setOptions(options) {
yy.options = options;
return this;
},
/**
 * Returns parsing options.
 */
getOptions() {
return yy.options;
},
/**
 * Parses a string.
 *
 * Runs the LR loop and returns the final semantic value (or `true`
 * when the grammar produced none). Throws a SyntaxError on an
 * unexpected token or premature end of input. `parseOptions`, if
 * given, shadow the global options for the duration of this call.
 */
parse(string, parseOptions) {
if (!tokenizer) {
throw new Error(`Tokenizer instance wasn't specified.`);
}
tokenizer.initString(string);
/**
 * If parse options are passed, override global parse options for
 * this call, and later restore global options.
 */
let globalOptions = yy.options;
if (parseOptions) {
yy.options = Object.assign({}, yy.options, parseOptions);
}
/**
 * Allow callers to do setup work based on the
 * parsing string, and passed options.
 */
yyparse.onParseBegin(string, tokenizer, yy.options);
stack.length = 0;
stack.push(0);
let token = tokenizer.getNextToken();
let shiftedToken = null;
do {
if (!token) {
// Restore options.
yy.options = globalOptions;
unexpectedEndOfInput();
}
let state = stack[stack.length - 1];
let column = tokens[token.type];
// No action for this token in the current state: syntax error.
if (!table[state].hasOwnProperty(column)) {
yy.options = globalOptions;
unexpectedToken(token);
}
let entry = table[state][column];
// Shift action.
if (entry[0] === 's') {
let loc = null;
if (yy.options.captureLocations) {
loc = {
startOffset: token.startOffset,
endOffset: token.endOffset,
startLine: token.startLine,
endLine: token.endLine,
startColumn: token.startColumn,
endColumn: token.endColumn,
};
}
shiftedToken = this.onShift(token);
// Push the value entry, then the new state number.
stack.push(
{symbol: tokens[shiftedToken.type], semanticValue: shiftedToken.value, loc},
Number(entry.slice(1))
);
token = tokenizer.getNextToken();
}
// Reduce action.
else if (entry[0] === 'r') {
let productionNumber = entry.slice(1);
let production = productions[productionNumber];
let hasSemanticAction = typeof production[2] === 'function';
let semanticValueArgs = hasSemanticAction ? [] : null;
const locationArgs = (
hasSemanticAction && yy.options.captureLocations
? []
: null
);
// Pop one state number and one value entry per RHS symbol,
// collecting the semantic values (and locations) in order.
if (production[1] !== 0) {
let rhsLength = production[1];
while (rhsLength-- > 0) {
stack.pop();
let stackEntry = stack.pop();
if (hasSemanticAction) {
semanticValueArgs.unshift(stackEntry.semanticValue);
if (locationArgs) {
locationArgs.unshift(stackEntry.loc);
}
}
}
}
const reduceStackEntry = {symbol: production[0]};
if (hasSemanticAction) {
yytext = shiftedToken ? shiftedToken.value : null;
yyleng = shiftedToken ? shiftedToken.value.length : null;
const semanticActionArgs = (
locationArgs !== null
? semanticValueArgs.concat(locationArgs)
: semanticValueArgs
);
// The action writes its result to the module-level `__`.
production[2](...semanticActionArgs);
reduceStackEntry.semanticValue = __;
if (locationArgs) {
reduceStackEntry.loc = __loc;
}
}
// Push the reduced non-terminal and take the goto transition.
const nextState = stack[stack.length - 1];
const symbolToReduceWith = production[0];
stack.push(
reduceStackEntry,
table[nextState][symbolToReduceWith]
);
}
// Accept.
else if (entry === 'acc') {
stack.pop();
let parsed = stack.pop();
if (stack.length !== 1 ||
stack[0] !== 0 ||
tokenizer.hasMoreTokens()) {
// Restore options.
yy.options = globalOptions;
unexpectedToken(token);
}
if (parsed.hasOwnProperty('semanticValue')) {
yy.options = globalOptions;
yyparse.onParseEnd(parsed.semanticValue);
return parsed.semanticValue;
}
yyparse.onParseEnd();
// Restore options.
yy.options = globalOptions;
return true;
}
} while (tokenizer.hasMoreTokens() || stack.length > 1);
},
// Replaces the tokenizer (see `--custom-tokenizer`).
setTokenizer(customTokenizer) {
tokenizer = customTokenizer;
return yyparse;
},
// Returns the current tokenizer instance.
getTokenizer() {
return tokenizer;
},
// Lifecycle hooks: no-ops by default, overridable by callers.
onParseBegin(string, tokenizer, options) {},
onParseEnd(parsed) {},
/**
 * Allows analyzing, and transforming shifted token. Default implementation
 * just passes the token through.
 */
onShift(token) {
return token;
},
};
const {CharLabel,State,Edge,NFA,Fragment} = require('@shimaore/dfa.js')
const BitSet = require('bitset')
const {Single} = CharLabel
const START_STATE = 0
/**
 * Returns the NFA registered under `name`, lazily creating an empty
 * NFA (seeded with its start state) on first request.
 */
function nfa_by_name(nfas, name) {
  let nfa = nfas.get(name)
  if (nfa === undefined) {
    nfa = new NFA()
    nfa.addStartState(new State(START_STATE))
    nfas.set(name, nfa)
  }
  return nfa
}
/**
 * Converts every NFA in the map to a DFA, keyed by the same
 * tokenizer-state name.
 */
function convert_to_dfas(nfas) {
  const dfas = new Map()
  for (const [name, nfa] of nfas) {
    dfas.set(name, nfa.toDFA())
  }
  return dfas
}
// Monotonic id source for freshly created NFA states; starts at 1000,
// presumably to stay clear of low reserved ids like START_STATE —
// TODO confirm against the dfa.js library's conventions.
let state = 1000
/**
 * Builds a two-state fragment `from --c--> to`, attaching the optional
 * `attrs` to the destination state.
 */
let simple = function (c, attrs = {}) {
  const from = state++
  const to = state++
  const edge = new Edge(c, to)
  return new Fragment([
    new State(from, [edge]),
    new State(to, [], attrs),
  ])
}
/**
 * Fragment matching exactly the single literal character `c`.
 */
let single = (c) => simple(new Single(c))
/**
 * Unicode code point of the first character of `c` (code-point aware,
 * so astral-plane characters are handled correctly).
 */
let CP = (c) => c.codePointAt(0)
/**
 * Compiles the action source of an accepting rule into a callable.
 *
 * NOTE(review): uses the `Function` constructor (eval-like). `f` comes
 * from the grammar file being parsed, which must therefore be trusted
 * input.
 */
let build_acceptable = function (f) {
  const compiled = new Function(f)
  return compiled
}
/**
 * Reports a token for which the parse table has no action; EOF tokens
 * are reported as a premature end of input instead. Always throws.
 */
function unexpectedToken(token) {
  const { type, value, startLine, startColumn } = token;
  if (type === EOF) {
    unexpectedEndOfInput();
  }
  tokenizer.throwUnexpectedToken(value, startLine, startColumn);
}
/**
 * Raises a parse error for input that ended before a complete parse.
 * Always throws.
 */
function unexpectedEndOfInput() {
  parseError('Unexpected end of input.');
}
/**
 * Funnel for all parser failures: throws a SyntaxError carrying
 * `message`.
 */
function parseError(message) {
  const error = new SyntaxError(message);
  throw error;
}
// Public API: the generated parsing module (see `yyparse.parse`).
module.exports = yyparse;