/*
 * @lahmatiy/jison
 * Version:
 * A parser generator with Bison's API
 * 459 lines (375 loc) • 13.1 kB
 * JavaScript
 */
const { version } = require('../../package.json');
const { fnBody } = require('../utils');
// Public API of this module: the two lexer source generators.
// Both are function declarations located further down in this file, so they
// are hoisted and already defined when this statement runs.
module.exports = {
generateModuleBody,
generateModule
};
/**
 * Builds the source text (parameter list plus body, without a function name)
 * of the lexer's `performAction` routine: one `switch` with a `case` per rule.
 *
 * - When `tokens` has at least one enumerable key, every
 *   `return '<name>'` / `return "<name>"` in an action is rewritten to return
 *   the key under which `<name>` is stored in `tokens`; names that are not
 *   found are left quoted as they were.
 * - `yytext`, `yyleng`, `yylineno` and `yylloc` are prefixed with `yy_.`.
 *
 * @param {object} spec
 * @param {Array} spec.rules       rule entries; index 1 holds the action, either
 *                                 source text or a function (unwrapped with fnBody)
 * @param {object} [spec.tokens]   map of token key -> token name
 * @param {string} [spec.actionInclude] code placed at the top of the routine
 * @returns {string} generated source text
 */
function generatePerformAction({ rules, tokens, actionInclude }) {
    const returnLiteralRx = /\breturn\s+('[^']+?'|"[^"]+?")/g;
    const lexerVarRx = /\b(yytext|yyleng|yylineno|yylloc)\b/g;

    // Reverse lookup: token name -> key in `tokens`.
    const tokenKeyByName = Object.create(null);
    let tokenCount = 0;

    for (const tokenKey in tokens) {
        tokenKeyByName[tokens[tokenKey]] = tokenKey;
        tokenCount += 1;
    }

    const lines = [];

    lines.push('(yy,yy_,__lexelActionId,YY_START) {');
    lines.push(actionInclude || '');
    lines.push('const YYSTATE=YY_START;');
    lines.push('switch (__lexelActionId) {');

    rules.forEach((rule, index) => {
        const rawAction = rule[1];
        let source = typeof rawAction === 'function' ? fnBody(rawAction) : rawAction;

        if (tokenCount > 0) {
            source = source.replace(returnLiteralRx, (whole, quoted) => {
                const tokenKey = tokenKeyByName[quoted.slice(1, -1)];

                return 'return ' + (tokenKey || quoted);
            });
        }

        const prefixed = source.replace(lexerVarRx, 'yy_.$1');

        lines.push(`case ${index}:{${prefixed}\nbreak;}`);
    });

    lines.push('}');
    lines.push('}');

    return lines.join('\n');
}
/**
 * Produces the source text of the lexer object literal.
 *
 * The literal contains `yy`, `options`, `rules`, `conditions`, the generated
 * `performAction` routine and the source of every method of `lexerInterface`
 * (obtained with `toString()`).
 *
 * Inside the method sources each `this.options.<name>` reference is inlined
 * as the literal `true`/`false`, unless `<name>` is present in `options` with
 * a non-boolean value; in that case the reference is kept as written.
 *
 * @param {object} opt
 * @param {object} [opt.options]  lexer options (serialized as JSON)
 * @param {*} opt.patterns        rule patterns, concatenated into `rules: [...]`
 * @param {*} opt.conditions      start conditions (serialized as JSON)
 * @returns {string} source text of an object literal
 */
function generateModuleBody(opt) {
    const options = opt.options || {};
    const optionRefRx = /this\.options\.([a-zA-Z$_][a-zA-Z0-9$_]*)/g;

    // Decides what a single `this.options.<name>` occurrence turns into.
    const inlineOption = (reference, name) => {
        const keepsRuntimeLookup = name in options && typeof options[name] !== 'boolean';

        if (keepsRuntimeLookup) {
            return reference;
        }

        return Boolean(options[name]);
    };

    const members = [];

    members.push('yy:{}');
    members.push('options: ' + JSON.stringify(options));
    members.push('rules: [' + opt.patterns + ']');
    members.push('conditions: ' + JSON.stringify(opt.conditions));
    members.push('performAction' + generatePerformAction(opt));

    Object.values(lexerInterface).forEach((method) => {
        const methodSource = method.toString().replace(optionRefRx, inlineOption);

        members.push(' ' + methodSource);
    });

    return ['{', members.join(',\n'), '}'].join('\n');
}
/**
 * Wraps the generated lexer into a complete module of the requested format.
 *
 * @param {object} [options] lexer description passed on to generateModuleBody();
 *                           `options.moduleInclude`, when truthy, is appended
 *                           after the lexer definition
 * @param {string} [format='iife'] one of 'cjs', 'esm' or 'iife'
 * @returns {string} module source text
 * @throws {Error} when `format` is not one of the supported values
 */
function generateModule(options, format = 'iife') {
    const settings = options || {};
    const banner = '/* Lexer generated by jison ' + version + ' */\n';

    // The body is built before the format is inspected, so problems in the
    // lexer description surface ahead of an unknown-format error.
    const definition = 'const lexer = ' + generateModuleBody(settings) + ';\n';
    const body = settings.moduleInclude
        ? definition + (settings.moduleInclude + ';\n')
        : definition;

    if (format === 'cjs') {
        return [banner, body, 'module.exports = lexer.lexer = lexer;'].join('');
    }

    if (format === 'esm') {
        return [banner, 'export ', body, 'export default lexer;\n'].join('');
    }

    if (format === 'iife') {
        return ['(function(){\n', banner, body, 'return lexer;\n', '})();\n'].join('');
    }

    throw new Error(`Unknown module format "${format}"`);
}
/**
 * Runtime methods of a generated lexer.
 *
 * generateModuleBody() serializes every method of this object with
 * `toString()` and embeds the text into the generated lexer, therefore:
 *   - a method may only rely on `this` and on its own locals, never on
 *     module-scope helpers;
 *   - option checks must be spelled exactly `this.options.<name>` so the
 *     generator can find and inline them;
 *   - comments placed inside a method body end up in the generated code,
 *     which is why the documentation lives between the methods.
 */
const lexerInterface = {
    /**
     * Resets the lexer and sets a new input.
     * @param {string} input text to tokenize
     * @param {object} [yy] shared state; falls back to the current `this.yy`,
     *                      then to a fresh object
     * @returns {object} the lexer itself
     */
    setInput(input, yy) {
        this.yy = yy || this.yy || {};
        this._input = input;
        this._more = false;
        this._backtrack = false;
        this.done = false;
        this.currentCondition = 'INITIAL';
        this.conditionStack = ['INITIAL'];
        this.offset = 0;
        this.yytext = this.match = '';
        this.yyleng = 0;
        this.yylineno = 0;
        this.yylloc = {
            first_line: 1,
            first_column: 0,
            last_line: 1,
            last_column: 0
        };

        if (this.options.ranges) {
            this.yylloc.range = [0, 0];
        }

        return this;
    },

    /**
     * Reports an error: delegates to `this.yy.parser.parseError()` when a
     * parser is attached, otherwise throws an Error with the message.
     * @param {string} str message
     * @param {object} details extra information passed to the parser
     */
    parseError(str, details) {
        if (!this.yy.parser) {
            throw new Error(str);
        }

        this.yy.parser.parseError(str, details);
    },

    /**
     * Consumes one char from the input and appends it to the current match.
     * A "\r\n" pair counts as a single line break, the same way
     * lineColumnForOffset() counts it.
     * @returns {string|undefined} the consumed char, or undefined when the
     *          input is exhausted (the lexer state is left untouched then)
     */
    input() {
        if (this.offset >= this._input.length) {
            return undefined;
        }

        const ch = this._input[this.offset];
        // "\n" right after "\r" completes a line break that is already counted
        const completesCrlf = ch === '\n' && this._input[this.offset - 1] === '\r';

        this.offset++;
        this.yyleng++;
        this.yytext = this.match = this.yytext + ch;

        if (!completesCrlf) {
            if (ch === '\r' || ch === '\n') {
                this.yylineno++;
                this.yylloc.last_line++;
                this.yylloc.last_column = 0;
            } else {
                this.yylloc.last_column++;
            }
        }

        if (this.options.ranges) {
            this.yylloc.range[1]++;
        }

        return ch;
    },

    /**
     * Computes the line/column of `offset` inside `str`.
     * @param {number} [offset=this.offset] position inside `str`
     * @param {string} [str=this._input] text to scan
     * @param {number} [startLine=1] line number of the first char of `str`
     * @param {number} [startColumn=0] column of the first char of `str`
     * @returns {{line: number, column: number}}
     */
    lineColumnForOffset(offset = this.offset, str = this._input, startLine = 1, startColumn = 0) {
        const rx = /\r\n?|\n/g;
        let line = startLine;
        let column = startColumn;
        let lineStart = 0;
        let match;

        while ((match = rx.exec(str)) !== null && match.index < offset) {
            line++;
            column = 0;
            // clamp: an offset between "\r" and "\n" must not yield a negative column
            lineStart = Math.min(match.index + match[0].length, offset);
        }

        column += offset - lineStart;

        return {
            line,
            column
        };
    },

    /**
     * Makes `input.slice(start, end)` the current match and updates
     * `yytext`, `yyleng`, `offset`, `yylineno` and `yylloc` accordingly.
     * @param {number} start offset of the first char of the match
     * @param {number} end offset right after the last char of the match
     */
    setMatch(start, end) {
        const newMatch = this._input.slice(start, end);
        // reuse the known end location when the match starts where the previous one ended
        const firstLoc = start !== this.offset ? this.lineColumnForOffset(start) : {
            line: this.yylloc.last_line,
            column: this.yylloc.last_column
        };
        const lastLoc = start !== end
            ? this.lineColumnForOffset(end - start, newMatch, firstLoc.line, firstLoc.column)
            : firstLoc;

        this.yytext = this.match = newMatch;
        this.yyleng = end - start;
        this.offset = end;
        this.yylineno = lastLoc.line - 1;
        this.yylloc = {
            first_line: firstLoc.line,
            first_column: firstLoc.column,
            last_line: lastLoc.line,
            last_column: lastLoc.column
        };

        if (this.options.ranges) {
            this.yylloc.range = [start, end];
        }
    },

    /**
     * Retains the first `n` characters of the match; the rest goes back to the input.
     * @param {number} n number of chars to keep
     */
    less(n) {
        const start = this.offset - this.match.length;

        this.setMatch(start, start + n);
    },

    /**
     * Unshifts the tail of the matched text back to the input.
     * @param {string} chunk must be the ending of the current match
     * @throws {Error} when `chunk` is not the ending of the current match
     */
    unput(chunk) {
        const newMatchLength = this.match.length - chunk.length;

        if (!this.match.startsWith(chunk, newMatchLength)) {
            throw new Error('Lexer#unput() should receive a part of matched text only');
        }

        this.less(newMatchLength);
    },

    /**
     * When called from an action, keeps the matched text so that the next
     * match is appended to it.
     */
    more() {
        this._more = true;
    },

    /**
     * When called from an action, signals that this rule fails to match the
     * input, so the next matching rule should be tested instead.
     * @throws {Error} when the backtrack_lexer option is off
     */
    reject() {
        if (this.options.backtrack_lexer) {
            this._backtrack = true;
            return;
        }

        // FIXME: remove method when options.backtrack_lexer is false
        throw new Error('reject() is allowed only when options.backtrack_lexer = true');
    },

    /**
     * Renders the input around `offset` (up to 20 chars on each side, line
     * breaks shown as "\n") followed by a line with a "^" pointer.
     * @param {number} [offset] defaults to the start of the current match
     * @returns {string}
     */
    showPosition(offset = this.offset - this.match.length) {
        const input = this._input;
        const start = Math.max(offset - 20, 0);
        const end = Math.min(offset + 20, input.length);
        const pre = (start === 0 ? '' : '...') + input.slice(start, offset).replace(/\r\n?|\n/g, '\\n');
        const post = input.slice(offset, end).replace(/\r\n?|\n/g, '\\n') + (end === input.length ? '' : '...');

        return (
            pre +
            post +
            `\n${'-'.repeat(pre.length)}^`
        );
    },

    /**
     * Applies a matched rule: updates the match state and runs the rule action.
     * @param {Array} match match result; `match[0]` is the matched text
     * @param {number} rule rule id passed to `performAction`
     * @returns {*} `false` when the action produced no token or called
     *          reject(); otherwise the token (a numeric token is translated
     *          through `yy.parser.terminals_` when a parser is attached)
     */
    test_match(match, rule) {
        let backup;

        if (this.options.backtrack_lexer) {
            // save context
            backup = {
                yylineno: this.yylineno,
                yylloc: {
                    first_line: this.yylloc.first_line,
                    last_line: this.yylloc.last_line,
                    first_column: this.yylloc.first_column,
                    last_column: this.yylloc.last_column
                },
                yytext: this.yytext,
                match: this.match,
                matches: this.matches,
                yyleng: this.yyleng,
                offset: this.offset,
                _more: this._more,
                yy: this.yy,
                // both parts of the condition state must be restored together
                currentCondition: this.currentCondition,
                conditionStack: this.conditionStack.slice(0),
                done: this.done
            };

            if (this.options.ranges) {
                backup.yylloc.range = this.yylloc.range.slice(0);
            }
        }

        // text kept by more() stays a part of the new match
        const start = this._more ? this.offset - this.match.length : this.offset;

        this._more = false;
        this._backtrack = false;
        this.setMatch(start, this.offset + match[0].length);
        this.matches = match;

        // perform action; topState() yields 'INITIAL' even for an empty stack
        const token = this.performAction.call(
            this,
            this.yy,
            this,
            rule,
            this.topState()
        );

        if (!token && this._backtrack) {
            // recover context
            Object.assign(this, backup);
            return false; // rule action called reject() implying the next rule should be tested instead.
        }

        if (!this.eof()) {
            this.done = false;
        }

        if (typeof token === 'number' && this.yy.parser) {
            return this.yy.parser.terminals_[token];
        }

        return token || false;
    },

    /**
     * @returns {boolean} true when the whole input is consumed
     */
    eof() {
        return this.offset === this._input.length;
    },

    /**
     * Tries the rules of the current condition against the remaining input.
     * Without the flex option the first matching rule wins; with it the
     * longest match wins. With the backtrack_lexer option a matching rule is
     * applied immediately and may be skipped by calling reject().
     * @returns {*} a token, `false` when a rule consumed input without
     *          producing a token, `1` on EOF, or undefined after a lexical
     *          error was reported through parseError() without throwing
     */
    next() {
        if (this.done) {
            return 1; // EOF
        }

        if (this.eof()) {
            this.done = true;
        }

        if (!this._more) {
            this.yytext = '';
            this.match = '';
        }

        // the remaining input is the same for every rule tried below
        const ruleInput = this._input.slice(this.offset);
        let match;
        let matchRuleId;

        for (const ruleId of this.conditions[this.currentCondition].rules) {
            const rule = this.rules[ruleId];
            let tempMatch;

            if (typeof rule === 'function') {
                const ret = rule(ruleInput, this.currentCondition);

                switch (typeof ret) {
                    case 'string': tempMatch = [ret]; break;
                    case 'number': tempMatch = [ruleInput.slice(0, ret)]; break;
                    default: tempMatch = ret;
                }
            } else {
                tempMatch = ruleInput.match(rule);
            }

            if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
                match = tempMatch;
                matchRuleId = ruleId;

                if (this.options.backtrack_lexer) {
                    const token = this.test_match(tempMatch, matchRuleId);

                    if (token === false && this._backtrack) {
                        match = false;
                        continue; // rule action called reject() implying a rule MISmatch.
                    }

                    // when token is false this is a lexer rule which consumes
                    // input without producing a token (e.g. whitespace)
                    return token;
                }

                if (!this.options.flex) {
                    break;
                }
            }
        }

        if (match) {
            // when token is false this is a lexer rule which consumes
            // input without producing a token (e.g. whitespace)
            return this.test_match(match, matchRuleId);
        }

        if (this.eof()) {
            return 1; // EOF
        }

        this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(), {
            text: '',
            token: null,
            line: this.yylineno
        });
    },

    /**
     * Calls next() until it yields a truthy value.
     * @returns {*} the next token (`1` on EOF)
     */
    lex() {
        let token;

        while (!token) {
            token = this.next();
        }

        return token;
    },

    /**
     * Activates a new lexer condition and pushes it onto the condition stack.
     * @param {string} condition
     */
    begin(condition) {
        this.currentCondition = condition;
        this.conditionStack.push(condition);
    },

    /**
     * Alias for begin(condition).
     * @param {string} condition
     */
    pushState(condition) {
        this.begin(condition);
    },

    /**
     * Pops the active condition off the condition stack.
     * @returns {string} the condition that is active afterwards
     *          ('INITIAL' when the stack became empty)
     */
    popState() {
        this.conditionStack.pop();

        return this.currentCondition = (this.conditionStack.length
            ? this.conditionStack[this.conditionStack.length - 1]
            : 'INITIAL'
        );
    },

    /**
     * Returns the active condition; with an index it returns the N-th
     * previous condition, or 'INITIAL' when there is no such entry.
     * @param {number} [n=0]
     * @returns {string}
     */
    topState(n) {
        n = this.conditionStack.length - 1 - Math.abs(n || 0);

        return n >= 0
            ? this.conditionStack[n]
            : 'INITIAL';
    },

    /**
     * @returns {number} the number of conditions on the stack
     */
    stateStackSize() {
        return this.conditionStack.length;
    }
};