UNPKG

@lahmatiy/jison

Version:

A parser generator with Bison's API

459 lines (375 loc) • 13.1 kB

JavaScript

const { version } = require('../../package.json'); const { fnBody } = require('../utils'); module.exports = { generateModuleBody, generateModule }; function generatePerformAction({ rules, tokens, actionInclude }) { const code = [ '(yy,yy_,__lexelActionId,YY_START) {', actionInclude || '', 'const YYSTATE=YY_START;' ]; const tokenNameToIndex = Object.create(null); let hasTokens = false; for (const key in tokens) { tokenNameToIndex[tokens[key]] = key; hasTokens = true; } code.push('switch (__lexelActionId) {'); rules.forEach((rule, i) => { let action = typeof rule[1] === 'function' ? fnBody(rule[1]) : rule[1]; if (hasTokens) { action = action.replace(/\breturn\s+('[^']+?'|"[^"]+?")/g, (m, s) => 'return ' + (tokenNameToIndex[s.slice(1, -1)] || s) ); } code.push('case ' + i + ':{' + action.replace(/\b(yytext|yyleng|yylineno|yylloc)\b/g, 'yy_.$1') + '\nbreak;}'); }); code.push('}', '}'); return code.join('\n'); } function generateModuleBody(opt) { const options = opt.options || {}; const out = [ 'yy:{}', 'options: ' + JSON.stringify(options), 'rules: [' + opt.patterns + ']', 'conditions: ' + JSON.stringify(opt.conditions), 'performAction' + generatePerformAction(opt) ]; for (const method of Object.values(lexerInterface)) { out.push(' ' + method.toString().replace( /this\.options\.([a-zA-Z$_][a-zA-Z0-9$_]*)/g, (m, option) => option in options && typeof options[option] !== 'boolean' ? m : Boolean(options[option]) )); } return '{\n' + out.join(',\n') + '\n}'; } function generateModule(options, format = 'iife') { options = options || {}; let out = ''; const banner = '/* Lexer generated by jison ' + version + ' */\n'; const body = 'const lexer = ' + generateModuleBody(options) + ';\n' + (options.moduleInclude ? options.moduleInclude + ';\n' : ''); switch (format) { case 'cjs': out += banner; out += body; out += 'module.exports = lexer.lexer = lexer;'; break; case 'esm': out += banner; out += 'export ' + body; out += 'export default lexer;\n'; break; case 'iife': out += '(function(){\n'; out += banner; out += body; out += 'return lexer;\n'; out += '})();\n'; break; default: throw new Error(`Unknown module format "${format}"`); } return out; } const lexerInterface = { // resets the lexer, sets new input setInput(input, yy) { this.yy = yy || this.yy || {}; this._input = input; this._more = false; this._backtrack = false; this.done = false; this.currentCondition = 'INITIAL'; this.conditionStack = ['INITIAL']; this.offset = 0; this.yytext = this.match = ''; this.yyleng = 0; this.yylineno = 0; this.yylloc = { first_line: 1, first_column: 0, last_line: 1, last_column: 0 }; if (this.options.ranges) { this.yylloc.range = [0, 0]; } return this; }, parseError(str, details) { if (!this.yy.parser) { throw new Error(str); } this.yy.parser.parseError(str, details); }, // consumes and returns one char from the input input() { const ch = this._input[this.offset]; const newline = /\r\n?|\n/g.test(ch); this.offset++; this.yyleng++; this.yytext = this.match = this.yytext + ch; if (newline) { this.yylineno++; this.yylloc.last_line++; this.yylloc.last_column = 0; } else { this.yylloc.last_column++; } if (this.options.ranges) { this.yylloc.range[1]++; } return ch; }, lineColumnForOffset(offset = this.offset, str = this._input, startLine = 1, startColumn = 0) { const rx = /\r\n?|\n/g; let line = startLine; let column = startColumn; let prevLastIndex = 0; let match; while ((match = rx.exec(str)) !== null) { if (match.index < offset) { line++; column = 0; prevLastIndex = match.index + match[0].length; } else { column += offset - prevLastIndex; prevLastIndex = -1; break; } } if (prevLastIndex !== -1) { column += offset - prevLastIndex; } return { line, column }; }, setMatch(start, end) { const newMatch = this._input.slice(start, end); const firstLoc = start !== this.offset ? this.lineColumnForOffset(start) : { line: this.yylloc.last_line, column: this.yylloc.last_column }; const lastLoc = start !== end ? this.lineColumnForOffset(end - start, newMatch, firstLoc.line, firstLoc.column) : firstLoc; this.yytext = this.match = newMatch; this.yyleng = end - start; this.offset = end; this.yylineno = lastLoc.line - 1; this.yylloc = { first_line: firstLoc.line, first_column: firstLoc.column, last_line: lastLoc.line, last_column: lastLoc.column }; if (this.options.ranges) { this.yylloc.range = [start, end]; } }, // retain first n characters of the match less(n) { const start = this.offset - this.match.length; this.setMatch(start, start + n); }, // unshifts part of matched text unput(chunk) { const newMatchLength = this.match.length - chunk.length; if (!this.match.startsWith(chunk, newMatchLength)) { throw new Error('Lexer#unput() should receive a part of matched text only'); } this.less(newMatchLength); }, // When called from action, caches matched text and appends it on next action more() { this._more = true; }, // When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead. reject() { if (this.options.backtrack_lexer) { this._backtrack = true; return; } // FIXME: remove method when options.backtrack_lexer is false throw new Error('reject() is allowed only when options.backtrack_lexer = true'); }, // displays the character position where the lexing error occurred, i.e. for error messages showPosition(offset = this.offset - this.match.length) { const input = this._input; const start = Math.max(offset - 20, 0); const end = Math.min(offset + 20, input.length); const pre = (start === 0 ? '' : '...') + input.slice(start, offset).replace(/\r\n?|\n/g, '\\n'); const post = input.slice(offset, end).replace(/\r\n?|\n/g, '\\n') + (end === input.length ? '' : '...'); return ( pre + post + `\n${'-'.repeat(pre.length)}^` ); }, // test the lexed token: return FALSE when not a match, otherwise return token test_match(match, rule) { let backup; if (this.options.backtrack_lexer) { // save context backup = { yylineno: this.yylineno, yylloc: { first_line: this.yylloc.first_line, last_line: this.yylloc.last_line, first_column: this.yylloc.first_column, last_column: this.yylloc.last_column }, yytext: this.yytext, match: this.match, matches: this.matches, yyleng: this.yyleng, offset: this.offset, _more: this._more, yy: this.yy, conditionStack: this.conditionStack.slice(0), done: this.done }; if (this.options.ranges) { backup.yylloc.range = this.yylloc.range.slice(0); } } this._more = false; this._backtrack = false; this.setMatch(this.offset, this.offset + match[0].length); this.matches = match; // perform action const token = this.performAction.call( this, this.yy, this, rule, this.conditionStack[this.conditionStack.length - 1] ); if (!token && this._backtrack) { // recover context Object.assign(this, backup); return false; // rule action called reject() implying the next rule should be tested instead. } if (!this.eof()) { this.done = false; } if (typeof token === 'number' && this.yy.parser) { return this.yy.parser.terminals_[token]; } return token || false; }, eof() { return this.offset === this._input.length; }, // return next match in input next() { if (this.done) { return 1; // EOF } if (this.eof()) { this.done = true; } if (!this._more) { this.yytext = ''; this.match = ''; } let match; let matchRuleId; for (const ruleId of this.conditions[this.currentCondition].rules) { const rule = this.rules[ruleId]; const ruleInput = this._input.slice(this.offset); let tempMatch; if (typeof rule === 'function') { const ret = rule(ruleInput, this.currentCondition); switch (typeof ret) { case 'string': tempMatch = [ret]; break; case 'number': tempMatch = [ruleInput.slice(0, ret)]; break; default: tempMatch = ret; } } else { tempMatch = ruleInput.match(rule); } if (tempMatch && (!match || tempMatch[0].length > match[0].length)) { match = tempMatch; matchRuleId = ruleId; if (this.options.backtrack_lexer) { const token = this.test_match(tempMatch, matchRuleId); if (token === false && this._backtrack) { match = false; continue; // rule action called reject() implying a rule MISmatch. } // when token is false this is a lexer rule which consumes // input without producing a token (e.g. whitespace) return token; } if (!this.options.flex) { break; } } } if (match) { // when token is false this is a lexer rule which consumes // input without producing a token (e.g. whitespace) return this.test_match(match, matchRuleId); } if (this.eof()) { return 1; // EOF } this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(), { text: '', token: null, line: this.yylineno }); }, // return next match that has a token lex() { let token; while (!token) { token = this.next(); } return token; }, // activates a new lexer condition state (pushes the new lexer condition state onto the condition stack) begin(condition) { this.currentCondition = condition; this.conditionStack.push(condition); }, // alias for begin(condition) pushState(condition) { this.begin(condition); }, // pop the previously active lexer condition state off the condition stack popState() { this.conditionStack.pop(); return this.currentCondition = (this.conditionStack.length ? this.conditionStack[this.conditionStack.length - 1] : 'INITIAL' ); }, // return the currently active lexer condition state; // when an index argument is provided it produces the N-th previous condition state, if available topState(n) { n = this.conditionStack.length - 1 - Math.abs(n || 0); return n >= 0 ? this.conditionStack[n] : 'INITIAL'; }, // return the number of states pushed stateStackSize() { return this.conditionStack.length; } };