UNPKG

parser-transform

Version:

Streaming+Async lexer and parser

162 lines (148 loc) 3.84 kB
// Streaming lexer built on the DFAs from `@shimaore/dfa.js`, exposed as an
// object-mode Transform stream: string chunks in, token-state objects out.
const dfalib = require("@shimaore/dfa.js");
const { CharInput, Input, Label } = dfalib;
const Stream = require('stream');

/**
 * Sentinel Input representing end-of-stream inside the DFA machinery.
 * Frozen singleton, available as `Input.EOF`.
 */
class InputEOF extends Input {
  constructor() {
    super();
    this.char = '<<EOF>>';
    this.code = -1;
    return Object.freeze(this);
  }

  val() {
    return -1;
  }
}
Input.EOF = new InputEOF();

/**
 * Label that matches only the EOF sentinel input.
 * Singleton, available as `Label.EOF` (re-exported below as `EOF`).
 */
class LabelEOF extends Label {
  constructor() {
    super();
  }

  match(input) {
    // Identity comparison is sufficient: `Input.EOF` is a frozen singleton.
    return input === Input.EOF;
  }

  equals(label) {
    return this === label;
  }
}
Label.EOF = new LabelEOF();

/**
 * Object-mode Transform stream that tokenizes an incoming character stream.
 *
 * @param {Map} dfas - named DFAs; lexing starts in `dfas.get('INITIAL')`.
 * @param {Object} [options] - Stream.Transform options (`objectMode` is forced on).
 *
 * Emitted states carry: `token`, `text`, `line`/`column` (position just past
 * the lexis, kept for backward compatibility) and `lexis_line`/`lexis_column`
 * (position of the START of the lexis — resolves the old FIXME about
 * end+1 locations without changing existing fields).
 */
class LexerTransform extends Stream.Transform {
  constructor(dfas, options = {}) {
    options.objectMode = true;
    super(options);
    this.dfas = dfas;
    this.unget = '';        // not-yet-consumed source text
    this.line = 1;          // 1-based current scan position
    this.column = 1;
    this.stack = [];        // DFA stack for begin/pop state switching
    this.yy = {};           // user scratch object shared with actions
    this.dfa = this.dfas.get('INITIAL');
    this._newTransition();
  }

  // Begin scanning a fresh lexis from the current position.
  _newTransition() {
    this.lexis = '';
    // Remember where this lexis starts so emitted states can report it.
    this.lexis_line = this.line;
    this.lexis_column = this.column;
    this.trans = this.dfa.startNewTransition();
  }

  // Switch to the DFA named `name`, remembering the current one.
  _begin(name) {
    this.stack.push(this.dfa);
    this.dfa = this.dfas.get(name);
    this._newTransition();
  }

  // Return to the DFA that was active before the last `_begin`.
  _pop() {
    this.dfa = this.stack.pop();
    this._newTransition();
  }

  _transform(chunk, encoding, next) {
    if (encoding === 'buffer' || 'string' !== typeof chunk) {
      // Route the misuse through the stream's error machinery rather than
      // throwing synchronously out of `_transform`.
      next(new Error('Ensure the stream generates Strings by using `setEncoding`.'));
      return;
    }
    this.lex(chunk);
    next();
  }

  _final(next) {
    // Flush: run the lexer once more with the EOF sentinel, then emit a
    // terminal state flagged `eof: true`.
    this.lex(null);
    const state = {
      token: null,
      text: this.lexis,
      line: this.line,
      column: this.column,
      lexis_line: this.lexis_line,
      lexis_column: this.lexis_column,
      eof: true,
    };
    this.lexis = '';
    this.push(state);
    next();
  }

  /**
   * Feed `chunk` (or `null` at end-of-stream) through the active DFA,
   * pushing one state object per accepted token or per error character.
   */
  lex(chunk) {
    let eof = 0;
    if (chunk === null) {
      // Extend the loop past the buffered text so the EOF sentinel is
      // presented (at most twice — bounds the EOF retry after an accept).
      eof = 2;
    } else {
      this.unget += chunk;
    }
    let index = 0;
    while (index < this.unget.length + eof) {
      const coch = (index < this.unget.length)
        ? new CharInput(this.unget[index])
        : Input.EOF;

      // Consume the current input: advance the scan position and append `ch`
      // to the lexis (`ch` may differ from the source char when a `replace`
      // attribute is in effect).
      const progress = (ch) => {
        index++;
        if (coch === Input.EOF) {
          return; // EOF carries no text and no location.
        }
        this.lexis += ch;
        // Line/column tracking follows the *source* character, not the
        // replacement text.
        if (coch.char === '\n') {
          this.line++;
          this.column = 1;
        } else {
          this.column++;
        }
      };

      if (this.trans.move(coch)) {
        const { replace, process } = this.trans.current.attrs;
        if (replace && typeof replace[0] === 'string') {
          progress(replace[0]);
        } else {
          progress(coch.char);
        }
        if (process && typeof process[0] === 'function') {
          process[0](this, coch);
        }
      } else {
        if (this.trans.isAcceptable()) {
          // Maximal munch: the failing input `coch` is NOT consumed here; it
          // will be retried against the fresh transition started below.
          const state = {
            token: null,
            text: this.lexis,
            line: this.line,
            column: this.column,
            lexis_line: this.lexis_line,
            lexis_column: this.lexis_column,
          };
          this.lexis = '';
          state.begin = (name) => this._begin(name);
          state.pop = () => this._pop();
          state.yy = this.yy;
          const action = this.trans.getAcceptedObject();
          const token = action.call(state);
          // Actions returning null/undefined suppress the token.
          if (typeof token !== 'undefined' && token !== null) {
            state.token = token;
            delete state.begin;
            delete state.pop;
            delete state.yy;
            this.push(state);
          }
        } else {
          progress(coch.char);
          // It's OK to not handle `<<EOF>>`.
          if (coch !== Input.EOF) {
            const state = {
              token: 'ERROR',
              text: this.lexis,
              line: this.line,
              column: this.column,
              lexis_line: this.lexis_line,
              lexis_column: this.lexis_column,
            };
            this.lexis = '';
            this.push(state);
          }
        }
        // Start back from the beginning of the tokenizer.
        this._newTransition();
      }
    }
    // Keep only the unconsumed tail for the next chunk.
    this.unget = this.unget.slice(index);
  }
}

module.exports = { LexerTransform, EOF: Label.EOF };