/*
 * parser-transform
 * Streaming+Async lexer and parser.
 */
const dfalib = require("@shimaore/dfa.js");
const {CharInput,Input,Label} = dfalib
const Stream = require('stream')
// Sentinel input marking end-of-file. A single frozen instance is shared
// via `Input.EOF` so callers can rely on identity comparison.
class InputEOF extends Input {
  constructor() {
    super()
    Object.assign(this, { char: '<<EOF>>', code: -1 })
    return Object.freeze(this)
  }
  // EOF has no character code; always reports -1.
  val() {
    return -1
  }
}
Input.EOF = new InputEOF()
// Label that matches only the EOF sentinel input. A single shared
// instance is exposed as `Label.EOF` for use in grammar rules.
// (The original declared a constructor that only called `super()`;
// that is the implicit default, so it is omitted here.)
class LabelEOF extends Label {
  // Matches iff `input` is the shared EOF sentinel (identity check).
  match(input) {
    return input === Input.EOF
  }
  // EOF labels are singletons, so label equality is identity.
  equals(label) {
    return this === label;
  }
}
Label.EOF = new LabelEOF()
// FIXME Tokens sent out should send the location of the _start_ of the lexis (aka `lexis_location`), not the location of the end+1.
class LexerTransform extends Stream.Transform {
  /**
   * Object-mode Transform turning a stream of Strings into a stream of
   * token states `{token, text, line, column}` (plus `eof: true` on the
   * final record).
   *
   * @param {Map} dfas - lexer states by name; must contain an 'INITIAL' DFA.
   * @param {Object} [options] - Stream.Transform options (objectMode is forced on).
   */
  constructor(dfas,options = {}) {
    // Copy instead of mutating the caller's options object.
    super(Object.assign({},options,{objectMode: true}))
    this.dfas = dfas
    this.unget = ''  // pending input not yet consumed by a token
    this.line = 1    // 1-based location counters (end of lexis — see FIXME above)
    this.column = 1
    this.stack = []  // saved DFAs for _begin/_pop state switching
    this.yy = {}     // scratch object shared with token actions
    this.dfa = this.dfas.get('INITIAL')
    this._newTransition()
  }
  // Reset the accumulated lexis and start a fresh DFA transition.
  _newTransition() {
    this.lexis = ''
    this.trans = this.dfa.startNewTransition()
  }
  // Switch to the named lexer state, saving the current one.
  _begin(name) {
    this.stack.push(this.dfa)
    this.dfa = this.dfas.get(name)
    this._newTransition()
  }
  // Return to the previously saved lexer state.
  _pop() {
    this.dfa = this.stack.pop()
    this._newTransition()
  }
  _transform(chunk,encoding,next) {
    if( encoding === 'buffer' || 'string' !== typeof chunk) {
      // Report misuse through the callback so the stream emits 'error'
      // instead of throwing synchronously out of write().
      next(new Error('Ensure the stream generates Strings by using `setEncoding`.'))
      return
    }
    this.lex(chunk)
    next()
  }
  // Flush hook: run the lexer once more with EOF, then emit a final
  // record carrying any unterminated text and `eof: true`.
  _final(next) {
    this.lex(null)
    let state = {
      token: null,
      text: this.lexis,
      line: this.line,
      column: this.column,
      eof: true
    }
    this.lexis = ''
    this.push(state)
    next()
  }
  // Core tokenizer loop. `chunk === null` signals end of input; the EOF
  // sentinel is then fed to the DFA (twice, so a token accepted at EOF
  // is still followed by an EOF-driven reset).
  lex(chunk) {
    let eof = 0
    if( chunk === null ) {
      eof = 2
    } else {
      this.unget += chunk
    }
    let index = 0
    while( index < this.unget.length+eof ) {
      let coch = (index < this.unget.length) ? new CharInput(this.unget[index]) : Input.EOF
      // Consume the current input: append `ch` to the lexis and advance
      // the line/column counters (EOF only advances the index).
      let progress = (ch) => {
        index++
        if( coch === Input.EOF ) {
          return // EOF
        }
        this.lexis += ch
        if(coch.char === '\n') {
          this.line++
          this.column = 1
        } else {
          this.column++
        }
      }
      if (this.trans.move(coch)) {
        // DFA advanced: consume the character, honoring the state's
        // `replace` (substitute text) and `process` (side-effect hook) attrs.
        let {replace,process} = this.trans.current.attrs;
        if(replace && typeof replace[0] === 'string') {
          progress(replace[0]);
        } else {
          progress(coch.char);
        }
        if(process && typeof process[0] === 'function') {
          process[0](this,coch);
        }
      } else {
        if(this.trans.isAcceptable()) {
          // Token boundary: the current character is NOT consumed here;
          // it is re-examined by the fresh transition started below.
          let state = {
            token: null,
            text: this.lexis,
            line: this.line,
            column: this.column,
          }
          this.lexis = ''
          state.begin = (name) => this._begin(name)
          state.pop = () => this._pop()
          state.yy = this.yy
          let action = this.trans.getAcceptedObject()
          let token = action.call(state)
          // A null/undefined token means "skip" (e.g. whitespace rules).
          if(typeof token !== 'undefined' && token !== null) {
            state.token = token;
            delete state.begin
            delete state.pop
            delete state.yy
            this.push(state);
          }
        } else {
          // No move and nothing acceptable: consume the offending
          // character and emit an ERROR token for it.
          progress(coch.char)
          // It's OK to not handle `<<EOF>>`.
          if( coch !== Input.EOF ) {
            let state = {
              token: 'ERROR',
              text: this.lexis,
              line: this.line,
              column: this.column,
            }
            this.lexis = ''
            this.push(state)
          }
        }
        // Start back from the beginning of the tokenizer.
        this._newTransition()
      }
    }
    this.unget = this.unget.slice(index);
  }
}
// Public API: the Transform stream plus the EOF label for grammar rules.
module.exports = {LexerTransform,EOF:Label.EOF};