UNPKG

@sap/cds-compiler

Version:

CDS (Core Data Services) compiler and backends

1,073 lines (978 loc) 41.6 kB
// Base class for generated parser, for redepage v0.3.1 'use strict'; // TODO: instance method // name → true, list of predicates which are tested for rule exit // const ruleExitPredicates = {}; // list of predicates which are tested when continue parsing after error starts, // i.e. there is a predicate on the first token to match after recover example // `afterBrace` or just method which by default just sets this.conditionTokenIdx // and this.conditionStackLength and returns true? class BaseParser { keywords; table; lexer; tokens = undefined; tokenIdx = 0; conditionTokenIdx = -1; errorTokenIdx = -1; // token index where error is detected recoverTokenIdx = -1; // token index where error recovery resumes reuseErrorTokenIdx = null; // tmp special for error recovery “token-reuse” fixKeywordTokenIdx = -1; conditionStackLength = -1; nextTokenAsId = false; s = null; errorState = null; stack = []; dynamic_ = {}; // TODO: extra class prec_ = null; $hasErrors = null; leanConditions = {}; // trace: trace = []; constructor( lexer, keywords, table ) { this.keywords = { __proto__: null, ...keywords }; this.table = compileTable( table ); this.lexer = lexer; } init() { this.lexer.tokenize( this ); return this; } _runTransparently( callback ) { const { tokenIdx } = this; const saved = this._saveForWalk(); const { length } = this.stack; const r = callback(); this.stack.length = length; Object.assign( this, saved ); this.tokenIdx = tokenIdx; return r; } _saveForWalk() { return { s: this.s, stack: this.stack, dynamic_: this.dynamic_, prec_: this.prec_, // TODO: necessary? }; } _cloneFromSaved( saved ) { // non-deep: Object.assign this.s = saved.s; this.stack = saved.stack.map( obj => ({ ...obj }) ); this.dynamic_ = this._cloneDynamic( saved.dynamic_ ); this.prec_ = saved.prec_; } _cloneDynamic( dynamic_ ) { let chain = []; while (dynamic_ !== Object.prototype) { const obj = {}; for (const [ prop, val ] of Object.entries( dynamic_ )) obj[prop] = Array.isArray( val ) ? [ ...val ] : val; chain.push( obj ); dynamic_ = Object.getPrototypeOf( dynamic_ ); } let copy = Object.prototype; let { length } = chain; while (--length >= 0) copy = { __proto__: copy, ...chain[length] }; return copy; } // methods for actions -------------------------------------------------------- la() { // lookahead: complete token return this.tokens[this.tokenIdx]; } lb( k = 1 ) { // look back: complete token return this.tokens[this.tokenIdx - k]; } lr() { // return the first token matched by current rule return this.tokens[this.stack[this.stack.length - 1].tokenIdx]; } // lookahead, error: ---------------------------------------------------------- l() { // lookahead: token type return this.la().type; } // instead of l() if keyword (reserved and/or unreserved) is in one of the cases lk() { // keyword lookahead const la = this.la(); if (!this.nextTokenAsId) return la.keyword || la.type; // return la.keyword && this.table[this.s][la.keyword] && la.keyword || la.type; this.nextTokenAsId = false; return la.type; } e() { // error: report and recover const la = this.la(); this._trace( 'detect parsing error' ); // TODO: write reason (la, no tokens matched, …) if (this.errorTokenIdx === this.tokenIdx && (this.reuseErrorTokenIdx == null && this.reuseErrorTokenIdx < this.tokenIdx)) throw Error( `Already reported error for ${ tokenFullName( la ) } at ${ la.location }`); la.parsedAs = ''; // current token is erroneous this.errorTokenIdx = this.tokenIdx; this.conditionStackLength = null; let { length } = this.stack; while (--length && this.tokenIdx === this.stack[length].tokenIdx) this.stack[length].followState = null; if (++length === this.stack.length) // last good state in current rule return this._reportAndRecover(); // otherwise report+recovery after unwind in exit_() this.stack[length].followState = this.errorState; this.s = null; return false; } // instead of e() in default if lk() had been used and 'Id' is in a non-default case ei() { // error (after trying to test again as identifier) if (!this.la().keyword) // lk() had directly returned the type return this.e(); this.nextTokenAsId = true; return false; // do not execute action after it } // goto state: ---------------------------------------------------------------- // go to end of the rule, in tracing parser: g(0) gr( follow ) { // intersection follow set for fast exit if (this.stack[this.stack.length - 1].tokenIdx === this.tokenIdx) //this.tokenIdx >= this.errorTokenIdx) not necessary return this.e(); // match at least one token this.s = 0; // TODO: also have recursive flag in stack: was rule was called recursively? // extra val 'gr' when rule was called when it could reach the rule end const { type, keyword } = this.tokens[this.tokenIdx]; if (keyword && // Id also for unreserved, except after condition failure follow?.[0] === 'Id' && !this.keywords[keyword] && this.fixKeywordTokenIdx !== this.tokenIdx || follow?.includes( keyword || type )) { this._tracePush( [ 'E', true ] ); return true; } this._tracePush( [ 'E', 0 ] ); const match = this._matchesInFollow( type, keyword, 'E' ); // If the parser reaches this point with match = null, even the top-level rule // does not have a required token (typically `EOF`) at the end → the parser // must accept any token → rule exit possible (but no output '✔' in trace). return (match ?? true) || this.e(); } // go to state; non-tracing parser: `this.s=‹state›` or `this.gr()` g( state, follow ) { if (!(state == null ? this.e() : state || this.gr( follow ))) return false; this.s = state; // is just `this.s=‹state›` in non-trace parser this._tracePush( this.s ); return true; } // instead of gi() for `Id<greedy>` giA( state, follow ) { // go to state (after trying to test again as identifier) if (!this.tokens[this.tokenIdx].keyword) // lk() had directly returned the type return this.g( state, follow ); this.nextTokenAsId = true; return false; // do not execute action after it } // instead of g() in default if lk() had been used and 'Id' is in a non-default case gi( state, follow ) { // go to state (after trying to test again as identifier) const lk = this.tokens[this.tokenIdx].keyword; // As opposed to ei(), we also check for reserved keywords here; this way, we // do not have to add reserved keywords from the follow-set to the `switch`. if (!lk || this.keywords[lk]) // TODO: consider fixKeywordTokenIdx ? return this.g( state, follow ); this.nextTokenAsId = true; return false; // do not execute action after it } // instead of g() in a non-default case if there is a LL1 conflict gP( state, follow ) { // goto state with standard weak-conflict prediction return this.lP( follow ) && this.g( state ); } // match and consume token: --------------------------------------------------- m( state, token ) { // match token = compare and consume return (this.tokens[this.tokenIdx].type === token) ? this.c( state ) : this.e(); } // instead of m() for identifiers via `Id` or `Id<weak>` mi( state, ident = true ) { // match identifier token return (this.tokens[this.tokenIdx].type === 'Id') ? this.ci( state, ident ) : this.e(); } // instead of mi() for `Id<greedy>` miA( state, ident = true ) { // match identifier token return (this.tokens[this.tokenIdx].type === 'Id') ? this.ciA( state, ident ) : this.e(); } // instead of m() for reserved keywords or unreserved without conflict: mk( state, token ) { // match keyword token return (this.tokens[this.tokenIdx].keyword === token) ? this.ck( state ) : this.e(); } c( state, parsedAs = 'token' ) { // consume token const la = this.tokens[this.tokenIdx++]; // ++ now also for EOF la.parsedAs = parsedAs; this.s = state; this.errorState = state; if (this.constructor.tracingParser) this._trace( `consume ${ tokenFullName( la, ' as ' ) }`, la ); return true; } // instead of c() for identifiers, used both with l() and lk() ci( state, ident = 'ident' ) { // consume identifier token if (this.tokenIdx === this.fixKeywordTokenIdx) return this.e(); const la = this.tokens[this.tokenIdx]; // TODO: consider this like a failed condition? Will be relevant if we try // different error recovery possibilities. if (this.keywords[la.keyword]) { this._tracePush( [ 'R' ] ); if (this._runTransparently( () => { ++this.tokenIdx; this.s = state; const { type, keyword } = this.la(); return !(this._pred_next( type, keyword, 'R' ) ?? this._matchesInFollow( type, keyword, 'R' )); } )) return this.e(); this.reportReservedWord_(); // with error recovery: use that (consider this having a good score) } return this.c( state, ident ) } // instead of ci() for `Id<greedy>`, used both with l() and lk() ciA( state, ident = 'ident' ) { // consume identifier token, the "All" variant return this.c( state, ident ) } // instead of c() for reserved or unreserved without conflict, requires lk() ck( state ) { // consume keyword token return this.c( state, 'keyword' ) } // instead of ck() if there is a LL1 conflict ckP( state, first2 ) { // consume unreserved keyword with weak conflict return this.lP( first2 ) && this.ck( state ); } // for parser token or token set via `/` -> cx() ? ckA( state ) { // if it really should be considered an Id, `set this.la().parsedAs` yourself return this.c( state, (this.l() === 'Id' ? 'keyword' : 'token') ); } skipToken_() { ++this.tokenIdx; } reuseToken_() { --this.tokenIdx; } // condition and precedence handling ------------------------------------------ // state must match the goto-state of the default (there must be no default // action), or null for error, lP() must have been used before. There is no // “or Id” behavior other than via gpP() // “go if user condition fails” gc( state, cond, arg ) { if (this.conditionTokenIdx === this.tokenIdx && // tested on same this.conditionStackLength == null && // after error recovery !this[cond].afterError) { this._tracePush( [ 'C' ] ); return true; } // TODO: let this[cond]( true ) return recovery badness in error case if (this.constructor.tracingParser) { const { traceName } = this[cond]; this._tracePush( [ 'C', traceName?.call( this, arg ) ?? cond ] ); } // calling the condition might have side effects (precendence conditions have) // → call tracing “name” before const fail = this[cond]( arg, true ); // TODO: use single-letter for run! → 'X' if (this.constructor.tracingParser) this._traceSubPush( !fail ); // The default case must not have actions. If written in grammar with action, // the default must currently have <default=true> if (fail) { // TODO: extra gcK() method instead of check below // TODO: probably remove the following (and `conditionStackLength` tests) // altogether, error with gr() should be enough // if (this.conditionTokenIdx === this.tokenIdx && // this.conditionStackLength == this.stack.length) // return this.e(); // already failed on same token in same rule // TODO: extra method necessary for academic case // ( 'unreserved' 'foo' | <cond> Id 'bar' )` with input `unreserved bar` const { keyword } = this.la(); if (keyword && this.table[this.s][keyword]) this.fixKeywordTokenIdx = this.tokenIdx; this.conditionTokenIdx = this.tokenIdx; this.conditionStackLength = this.stack.length; this.conditionName = cond; // we also set the failure here, because the reporting might have a // different context (consider immediate exit) this.conditionFailure = fail; } return !fail || this.g( state ) && false; } ec( cond, arg ) { return this.gc( null, cond, arg ); } // predefined guard: isNoKeywordInRuleFollow( _arg, mode ) { const { keyword } = this.la(); if (this.constructor.tracingParser && (mode === true || mode === 'M')) { // TODO: mode === 'X' || mode === 'M' --this.trace.at(-1).length; // do not show guard name in trace if (!keyword || this.keywords[keyword] == null) return false; // ok const r = this._matchesInFollow( 'Id', keyword, 'F' ); --this.trace.at(-1).length; // this.gc() also traces result return r; } if (!keyword || this.keywords[keyword] == null) return false; // ok return this._matchesInFollow( 'Id', keyword, 'F' ); // TODO: still extra tests for 'F' in _pred_next()? } // rule start, end and call: -------------------------------------------------- rule_( state, followState = -1 ) { // start rule this.s = state; this._trace( [ 'call rule', state, ' at alt start' ] ); this.stack.push( { ruleState: state, followState, tokenIdx: this.tokenIdx, prec: this.prec_, } ); this.dynamic_ = Object.create( this.dynamic_ ); this.prec_ = null; this.errorState ??= state; } exit_() { // exit rule if (this.s) throw Error( `this.s === ${ this.s } // illegally set by action, or runtime/generator bug` ); this.dynamic_ = Object.getPrototypeOf( this.dynamic_ ); const caller = this.stack.pop(); const immediately = this.tokenIdx === caller.tokenIdx && this.tokenIdx >= this.errorTokenIdx; if (this.constructor.tracingParser) { const post = this.s == null && (immediately ? ' immediately' // no token matched (further unwind or to last good state) : caller.followState == null ? ' unsuccessfully' // further error rewind : ' prematurely'); // continue at sync state after exit const text = immediately ? '⚠ exit rule' : '⏎ exit rule'; this.s = caller.followState; // for trace this._trace( [ text, caller.ruleState, post, 'back to' ] ) if (immediately && this.stack.at(-1)?.followState != null) this.trace = [ this.errorState ]; // show last good state in trace } this.prec_ = caller.prec; this.s = caller.followState; if (immediately) // this.gr() has already called this.e() if no token is matched return this.s != null && this._reportAndRecover(); if (this.s != null) this.errorState = this.s; // last good state is now after rule call return true; // attached actions are executed even with "unsuccessful exit" } // predicate used before rule call (and called by `ckP` and `gP`) on keyword // branch if with weak LL(1) conflict, i.e. there is an 'Id' branch or the // default branch has `Id` in its first-set (TODO: or rule end, and `Id` is in // follow-union) lP( first2 ) { // nothing to check if not a non-reserved keyword: const { keyword: lk1 } = this.tokens[this.tokenIdx]; if (!lk1 || this.keywords[lk1] !== 0 || this.fixKeywordTokenIdx === this.tokenIdx) return true; this._tracePush( [ 'K' ] ); const { type: lt2, keyword: lk2 } = this.tokens[this.tokenIdx + 1]; if (lt2 === 'IllegalToken') return true // Argument first2 is just a performance hint: if (lk2 && first2?.[0] === 'Id' && !this.keywords[lk2] || first2?.includes( lk2 || lt2 )) { this._traceSubPush( true ); return true; } // now check it dynamically: if (this._walkPred( this.table[this.s][lk1], lk1, lt2, lk2 )) return true; this._tracePush( [ 'I' ] ); const choice = this.table[this.s]; if (!this._walkPred( choice.Id || choice[''], null, lt2, lk2 )) return true; this.nextTokenAsId = true; return false; } _walkPred( cmd, lk1, lt2, lk2 ) { const saved = this._saveForWalk(); const { length } = this.stack; if (typeof cmd[0] !== 'number') // don't skip push to state with rule call this.s = cmd[1]; if (cmd[0] !== (lk1 ? 'ck' : 'ci')) { // make the std case fast // TODO: also not with lean condition let match1 = this._pred_next( 'Id', lk1, 'P' ); // first step of `K`/`I` prediction if (!match1) { if (lk1) { // assert for correct code generation // Remark: this._pred_next() returns false also if rule has not matched any token const { location } = this.la(); throw Error( `Cannot match first prediction token at ${ location.line }:${ location.col } in rule at state ${ saved.s }` ); } if (match1 == null) { // TODO: just return true, rule exit prediction will do it this._traceSubPush( 0 ); // TODO: make _pred_next push this match1 = this._matchesInFollow( 'Id', lk1, 'I' ); } else { this._traceSubPush( false ); } Object.assign( this, saved ); this.stack.length = length; return !!match1; } } this._traceSubPush( '' ); // between the two tokens ++this.tokenIdx; // for user lookahead fns and conditions const mode = lk1 ? 'K' : 'I'; let match2 = this._pred_next( lt2, lk2, mode ); if (match2 == null) { this._traceSubPush( 0 ); // TODO: make _pred_next push this match2 = !!this._matchesInFollow( lt2, lk2, mode ); // TODO: we might use mode 'E' in _matchesInFollow (depends on caching) } else { this._traceSubPush( match2 ); } Object.assign( this, saved ); this.stack.length = length; --this.tokenIdx; return match2; } // Now the helper methods ===================================================== // Standard weak-conflict predicate ------------------------------------------- /** * Return whether current token (its type and keyword are args - TODO delete?) * would be matched when starting at the current state: * - true/false are definite answers, * - null: reached end-of-rule (let caller decide what to do). * * Changes by side-effect: * - this.s * - with mode='P' (first step in keyword prediction) if a rule is called: * this.stack, this.dynamic_, this.prec_ * * Conditions are only evaluated with mode='M' (expected set in msgs) or if * condition is listed in `this.leanConditions`. */ _pred_next( type, keyword, mode ) { // mode = P | K | I | E | R | M const properCall = (mode === 'P'); const lean = (mode !== 'M'); // TODO: extra method with conditions ? // TODO: if false, use condition in this.leanConditions let hasMatchedToken = null; // undecided yet → calculate on demand while (this.s) { if (lean) this._traceSubPush( this.s ); else this._tracePush( this.s ); // TODO: push new state instead let cmd = this.table[this.s]; if (!Array.isArray( cmd )) { const lookahead = cmd[' lookahead']; const c = lookahead // TODO: call with { keyword, type } ? ? cmd[this[lookahead]( mode )] : keyword && cmd[keyword] || cmd[type]; cmd = !(c && this._rejectCondition( c, mode, lean )) && c || cmd['']; } const state = this.s; this.s = cmd[1]; switch (cmd[0]) { case 'c': case 'ck': case 'ckA': // TODO: re-check ckA return true; case 'ciA': // TODO: fixKeywordTokenIdx ? return mode !== 'F'; // in the R prediction for optional `Id<weak>` at rule end, only // alternative keyword matches are preferred, not identifier matches // TODO: delete this prediction case 'ci': if (!keyword || !this.keywords[keyword] && this.fixKeywordTokenIdx !== this.tokenIdx) return mode !== 'F'; cmd = this.table[state]['']; // is currently always 'g' or 'e' this.s = cmd[1]; break; case 'm': return type === cmd[2]; case 'mi': return type === 'Id' && mode !== 'F' && (!keyword || !this.keywords[keyword] && this.fixKeywordTokenIdx !== this.tokenIdx); case 'miA': return type === 'Id' && mode !== 'F'; case 'mk': return keyword === cmd[2]; case 'g': case 'e': break; default: if (typeof cmd[0] !== 'number') throw Error( `Unexpected command ${ cmd[0] } at state ${ state }` ); // If the parser enters a rule, reaching the rule end (can happen with // option `minTokensMatched`) means "no match". hasMatchedToken = false; // If we want to support conditions before matching the first token in a // rule, we would have to handle `this.stack` and `this.dynamically_`. if (properCall) { // rule_() - TODO: also w/ conditions before matching first token this.stack.push( { ruleState: cmd[1], followState: cmd[0], tokenIdx: this.tokenIdx, prec: this.prec_, } ); this.dynamic_ = Object.create( this.dynamic_ ); this.prec_ = null; } } // We could optimize with rule call - only 'Id' must be further investigated // TODO: actually also with `g` // in both cases if no condition is evaluated // TODO <prepare=…, arg=…> for real trial run also before all returns // if (cmd[5]) // this.cmd[5]( cmd[4], mode ); } // If invalid state, the second token does not match, e.g. for `VIRTUAL +` // or `VIRTUAL ⎀` (with IllegalToken): if (this.s == null) return false; // Otherwise, the parser could end the rule after having matched the keyword // with prediction. TODO: as we do not look behind the current rule for the // prediction, the tool can normally omit the prediction (and output a // message), no so with `ruleStartingWithUnreserved`. We will rather look // behind the current rule _after_ having decided that the token is to be // matched as identifier. return (hasMatchedToken ?? this.tokenIdx > this.stack.at( -1 ).tokenIdx) && null; // let caller decide how to interpret this } _rejectCondition( cmd, mode, lean ) { const cond = cmd[3]; if (!cond || lean && !this.leanConditions[cond]) return false; if (!this.constructor.tracingParser) return !!this[cond]( cmd[4], mode ); // TODO: let this[cond]( true ) return recovery badness in error case if (!lean) { const { traceName } = this[cond]; this._tracePush( [ 'C', traceName?.call( this, cmd[4] ) ?? cond ] ); // calling the condition might have side effects (precendence conditions have) // → call tracing “name” before } const succeed = !this[cond]( cmd[4], mode ); this._traceSubPush( lean ? { true: 'C✔', false: 'C✖' }[succeed] : succeed ); return !succeed; } _matchesInFollow( type, keyword, mode ) { // mode = E | R and K | I // TODO: now also set stack! const savedState = this.s; // TODO: caching const { dynamic_ } = this; let match; let depth = this.stack.length; // TODO: currently assumes that lookahead does not use stack.at() while (match == null && --depth) { this.dynamic_ = Object.getPrototypeOf( this.dynamic_ ); this.s = this.stack[depth].followState; // TODO: this.prec_ ? match = this._pred_next( type, keyword, mode ); this._traceSubPush( match == null ? 0 : match === (mode !== 'F') ); // successfully matching a keyword in giR() means unsuccessful match as // reserved identifer // TODO: this.stack ? } this.dynamic_ = dynamic_; this.s = savedState; return match; } _confirmExpected( token, saved ) { // mode = M const fix = /^[_a-z]/.test( token ); const [ type, keyword ] = (fix) ? [ 'Id', token ] : [ token ]; Object.assign( this.la(), { type, keyword } ); this._cloneFromSaved( saved ); this.fixKeywordTokenIdx = fix && this.tokenIdx; this.trace = []; let match; while (this.stack.length) { match = this._pred_next( type, keyword, 'M' ); if (match != null) { this._tracePush( { true: '✔', false: '✖' }[match] ); break; } this.dynamic_ = Object.getPrototypeOf( this.dynamic_ ); this.s = this.stack.pop().followState; } if (this.constructor.tracingParser) { this.stack = saved.stack; // influences indentation this._trace( tokenName( token ), 2 ); } return match ?? true; } // Set of expected and sync tokens: for error reporting and recovery ---------- // Calculate array of expected tokens / error sync set _calculateTokenSet( mode ) { // mode = M | Y this._tracePush( [ mode ] ); // TODO later (after trying different synchronization tokens), we could use // one set for both M and Y, the latter just adds more tokens to it const savedState = this.s; const savedDynamic = this.dynamic_; const savedStack = this.stack; this.stack = [ ...savedStack ]; this.s = this.errorState; const set = Object.create(null); // Add follow sets of outer rules if at potential rule end if (mode === 'M') { // for messages while (this.stack.length && this._tokenSetInRule( set, true )) { this.dynamic_ = Object.getPrototypeOf( this.dynamic_ ); this.s = this.stack.pop().followState; } } else { // or always when calculating the sync-set let val = this.stack.length + 1; while (this.stack.length) { this._tokenSetInRule( set, val ); // TODO: use if Y-M unification val = this.stack.length; // TODO: use new _tracePush if `val` changes, probably also use Y‹val›(…) this.dynamic_ = Object.getPrototypeOf( this.dynamic_ ); this.s = this.stack.pop().followState; } set.EOF ??= 0; // TODO: really necessary, see also _findSyncToken() } this.stack = savedStack; this.s = savedState; // should be the errorState anyway - TODO: confirm this.dynamic_ = savedDynamic; return set; } // Filter after this fn for conditions via interpreter call after: consider // ( <prefer, guard=fail> 'foo' | rule ) with // rule : 'foo' | Id ; // doing it already here would list `foo` as expected token _tokenSetInRule( expecting, val, cmd, collectKeywordsAndIdOnly = false ) { const savedDynamic = this.dynamic_; const savedState = this.s; let enteredRules = 0; loop: while (this.s) { cmd ??= this.table[this.s]; if (!Array.isArray( cmd )) { const lookahead = cmd[' lookahead']; const dict = cmd; for (const prop in dict) { if (prop && Object.hasOwn( dict, prop ) && prop !== 'Id' && !Object.hasOwn( expecting, prop ) && prop.charAt(0) !== ' ') this.addTokenToSet_( expecting, prop, val, collectKeywordsAndIdOnly, lookahead ); } cmd = dict['']; if (dict.Id) { // recursive call only if Id branch with non-error default branch if (cmd[0] === 'e') { collectKeywordsAndIdOnly = true; cmd = dict.Id; } else { // Id branch never leads to rule exit (really?): this._tracePush( [ '[' ] ); this._tokenSetInRule( expecting, val, dict.Id, true ); this._tracePush( [ ']' ] ); } } } this._traceSubPush( this.s ); switch (cmd[0]) { case 'm': case 'mk': this.addTokenToSet_( expecting, cmd[2], val, collectKeywordsAndIdOnly ); break loop; case 'ci': case 'ciA': case 'mi': case 'miA': this.addTokenToSet_( expecting, 'Id', val, false ); // TODO: should we do s/th special, such that a reserved word is a sync // token for Id<greedy>? Probably not, see also comment in // _findSyncToken() break loop; case 'g': case 'gi': case 'e': break; default: if (typeof cmd[0] !== 'number') throw Error( `Unexpected command ${ cmd[0] } at state ${ this.s }` ); ++enteredRules; // conditions might use stack/dynamic_ // core rule_(): this.stack.push( { ruleState: cmd[1], followState: cmd[0], tokenIdx: this.tokenIdx, prec: this.prec_, } ); this.dynamic_ = Object.create( this.dynamic_ ); this.prec_ = null; } this.s = cmd[1]; cmd = null; } const inspectOuterRules = (this.s === 0 && !enteredRules); this.s = savedState; this.dynamic_ = savedDynamic; this.stack.length -= enteredRules; return inspectOuterRules; } // Remark: when called for `Id` token, `collectKeywordsOnly` is `false` addTokenToSet_( set, token, val, collectKeywordsOnly, _lookahead ) { if (!collectKeywordsOnly || /^[_a-z]/.test( token )) set[token] ??= val; } // Error reporting and recovery ----------------------------------------------- expectingArray_() { const token = this.la(); const set = this._calculateTokenSet( 'M' ); // Speed-up: delete current token const { keyword, type } = token; if (keyword && set[keyword] === true) delete set[keyword]; else if (set[type] === true && !(keyword && this.keywords[keyword] != null)) delete set[type]; // delete if not keyword this._trace( 'collect tokens for message' ); const { trace } = this; const saved = this._saveForWalk(); saved.fixKeywordTokenIdx = this.fixKeywordTokenIdx; // changed by confirmExpected const expecting = Object.keys( set ) .filter( tok => this._confirmExpected( tok, saved ) ); token.type = type; // overwritten by _confirmExpected token.keyword = keyword; Object.assign( this, saved ); this.trace = trace; return expecting; } _findSyncToken( syncSet ) { // only called from _reportAndRecover() const rewindDepth = this.stack.length this.recoverTokenIdx = this.tokenIdx; // TODO: make it part of the return value? while (this.recoverTokenIdx < this.tokens.length) { const { keyword, type } = this.tokens[this.recoverTokenIdx]; let recoverDepth = keyword ? syncSet[keyword] : null; if (recoverDepth != null) return recoverDepth; recoverDepth = syncSet[type]; // sync to Id only if in intra-rule expected set of last good state or if after ';'/`}` if (recoverDepth != null && (type !== 'Id' || (!keyword || !this.keywords[keyword]) && // reserved words do not match Id in expected-set → as method (recoverDepth > rewindDepth || [ ';', '}' ].includes( this.tokens[this.recoverTokenIdx - 1].type )))) // if (recoverDepth != null && // (this.recoverTokenIdx > this.tokenIdx || return recoverDepth; ++this.recoverTokenIdx; } throw Error( 'EOF must be last in `tokens`' ); // TODO: really necessary? } _reportAndRecover() { // called from this.e() and this.exit_() after rewind this.s = this.errorState; const syncSet = this._calculateTokenSet( 'Y' ); // TODO: use (extended) syncSet also for “expecting” in messages this._trace( 'investigate how to recover' ); this.recoverTokenIdx = this.tokenIdx;; const prev = this.lb(); const reuseRecoverDepth = this.reuseErrorTokenIdx != null && // only if specially switched on prev?.keyword && prev.parsedAs !== 'keyword' && this.reuseErrorTokenIdx < this.tokenIdx && syncSet[prev.keyword]; // TODO: this reuse-token will only properly work if we can check that the token at // this.la() will definitely match after having re-used this.lb() // → if we have done that, we can again remove this.reuseErrorTokenIdx this.reportUnexpectedToken_( reuseRecoverDepth ? 'reuse' : null ); if (this.reuseErrorTokenIdx != null) this.reuseErrorTokenIdx = (reuseRecoverDepth) ? this.tokenIdx : -1; this.fixKeywordTokenIdx = (reuseRecoverDepth) ? --this.recoverTokenIdx : -1; const recoverDepth = reuseRecoverDepth || this._findSyncToken( syncSet ); this.s = null; let depth = this.stack.length; if (recoverDepth > depth) { // no rewind, no rule exit this.trace = [ this.errorState ]; // show last good state in trace this.s = this.errorState; } while (depth > recoverDepth) this.stack[--depth].followState = null; // TODO: when the error is due to failed rule exit prediction, try to keep // existing followState (if that reaches RuleEnd_) // Continue parsing: ignore next predicate (TODO: except some specified ones?) // TODO: re-check for rule calls which are at the optional rule end: // x: 'x not'; b: 'b'? x {console.log('x→b')} 'b'?; a: b {console.log('b→a')} 'a' // with start rule `a` and input `x a`: output should be x→b + b→a // with start rule `a` and input `b a`: output should be b→a if (this.constructor.tracingParser) { this._trace( `skipped ${ this.recoverTokenIdx - this.tokenIdx } tokens to recover from error`, this.tokens[this.recoverTokenIdx] ); } if (this.tokenIdx > this.recoverTokenIdx) this.reuseToken_(); else while (this.tokenIdx < this.recoverTokenIdx) this.skipToken_(); this.conditionTokenIdx = this.tokenIdx; this.conditionStackLength = null; return false; } // small methods -------------------------------------------------------------- log( ...args ) { console.log( ...args ); } reportError_( location, text ) { this.$hasErrors = true; this.log( `${ location }:`, text ); } reportUnexpectedToken_( msg ) { const token = (msg === 'reuse') ? this.lb() : this.la(); msg ??= `Unexpected token ${ tokenFullName( token, ': ' ) }`; msg = (msg === 'reuse') ? `Missing input before keyword ${ tokenFullName( token, ': ' ) }` : msg + ' - expecting: ' + this.expectingArray_().map( tokenName ).sort().join( ', ' ); this.reportError_( token.location, msg ); } reportReservedWord_() { this.reportUnexpectedToken_( `Unexpected reserved word ‘${ this.la().text }’` ); } errorAndRecoverOutside( token, text ) { // TODO: re-check // TODO: TMP this.reportError_( token.location, text ); while (this.l() !== ';') this.skipToken_(); this.s = null; return false; } _tracePush( state ) { if (this.constructor.tracingParser) this.trace.push( state ?? '⚠' ); } _traceSubPush( state ) { if (this.constructor.tracingParser) this.trace.at(-1).push( state ); } traceAction( location ) { // TODO: remove this._trace( 1, location ); } _trace( msg, la = this.la() ?? this.lb() ) { if (!this.constructor.tracingParser) return; // indentation according to rule call depth is nice, but only if without // excessive spaces → truncate: const indent = ' '.repeat( this.stack.length % 32 ); if (msg === 1) { let line = ' execute action'; // align with non-action messages if (this.trace.length > 1) { // i.e. with some 'g' command line += ', states: ' + this.trace.map( traceStep ).join( ' → ' ); this.trace = [ this.s ?? '⚠' ]; } this.log( indent, line, `(${ la })` ); return; } else if (la === 2) { // confirming tokens in expected set this.log( indent, ' ', msg + ':', this.trace.map( traceStep ).join( ' → ' ) ); this.trace = [ this.s ?? '⚠' ]; return; } const { location } = la; if (!this.trace.length) { this.log( `In ${ location.file }:` ); this.trace = [ -1 ]; } this.trace.push( this.s ?? '⚠' ); if (Array.isArray( msg )) { // rule call and exit const [ intro, state, finale, exit ] = msg; let start = state; while (typeof this.table[--start] !== 'string') ; const post = (exit || start + 1 < state) && finale; msg = `${ intro } “${ this.table[start] }”${ post || '' } ${ exit || 'from' } stack level ${ this.stack.length }`; } // Yes, I know util.format, but do not want to have a `require` in this file const line = location.line < 1e5 ? ` ${ location.line }`.slice(-5) : `${ location.line }`; const col = location.col < 1e4 ? `:${ location.col } `.slice(0,5) : `:${location.col }`; this.log( line + col + indent + msg + ', states:', this.trace.map( traceStep ).join( ' → ' ) ); this.trace = [ this.s ?? '⚠' ]; } inSameRule_( lowState = this.s, highState = this.stack.at(-1).followState ) { if (lowState > highState) [ lowState, highState ] = [ highState, lowState ]; while (lowState < highState) { if (typeof this.table[++lowState] === 'string') // rule boundary return false; } return true; } // Predefined conditions with extra option names: hide_( _arg, mode ) { return mode === 'M'; } precLeft_( prec ) { // <prec=…>, <…,assoc=left>, <…,prefix=once> const parentPrec = this.stack.at( -1 ).prec; if (parentPrec != null && parentPrec >= prec) return true; this.prec_ = prec; return false; } precRight_( prec ) { // <…,assoc=right>, <…,prefix> const parentPrec = this.stack.at( -1 ).prec; if (parentPrec != null && parentPrec >= prec) return true; this.prec_ = prec - 1; return false; } precNone_( prec ) { // <…,assoc=none>, <…,postfix=once> const parentPrec = this.stack.at( -1 ).prec; if (parentPrec != null && parentPrec >= prec || this.prec_ != null && this.prec_ <= prec) return true; this.prec_ = prec; return false; } precPost_( prec ) { // <…,postfix> const parentPrec = this.stack.at( -1 ).prec; if (parentPrec != null && parentPrec >= prec || this.prec_ != null && this.prec_ < prec) return true; this.prec_ = prec; return false; } } const members = BaseParser.prototype; // functions below are to be called with `call` to set `this` members.isNoKeywordInRuleFollow.afterError = true; members.precLeft_.traceName = function( prec ) { const parentPrec = this.stack.at( -1 ).prec; return `${ parentPrec ?? '-∞' }<${ prec }`; } members.precRight_.traceName = function( prec ) { const left = this.precLeft_.traceName.call( this, prec ); return `${ left },↓`; } members.precNone_.traceName = function( prec ) { const left = this.precLeft_.traceName.call( this, prec ); return `${ left }<${ this.prec_ == null ? '∞' : this.prec_ }`; } members.precPost_.traceName = function( prec ) { const left = this.precLeft_.traceName.call( this, prec ); return `${ left }≤${ this.prec_ == null ? '∞' : this.prec_ }`; } function traceStep( step ) { if (!Array.isArray( step )) return step; const result = { true: '✔', false: '✖' }[step.at( -1 )] ?? ''; const intro = (typeof step[1] === 'number') ? '→' : ''; const arg = step.slice( 1, result ? -1 : undefined ).join( '→' ); return `${ step[0] }(${ intro }${ arg })${ result }`; } function tokenName( type ) { if (typeof type !== 'string') type = (!type.parsedAs || type.parsedAs === 'keyword') && type.keyword || type.type; return (/^[A-Z]+/.test( type )) ? `‹${ type }›` : `‘${ type }’`; } function tokenFullName( token, sep ) { return (token.parsedAs && token.parsedAs !== 'keyword' && token.parsedAs !== 'token' || token.type !== 'Id' && token.type !== token.text && token.text) ? `‘${ token.text }’${ sep }${ tokenName( token ) }` : tokenName( token ); } function compileTable( table ) { if (table.$compiled) return table; for (const line of table) { if (typeof line !== 'object' || Array.isArray( line )) continue; const cache = Object.create( null ); // very sparse array for (const prop of Object.keys( line )) { const alt = line[prop]; if (!Array.isArray( alt ) && prop.charAt(0) !== ' ') // string or number line[prop] = (typeof alt === 'string') ? line[alt] : (cache[alt] ??= [ 'g', alt ]); } if (!line['']) line[''] = [ 'e' ]; } table.$compiled = true; return table; } module.exports = BaseParser;