UNPKG

mframejs

Version:
336 lines (257 loc) 9.86 kB
import { SymbolContainer } from './symbolContainer';
import { SymbolTemplate } from './symbolTemplate';
import { ITokens } from '../../interface/exported';

/**
 * Top-down operator precedence (Pratt) parser, following Douglas Crockford's
 * write-up of Vaughan Pratt's technique:
 * https://crockford.com/javascript/tdop/tdop.html
 * (a copy was added to the docs folder so it does not get lost).
 * Not everything from the paper is used.
 *
 * This class does most of the heavy lifting of generating the AST for our expressions.
 * Before parsing it needs a symbol container (see `addSymbolContainer`) and the
 * symbols registered in it (see `symbol`, `prefix`, `infix`, `infixr`, `stmt`, `assignment`).
 */
export class AST {
    public behavior: any;
    public valueConverter: any;
    private tokens: ITokens[];
    public currentStatement: any = null;
    public currentTokenIndex = 0;
    public currentToken: any;
    private symbolContainer: SymbolContainer;
    public statementsArray: any[];

    /**
     * Starts parsing the given tokens.
     * Resets the parser state, loads the first token and parses all statements.
     *
     * @param tokens tokens to parse
     * @returns whatever `statements()` returns: a single statement, an array of statements, or null
     */
    public start(tokens: ITokens[]) {
        this.tokens = tokens;

        // clear state left over from a previous run
        this.currentStatement = null;
        this.currentTokenIndex = 0;
        this.currentToken = null;

        // load the first token, then generate the ast
        this.advance();
        return this.statements();
    }

    /**
     * Sets the symbol container used to look up and store symbols.
     *
     * @param symbolContainer container indexed by symbol id
     */
    public addSymbolContainer(symbolContainer: SymbolContainer) {
        this.symbolContainer = symbolContainer;
    }

    /**
     * Returns the symbol registered under `id`, creating it when missing.
     * If the symbol already exists its left binding power is raised to `bp` when `bp` is
     * greater than or equal to the current one; it is never lowered.
     * Otherwise a new SymbolTemplate is created with `id` as both id and value and `bp`
     * as left binding power, stored in the symbol container and returned.
     *
     * @param id symbol id
     * @param bp optional binding power, defaults to 0
     * @returns the existing or newly created symbol
     */
    public symbol(id: string, bp?: number) {
        const power = bp || 0;
        let s = this.symbolContainer[id];

        if (s) {
            if (power >= s.lbp) {
                s.lbp = power;
            }
        } else {
            s = new SymbolTemplate();
            s.id = s.value = id;
            s.lbp = power;
            this.symbolContainer[id] = s;
        }

        return s;
    }

    /**
     * Registers a prefix operator.
     * A prefix does not have a left binding power because it does not bind to the left.
     * When no `nud` is supplied, the default parses the operand with binding power 70,
     * stores it in `first` and marks the token as 'unary'.
     *
     * @param id symbol id
     * @param nud optional handler, called with the token as `this`
     * @returns the symbol
     */
    public prefix(id: string, nud?: Function) {
        const s = this.symbol(id);
        const astInstance = this;

        s.nud = nud || function (this: any) {
            this.first = astInstance.expression(70);
            this.arity = 'unary';
            return this;
        };

        return s;
    }

    /**
     * Registers a left associative infix operator.
     * When no `led` is supplied, the default stores the left operand in `first`, parses the
     * right operand into `second` and marks the token as 'binary'.
     *
     * @param id symbol id
     * @param bp optional binding power
     * @param led optional handler, called with the token as `this` and the left operand as argument
     * @returns the symbol
     */
    public infix(id: string, bp?: number, led?: Function) {
        const s = this.symbol(id, bp);
        const astInstance = this;

        s.led = led || function (this: any, left: any) {
            this.first = left;
            // Without an explicit bp, fall back to the symbol's own left binding power.
            // Passing undefined on makes every `rbp < lbp` comparison in expression() false,
            // so the right operand would never absorb tighter-binding operators.
            this.second = astInstance.expression(bp === undefined ? s.lbp : bp);
            this.arity = 'binary';
            return this;
        };

        return s;
    }

    /**
     * Registers a right associative infix operator (for example short-circuiting logical
     * operators). Right associativity comes from parsing the right operand with `bp - 1`.
     *
     * @param id symbol id
     * @param bp binding power
     * @param led optional handler, called with the token as `this` and the left operand as argument
     * @returns the symbol
     */
    public infixr(id: string, bp: number, led?: Function) {
        const s = this.symbol(id, bp);
        const astInstance = this;

        s.led = led || function (this: any, left: any) {
            this.first = left;
            this.second = astInstance.expression(bp - 1);
            this.arity = 'binary';
            return this;
        };

        return s;
    }

    /**
     * Registers a statement symbol.
     *
     * @param id statement id
     * @param f std function, invoked by `statement()` when the current token has this id
     * @returns the symbol
     */
    public stmt(id: string, f: Function) {
        const x = this.symbol(id);
        x.std = f;
        return x;
    }

    /**
     * Registers an assignment operator.
     * This is a specialised right associative infix (binding power 10) that does two extra things:
     * it calls `left.error('Bad lvalue.')` unless the left operand is a '.' or '[' node or has
     * arity 'variable', and it sets `assignment = true` on the node so assignments are quick to identify.
     *
     * @param id symbol id
     * @returns the symbol
     */
    public assignment(id: string) {
        const astInstance = this;

        return this.infixr(id, 10, function (this: any, left: any) {
            if (left.id !== '.' && left.id !== '[' && left.arity !== 'variable') {
                left.error('Bad lvalue.');
            }
            this.first = left;
            this.second = astInstance.expression(9);
            this.assignment = true;
            this.arity = 'binary';
            return this;
        });
    }

    /**
     * Parses one statement.
     * If the current token has an std method, the parser advances past it and the std is invoked.
     * Otherwise the statement is parsed as an expression with binding power 0.
     *
     * @returns the parsed statement
     */
    public statement() {
        const n = this.currentToken;

        if (n.std) {
            this.advance();
            return n.std();
        }

        return this.expression(0);
    }

    /**
     * Parses statements until the current token is '(end)' or '}', which signals the end of a block.
     *
     * The statements are collected in a local array, so a nested call (made while parsing one of
     * the statements) cannot reset or take over the list of an outer call. `statementsArray` is
     * kept pointing at the list of the call that is currently collecting.
     *
     * @returns a statement, an array of statements, or null if there were no statements present
     */
    public statements() {
        const collected: any[] = [];
        this.statementsArray = collected;

        while (this.currentToken.id !== '}' && this.currentToken.id !== '(end)') {
            const parsed = this.statement();
            // a nested statements() call may have re-pointed the public field; restore it
            this.statementsArray = collected;
            if (parsed) {
                collected.push(parsed);
            }
        }

        if (collected.length === 0) {
            return null;
        }
        if (collected.length === 1) {
            return collected[0];
        }
        return collected;
    }

    /**
     * Makes a new token object from the next simple token in the array and assigns it to `currentToken`.
     * The new token's prototype is the '(variable)' symbol, the '(literal)' symbol or the operator's
     * own symbol from the symbol container; its arity is 'variable', 'literal' or 'operator'.
     * The arity may be changed later (for example to 'binary' or 'unary') by the symbol handlers.
     * When all tokens are consumed, `currentToken` becomes the '(end)' symbol.
     *
     * @param expected optional id; only '}' has an effect: the parser does not advance when the
     * following token, or the most recently consumed one, is '|' or '&'
     * @returns the new current token, or undefined when nothing was consumed
     * @throws TypeError when no symbol is available for the token (unknown operator or token type)
     */
    public advance(expected?: string) {
        const token = this.tokens[this.currentTokenIndex];

        if (token && expected) {
            const nextToken = this.tokens[this.currentTokenIndex + 1];
            if (expected === '}' && nextToken && (nextToken.value === '|' || nextToken.value === '&')) {
                return;
            }

            // -> will happen with a single value inside an expression: ${value | valueConverter}
            // currentTokenIndex - 1 is the token that was consumed most recently
            const prevToken = this.tokens[this.currentTokenIndex - 1];
            if (expected === '}' && prevToken && (prevToken.value === '|' || prevToken.value === '&')) {
                return;
            }
        }

        if (this.currentTokenIndex >= this.tokens.length) {
            this.currentToken = this.symbolContainer['(end)'];
            return;
        }

        this.currentTokenIndex += 1;
        const value = token.value;
        let type = token.type;
        let o;

        if (type === 'variable') {
            o = this.symbolContainer['(variable)'];
        } else if (type === 'operator') {
            o = this.symbolContainer[value];
            if (!o) {
                console.warn('Unknown operator.', token);
            }
        } else if (type === 'string' || type === 'number') {
            type = 'literal';
            o = this.symbolContainer['(literal)'];
        } else {
            console.warn('Unexpected token.', token);
        }

        if (!o) {
            // Object.create(undefined) would throw an opaque TypeError at this point anyway;
            // fail with a message that names the offending token instead.
            throw new TypeError('No symbol found for token "' + value + '" of type "' + type + '".');
        }

        this.currentToken = Object.create(o);
        this.currentToken.value = value;
        this.currentToken.arity = type;
        if ((token as any).root) {
            this.currentToken.root = true;
        }

        return this.currentToken;
    }

    /**
     * The heart of Pratt's technique.
     * Calls `nud` on the current token, then keeps calling `led` on the following tokens for as
     * long as their left binding power is greater than `rbp`.
     * An error thrown while handling those following tokens is logged and not rethrown;
     * what was parsed up to that point is returned.
     *
     * @param rbp right binding power, controls how aggressively this binds to tokens on its right
     * @returns the parsed expression node
     */
    public expression(rbp: any) {
        let token = this.currentToken;
        this.advance();
        let left = token.nud();

        try {
            while (rbp < this.currentToken.lbp) {
                token = this.currentToken;
                this.advance();
                left = token.led(left);
            }
        } catch (e) {
            // best effort: keep the partial result, but do not hide what went wrong
            console.warn('parser fail', e);
        }

        return left;
    }
}