mframejs
Version:
simple framework
336 lines (257 loc) • 9.86 kB
text/typescript
import { SymbolContainer } from './symbolContainer';
import { SymbolTemplate } from './symbolTemplate';
import { ITokens } from '../../interface/exported';
/**
* Based on Douglas Crockford's paper on Vaughan Pratt's "Top Down Operator Precedence".
* For more information see
* https://crockford.com/javascript/tdop/tdop.html (a copy is kept in the docs folder so it doesn't get lost)
* Not everything from the paper is used.
*
* This class will do most of the heavy lifting of generating the AST for our expressions
* It will need symbols added and a symbol container
*
*/
export class AST {
    public behavior: any;
    public valueConverter: any;
    private tokens: ITokens[];
    public currentStatement: any = null;
    public currentTokenIndex = 0;
    public currentToken: any;
    private symbolContainer: SymbolContainer;
    public statementsArray: any[];

    /**
     * Parses a token list from the beginning and returns the resulting AST.
     *
     * Resets the parser position, loads the first token and then delegates to `statements()`.
     *
     * @param tokens tokens to parse; a null/undefined list is treated as empty
     * @returns `null` when there are no statements, a single node for one statement,
     *          or an array of nodes for several statements
     */
    public start(tokens: ITokens[]) {
        // tolerate a missing token list instead of crashing inside advance()
        this.tokens = tokens || [];

        // reset parser state
        this.currentStatement = null;
        this.currentTokenIndex = 0;
        this.currentToken = null;

        // load the first token, then build the tree
        this.advance();
        return this.statements();
    }

    /**
     * Sets the symbol container used to look up and store symbols.
     * Must be called before any symbol is registered or any tokens are parsed.
     *
     * @param symbolContainer container indexed by symbol id
     */
    public addSymbolContainer(symbolContainer: SymbolContainer) {
        this.symbolContainer = symbolContainer;
    }

    /**
     * Returns the symbol registered under `id`, creating it when it does not exist yet.
     *
     * An existing symbol only has its left binding power raised, never lowered.
     * A new symbol is a `SymbolTemplate` whose `id` and `value` are both set to `id`.
     *
     * @param id symbol id
     * @param bp left binding power, defaults to 0
     * @returns the (possibly new) symbol object
     */
    public symbol(id: string, bp?: number) {
        let s = this.symbolContainer[id];
        bp = bp || 0;
        if (s) {
            if (bp >= s.lbp) {
                s.lbp = bp;
            }
        } else {
            s = new SymbolTemplate();
            s.id = s.value = id;
            s.lbp = bp;
            this.symbolContainer[id] = s;
        }
        return s;
    }

    /**
     * Registers a prefix operator.
     *
     * The default `nud` parses its operand with a right binding power of 70, stores it in
     * `first` and marks the token as `'unary'`.
     *
     * @param id operator id
     * @param nud optional custom null-denotation handler, called with the token as `this`
     * @returns the symbol
     */
    public prefix(id: string, nud?: Function) {
        const s = this.symbol(id);
        const astInstance = this;
        s.nud = nud || function (this: any) {
            this.first = astInstance.expression(70);
            this.arity = 'unary';
            return this;
        };
        return s;
    }

    /**
     * Registers a left-associative infix operator.
     *
     * The default `led` stores the left operand in `first`, parses the right operand with
     * `bp` as right binding power into `second` and marks the token as `'binary'`.
     * Note: when `bp` is omitted the symbol gets a left binding power of 0 and the default
     * `led` calls `expression(undefined)`, whose `rbp < lbp` comparison is never true.
     *
     * @param id operator id
     * @param bp binding power
     * @param led optional custom left-denotation handler, called with the token as `this`
     * @returns the symbol
     */
    public infix(id: string, bp?: number, led?: Function) {
        const s = this.symbol(id, bp);
        const astInstance = this;
        s.led = led || function (this: any, left: any) {
            this.first = left;
            this.second = astInstance.expression(bp);
            this.arity = 'binary';
            return this;
        };
        return s;
    }

    /**
     * Registers a right-associative infix operator.
     *
     * Same as `infix`, but the default `led` parses the right operand with `bp - 1`,
     * so an operator of the same power to the right binds first.
     *
     * @param id operator id
     * @param bp binding power
     * @param led optional custom left-denotation handler, called with the token as `this`
     * @returns the symbol
     */
    public infixr(id: string, bp: number, led?: Function) {
        const s = this.symbol(id, bp);
        const astInstance = this;
        s.led = led || function (this: any, left: any) {
            this.first = left;
            this.second = astInstance.expression(bp - 1);
            this.arity = 'binary';
            return this;
        };
        return s;
    }

    /**
     * Registers a statement symbol.
     *
     * @param id statement id
     * @param f statement handler stored as `std`; `statement()` calls it with the token as `this`
     * @returns the symbol
     */
    public stmt(id: string, f: Function) {
        const x = this.symbol(id);
        x.std = f;
        return x;
    }

    /**
     * Registers an assignment operator (right-associative, binding power 10).
     *
     * Its `led` reports 'Bad lvalue.' through `left.error` unless the left operand is a
     * `.` or `[` node or has arity `'variable'`, parses the right side with binding power 9
     * and flags the node with `assignment = true`.
     *
     * @param id operator id
     * @returns the symbol
     */
    public assignment = function (this: AST, id: string) {
        const astInstance = this;
        return this.infixr(id, 10, function (this: any, left: any) {
            if (left.id !== '.' && left.id !== '[' &&
                left.arity !== 'variable') {
                left.error('Bad lvalue.');
            }
            this.first = left;
            this.second = astInstance.expression(9);
            this.assignment = true;
            this.arity = 'binary';
            return this;
        });
    };

    /**
     * Parses one statement.
     *
     * If the current token has an `std` handler, the token is consumed and the handler's
     * result is returned. Otherwise the statement is parsed as an expression with a right
     * binding power of 0.
     *
     * @returns the parsed node
     */
    public statement() {
        const n = this.currentToken;
        if (n.std) {
            this.advance();
            return n.std();
        }
        return this.expression(0);
    }

    /**
     * Parses statements until the current token is `}` or `(end)`.
     *
     * The statements are collected in a local list so that a nested call (for example from
     * a statement handler that parses a block) cannot overwrite the list of the outer call.
     * `statementsArray` is pointed at the list on entry and again on exit, so after a
     * top-level parse it holds the top-level statements.
     *
     * @returns `null` when no statement was found, the node itself when there was exactly
     *          one, otherwise the array of nodes
     */
    public statements() {
        const collected: any[] = [];
        this.statementsArray = collected;

        while (this.currentToken.id !== '}' && this.currentToken.id !== '(end)') {
            const s = this.statement();
            if (s) {
                collected.push(s);
            }
        }

        // a nested call may have re-pointed the field; restore it to this call's list
        this.statementsArray = collected;

        if (collected.length === 0) {
            return null;
        }
        return collected.length === 1 ? collected[0] : collected;
    }

    /**
     * Moves to the next token and makes it the current token.
     *
     * The new current token is an object whose prototype is the matching symbol:
     * `(variable)` for variables, `(literal)` for strings and numbers, or the operator's
     * own symbol. Its `value` is the raw token value and its `arity` is `'variable'`,
     * `'literal'` or `'operator'`. Past the last token, the current token becomes the
     * `(end)` symbol.
     *
     * `expected` is only used for one special case: when it is `'}'` and the token before
     * or after the next unread token is `|` or `&`, nothing is consumed.
     *
     * @param expected optional id of the token the caller expects to close
     * @returns the new current token, or `undefined` when nothing was consumed or the end
     *          of the token list was reached
     * @throws TypeError when the token is an unknown operator or has an unsupported type
     */
    public advance(expected?: string) {
        const token: any = this.tokens[this.currentTokenIndex];

        if (token && expected === '}') {
            // per the original note: occurs with a single `${value | valueConverter}` expression
            const nextToken: any = this.tokens[this.currentTokenIndex + 1];
            if (nextToken && (nextToken.value === '|' || nextToken.value === '&')) {
                return;
            }
            const prevToken: any = this.tokens[this.currentTokenIndex - 1];
            if (prevToken && (prevToken.value === '|' || prevToken.value === '&')) {
                return;
            }
        }

        if (this.currentTokenIndex >= this.tokens.length) {
            this.currentToken = this.symbolContainer['(end)'];
            return;
        }

        this.currentTokenIndex += 1;
        const value = token.value;
        let type: any = token.type;
        let o: any;

        if (type === 'variable') {
            o = this.symbolContainer['(variable)'];
        } else if (type === 'operator') {
            o = this.symbolContainer[value];
            if (!o) {
                console.warn('Unknown operator.', token);
                // Object.create(undefined) would throw an opaque TypeError; keep the class, explain the cause
                throw new TypeError(`Unknown operator '${value}'.`);
            }
        } else if (type === 'string' || type === 'number') {
            type = 'literal';
            o = this.symbolContainer['(literal)'];
        } else {
            console.warn('Unexpected token.', token);
            throw new TypeError(`Unexpected token type '${type}'.`);
        }

        this.currentToken = Object.create(o);
        this.currentToken.value = value;
        this.currentToken.arity = type;
        if (token.root) {
            this.currentToken.root = true;
        }
        return this.currentToken;
    }

    /**
     * Parses an expression.
     *
     * Consumes the current token and calls its `nud`, then keeps consuming tokens and
     * calling their `led` with the expression built so far, for as long as `rbp` is lower
     * than the left binding power of the current token.
     *
     * Errors thrown while extending the expression are logged and swallowed, and the
     * expression parsed up to that point is returned.
     *
     * @param rbp right binding power
     * @returns the parsed node
     */
    public expression(rbp: any) {
        let token = this.currentToken;
        this.advance();
        let left = token.nud();
        try {
            while (rbp < this.currentToken.lbp) {
                token = this.currentToken;
                this.advance();
                left = token.led(left);
            }
        } catch (e) {
            // still best-effort, but report what went wrong
            console.warn('parser fail', e);
        }
        return left;
    }
}