@gobstones/gobstones-parser
Version:
Gobstones parser
1,413 lines (1,318 loc) • 49.7 kB
text/typescript
/* eslint-disable camelcase */
/* eslint-disable no-underscore-dangle */
import {
ASTConstructorDeclaration,
ASTDefFunction,
ASTDefInteractiveProgram,
ASTDefProcedure,
ASTDefProgram,
ASTDefType,
ASTExpr,
ASTExprChoose,
ASTExprConstantNumber,
ASTExprConstantString,
ASTExprFunctionCall,
ASTExprList,
ASTExprMatching,
ASTExprRange,
ASTExprStructure,
ASTExprStructureUpdate,
ASTExprTuple,
ASTExprVariable,
ASTFieldBinding,
ASTMain,
ASTMatchingBranch,
ASTNode,
ASTNodeWithPattern,
ASTPattern,
ASTPatternNumber,
ASTPatternStructure,
ASTPatternTimeout,
ASTPatternTuple,
ASTPatternVariable,
ASTPatternWildcard,
ASTStmtAssignTuple,
ASTStmtAssignVariable,
ASTStmtBlock,
ASTStmtForeach,
ASTStmtIf,
ASTStmtProcedureCall,
ASTStmtRepeat,
ASTStmtReturn,
ASTStmtSwitch,
ASTStmtWhile,
ASTSwitchBranch,
N_ExprVariable
} from './ast';
import { Input, SourceReader } from './reader';
import {
T_ARROW,
T_ASSIGN,
T_CASE,
T_CHOOSE,
T_COMMA,
T_ELLIPSIS,
T_ELSE,
T_ELSEIF,
T_EOF,
T_FIELD,
T_FOREACH,
T_FUNCTION,
T_GETS,
T_IF,
T_IN,
T_INTERACTIVE,
T_IS,
T_LBRACE,
T_LBRACK,
T_LET,
T_LOWERID,
T_LPAREN,
T_MATCHING,
T_MINUS,
T_NUM,
T_ON,
T_OTHERWISE,
T_PIPE,
T_PROCEDURE,
T_PROGRAM,
T_RANGE,
T_RBRACE,
T_RBRACK,
T_RECORD,
T_REPEAT,
T_RETURN,
T_RPAREN,
T_SELECT,
T_SEMICOLON,
T_STRING,
T_SWITCH,
T_THEN,
T_TIMEOUT,
T_TO,
T_TYPE,
T_UNDERSCORE,
T_UPPERID,
T_VARIANT,
T_WHEN,
T_WHILE,
Token
} from './token';
import { GbsSyntaxError } from './exceptions';
import { Lexer } from './lexer';
import { i18n } from './i18n';
/* Typing helper: view a value typed `string | Function` (as used on i18n
 * results in this file) as a callable. This is an unchecked cast only; no
 * runtime conversion or validation takes place. */
// eslint-disable-next-line @typescript-eslint/ban-types
function toFunc(x: string | Function): Function {
    return x as Function;
}

/* Typing helper: view a value typed `string | Function` as a plain string.
 * This is an unchecked cast only; the value is returned untouched. */
// eslint-disable-next-line @typescript-eslint/ban-types
function toStr(x: string | Function): string {
    return x as string;
}

/* Fixity markers used by the operator table below. They come from the
 * global symbol registry, so they compare equal to Symbol.for(<name>). */
const Infix = Symbol.for('Infix'); /* binary operator that cannot be chained */
const InfixL = Symbol.for('InfixL'); /* binary operator, left associative */
const InfixR = Symbol.for('InfixR'); /* binary operator, right associative */
const Prefix = Symbol.for('Prefix'); /* unary prefix operator */
/* One level of the operator precedence table: a fixity marker plus the
 * operators that live at that level.
 *
 * Operators are keyed by the registry name of their token tag (the string
 * returned by Symbol.keyFor, e.g. 'T_OR') and map to the name of the
 * function that implements the operator. */
class PrecedenceLevel {
    private readonly _fixity: symbol;
    private readonly _operators: Record<string, string>;

    /* fixity: symbol describing how operators of this level are parsed.
     * operators: dictionary mapping operator tag names to function names. */
    public constructor(fixity: symbol, operators: Record<string, string>) {
        this._fixity = fixity;
        this._operators = operators;
    }

    /* The fixity marker given at construction time. */
    public get fixity(): symbol {
        return this._fixity;
    }

    /* True iff the tag of the token names one of this level's operators.
     * Only own keys of the dictionary count: the previous `in` test also
     * matched names inherited from Object.prototype (e.g. 'toString').
     * Tags that are not in the global symbol registry never match. */
    public isOperator(token: Token): boolean {
        const key = Symbol.keyFor(token.tag);
        return key !== undefined && Object.prototype.hasOwnProperty.call(this._operators, key);
    }

    /* Build a T_LOWERID token holding the function name of the operator
     * denoted by `token`, keeping the positions of the original token.
     * The value is undefined when the token is not an operator of this
     * level, so callers are expected to check isOperator first. */
    public functionName(token: Token): Token {
        return new Token(
            T_LOWERID,
            this._operators[Symbol.keyFor(token.tag)],
            token.startPos,
            token.endPos
        );
    }
}
/* Operator table: one PrecedenceLevel per entry, listed from the loosest
 * binding level (index 0) to the tightest binding one (last index).
 * Each dictionary maps a token tag name to the operator's function name.
 */
const OPERATORS = [
    /* Logical operators: disjunction, conjunction, negation */
    new PrecedenceLevel(InfixR, { T_OR: '||' }),
    new PrecedenceLevel(InfixR, { T_AND: '&&' }),
    new PrecedenceLevel(Prefix, { T_NOT: 'not' }),
    /* Relational operators (non-associative) */
    new PrecedenceLevel(Infix, {
        T_EQ: '==',
        T_NE: '/=',
        T_LE: '<=',
        T_GE: '>=',
        T_LT: '<',
        T_GT: '>'
    }),
    /* List concatenation */
    new PrecedenceLevel(InfixL, { T_CONCAT: '++' }),
    /* Additive operators */
    new PrecedenceLevel(InfixL, { T_PLUS: '+', T_MINUS: '-' }),
    /* Multiplicative operators */
    new PrecedenceLevel(InfixL, { T_TIMES: '*' }),
    /* Division operators */
    new PrecedenceLevel(InfixL, { T_DIV: 'div', T_MOD: 'mod' }),
    /* Exponential operators */
    new PrecedenceLevel(InfixR, { T_POW: '^' }),
    /* Unary minus: shares the T_MINUS tag with binary minus but maps to
     * a distinct function name */
    new PrecedenceLevel(Prefix, { T_MINUS: '-(unary)' })
];
/* Abort parsing by throwing a GbsSyntaxError.
 *
 * startPos / endPos: source positions delimiting the offending fragment.
 * reason: identifier of the error (presumably an i18n message key such as
 *         'expected-but-found' -- confirm against GbsSyntaxError).
 * args: arguments forwarded along with the reason.
 *
 * This function never returns normally, hence the `never` return type:
 * it lets the compiler see that code following a call is unreachable. */
function fail(startPos: SourceReader, endPos: SourceReader, reason: string, args: any[]): never {
    throw new GbsSyntaxError(startPos, endPos, reason, args);
}
/* Represents a parser for a Gobstones/XGobstones program.
* It is structured as a straightforward recursive-descent parser.
*
* The parameter 'input' may be either a string or a dictionary
* mapping filenames to strings.
*
* All the "parseFoo" methods agree to the following convention:
* - parseFoo returns an AST for a Foo construction,
* - parseFoo consumes a fragment of the input by successively requesting
* the next token from the lexer,
* - when calling parseFoo, the current token should already be located
* on the first token of the corresponding construction,
* - when parseFoo returns, the current token is already located on
* the following token, after the corresponding construction.
*/
export class Parser {
private _lexer: Lexer;
private _currentToken: Token;
/* Create a parser for the given input.
 * A lexer is built over the input and _nextToken() is called right away,
 * presumably so that the current token is already loaded before any
 * parse method runs (see the parseFoo convention). */
public constructor(input: Input) {
this._lexer = new Lexer(input);
this._nextToken();
}
/* Parse a full program and return its AST.
 * Definitions are read one after another until the end-of-file token is
 * reached; the returned ASTMain holds them in the order they were read. */
public parse(): ASTMain {
    const definitions = [];
    let atEnd = this._currentToken.tag === T_EOF;
    while (!atEnd) {
        definitions.push(this._parseDefinition());
        atEnd = this._currentToken.tag === T_EOF;
    }
    return new ASTMain(definitions);
}
/* Return the list of all language options collected by the tokenizer.
 * Language options are set by the LANGUAGE pragma.
 * This simply delegates to the lexer. */
public getLanguageOptions(): string[] {
return this._lexer.getLanguageOptions();
}
/** Definitions **/
/* Parse one top-level definition, choosing the specific parser from the
 * tag of the current token. Any other token is reported as a syntax error
 * ("expected a definition but found ..."). */
public _parseDefinition(): ASTNode {
    const token = this._currentToken;
    if (token.tag === T_PROGRAM) {
        return this._parseDefProgram();
    }
    if (token.tag === T_INTERACTIVE) {
        return this._parseDefInteractiveProgram();
    }
    if (token.tag === T_PROCEDURE) {
        return this._parseDefProcedure();
    }
    if (token.tag === T_FUNCTION) {
        return this._parseDefFunction();
    }
    if (token.tag === T_TYPE) {
        return this._parseDefType();
    }
    fail(token.startPos, token.endPos, 'expected-but-found', [
        i18n('definition'),
        i18n(Symbol.keyFor(token.tag))
    ]);
}
/* Parse "program { ... }".
 * The attributes pending in the lexer are collected right after the
 * keyword and attached to the resulting node. The node spans from the
 * keyword to the end of the block. */
public _parseDefProgram(): ASTNode {
    const programStart = this._currentToken.startPos;
    this._match(T_PROGRAM);
    const pendingAttributes = this._lexer.getPendingAttributes();
    const body = this._parseStmtBlock();

    const node = new ASTDefProgram(body);
    node.startPos = programStart;
    node.endPos = body.endPos;
    node.attributes = pendingAttributes;
    return node;
}
/* Parse "interactive program { branch* }".
 * The body is a sequence of switch-like branches. The node ends at the
 * start position of the closing brace. */
public _parseDefInteractiveProgram(): ASTNode {
    const programStart = this._currentToken.startPos;
    this._match(T_INTERACTIVE);
    this._match(T_PROGRAM);
    const pendingAttributes = this._lexer.getPendingAttributes();
    this._match(T_LBRACE);
    const branches = this._parseSwitchBranches();
    /* Remember where the closing brace starts before consuming it */
    const closingBrace = this._currentToken.startPos;
    this._match(T_RBRACE);

    const node = new ASTDefInteractiveProgram(branches);
    node.startPos = programStart;
    node.endPos = closingBrace;
    node.attributes = pendingAttributes;
    return node;
}
/* Parse "procedure Name(param1, ..., paramN) { ... }".
 * The procedure name is an uppercase identifier and the parameters are
 * lowercase identifiers. Pending attributes are collected after the
 * parameter list. */
public _parseDefProcedure(): ASTNode {
    const procedureStart = this._currentToken.startPos;
    this._match(T_PROCEDURE);
    const procedureName = this._parseUpperid();
    this._match(T_LPAREN);
    const formals = this._parseLoweridSeq();
    this._match(T_RPAREN);
    const pendingAttributes = this._lexer.getPendingAttributes();
    const body = this._parseStmtBlock();

    const node = new ASTDefProcedure(procedureName, formals, body);
    node.startPos = procedureStart;
    node.endPos = body.endPos;
    node.attributes = pendingAttributes;
    return node;
}
/* Parse "function name(param1, ..., paramN) { ... }".
 * The function name is a lowercase identifier; its token is captured
 * before being matched so that it can be stored in the node. */
public _parseDefFunction(): ASTNode {
    const functionStart = this._currentToken.startPos;
    this._match(T_FUNCTION);
    const functionName = this._currentToken;
    this._match(T_LOWERID);
    this._match(T_LPAREN);
    const formals = this._parseLoweridSeq();
    this._match(T_RPAREN);
    const pendingAttributes = this._lexer.getPendingAttributes();
    const body = this._parseStmtBlock();

    const node = new ASTDefFunction(functionName, formals, body);
    node.startPos = functionStart;
    node.endPos = body.endPos;
    node.attributes = pendingAttributes;
    return node;
}
/* Parse the common prefix "type Name is" of a type definition and hand
 * over to the record or variant parser depending on the next keyword.
 * Any other token is reported as a syntax error. */
public _parseDefType(): ASTNode {
    const typeStart = this._currentToken.startPos;
    this._match(T_TYPE);
    const typeName = this._parseUpperid();
    this._match(T_IS);

    const kind = this._currentToken;
    if (kind.tag === T_RECORD) {
        return this._parseDefTypeRecord(typeStart, typeName);
    }
    if (kind.tag === T_VARIANT) {
        return this._parseDefTypeVariant(typeStart, typeName);
    }
    fail(kind.startPos, kind.endPos, 'expected-but-found', [
        toFunc(i18n('<alternative>'))([i18n('T_RECORD'), i18n('T_VARIANT')]),
        i18n(Symbol.keyFor(kind.tag))
    ]);
}
/* Parse the remainder of a record type, "record { field f1 ... field fN }",
 * assuming "type Name is" has already been read.
 *
 * - startPos is the position where the whole type definition began.
 * - typeName is the name of the type; a record has a single constructor
 *   that carries the same name as the type. */
public _parseDefTypeRecord(startPos: SourceReader, typeName: Token): ASTNode {
    this._match(T_RECORD);
    const pendingAttributes = this._lexer.getPendingAttributes();
    this._match(T_LBRACE);
    const fields = this._parseFieldNames();
    const closingBrace = this._currentToken.startPos;
    /* On failure the error lists both "field" and "}" as expected */
    this._matchExpected(T_RBRACE, [T_FIELD, T_RBRACE]);

    const onlyConstructor = new ASTConstructorDeclaration(typeName, fields);
    const node = new ASTDefType(typeName, [onlyConstructor]);
    node.startPos = startPos;
    node.endPos = closingBrace;
    node.attributes = pendingAttributes;
    return node;
}
/* Parse the remainder of a variant type, "variant { case ... case ... }",
 * assuming "type Name is" has already been read.
 *
 * - startPos is the position where the whole type definition began.
 * - typeName is the name of the type being defined. */
public _parseDefTypeVariant(startPos: SourceReader, typeName: Token): ASTNode {
    this._match(T_VARIANT);
    const pendingAttributes = this._lexer.getPendingAttributes();
    this._match(T_LBRACE);

    /* Zero or more constructor declarations, each introduced by "case" */
    const cases = [];
    while (this._currentToken.tag === T_CASE) {
        cases.push(this._parseConstructorDeclaration());
    }

    const closingBrace = this._currentToken.startPos;
    this._matchExpected(T_RBRACE, [T_CASE, T_RBRACE]);

    const node = new ASTDefType(typeName, cases);
    node.startPos = startPos;
    node.endPos = closingBrace;
    node.attributes = pendingAttributes;
    return node;
}
/* Parse one constructor of a variant type:
 *   case Name { field f1 ... field fN }
 * The node spans from "case" to the start of the closing brace. */
public _parseConstructorDeclaration(): ASTNode {
    const caseStart = this._currentToken.startPos;
    this._match(T_CASE);
    const name = this._parseUpperid();
    this._match(T_LBRACE);
    const fields = this._parseFieldNames();
    const closingBrace = this._currentToken.startPos;
    this._matchExpected(T_RBRACE, [T_FIELD, T_RBRACE]);

    const declaration = new ASTConstructorDeclaration(name, fields);
    declaration.startPos = caseStart;
    declaration.endPos = closingBrace;
    return declaration;
}
/* Parse a possibly empty sequence "field f1 field f2 ... field fN" and
 * return the tokens of the field names in order. Stops at the first
 * token that is not "field". */
public _parseFieldNames(): Token[] {
    const names = [];
    for (; this._currentToken.tag === T_FIELD; ) {
        this._match(T_FIELD);
        names.push(this._parseLowerid());
    }
    return names;
}
/** Statements **/
/* Parse a statement together with the semicolon that may optionally
 * follow it. The semicolon, if present, is consumed and discarded. */
public _parseStatement(): ASTNode {
    const statement = this._parsePureStatement();
    const followedBySemicolon = this._currentToken.tag === T_SEMICOLON;
    if (followedBySemicolon) {
        this._match(T_SEMICOLON);
    }
    return statement;
}
/* Parse a single statement, without consuming any trailing semicolon.
 * The kind of statement is decided by the tag of the current token;
 * a token that cannot start a statement is reported as a syntax error. */
public _parsePureStatement(): ASTNode {
    const token = this._currentToken;

    if (token.tag === T_LPAREN) {
        /* Dedicated error that rejects the obsolete tuple assignment
         *   (x1, ..., xN) := expression
         * which must now be written as
         *   let (x1, ..., xN) := expression
         */
        fail(token.startPos, token.endPos, 'obsolete-tuple-assignment', []);
        return;
    }

    switch (token.tag) {
        /* Simple statements */
        case T_LOWERID:
            return this._parseStmtAssignVariable();
        case T_UPPERID:
            return this._parseStmtProcedureCall();
        case T_LET:
            return this._parseStmtLet();
        case T_RETURN:
            return this._parseStmtReturn();
        case T_ELLIPSIS:
            return this._parseStmtEllipsis();
        /* Compound statements */
        case T_LBRACE:
            return this._parseStmtBlock();
        case T_IF:
            return this._parseStmtIf(true /* expectInitialIf */);
        case T_SWITCH:
            return this._parseStmtSwitch();
        case T_REPEAT:
            return this._parseStmtRepeat();
        case T_FOREACH:
            return this._parseStmtForeach();
        case T_WHILE:
            return this._parseStmtWhile();
        default:
            fail(token.startPos, token.endPos, 'expected-but-found', [
                i18n('statement'),
                i18n(Symbol.keyFor(token.tag))
            ]);
    }
}
/* Parse a block "{ stmt1 ... stmtN }" (N >= 0).
 * The node spans from the opening brace to the start of the closing one.
 *
 * Note: _parseStatement already consumes one optional semicolon, and this
 * loop accepts one more after each statement. */
public _parseStmtBlock(): ASTStmtBlock {
    const openingBrace = this._currentToken.startPos;
    this._match(T_LBRACE);

    const body = [];
    for (; this._currentToken.tag !== T_RBRACE; ) {
        body.push(this._parseStatement());
        if (this._currentToken.tag === T_SEMICOLON) {
            this._match(T_SEMICOLON);
        }
    }

    const closingBrace = this._currentToken.startPos;
    this._match(T_RBRACE);

    const block = new ASTStmtBlock(body);
    block.startPos = openingBrace;
    block.endPos = closingBrace;
    return block;
}
/* Parse the ellipsis statement "...".
 * It is represented as a call to the procedure whose name is the i18n
 * entry 'PRIM:BOOM', with the i18n entry 'errmsg:ellipsis' as its only
 * (string constant) argument. The node ends where the next token starts. */
public _parseStmtEllipsis(): ASTNode {
    const ellipsisStart = this._currentToken.startPos;
    this._match(T_ELLIPSIS);

    const procedureName = new Token(
        T_UPPERID,
        toStr(i18n('PRIM:BOOM')),
        ellipsisStart,
        ellipsisStart
    );
    const message = new ASTExprConstantString(
        new Token(T_STRING, toStr(i18n('errmsg:ellipsis')))
    );

    const call = new ASTStmtProcedureCall(procedureName, [message]);
    call.startPos = ellipsisStart;
    call.endPos = this._currentToken.startPos;
    return call;
}
/* Parse "return (expr1, ..., exprN)".
 * The parenthesized list is parsed as a tuple that must not be empty. */
public _parseStmtReturn(): ASTNode {
    const returnStart = this._currentToken.startPos;
    this._match(T_RETURN);
    const returned = this._parseExprTuple(false /* possiblyEmpty */);

    const node = new ASTStmtReturn(returned);
    node.startPos = returnStart;
    node.endPos = returned.endPos;
    return node;
}
/* Parse an if statement, with its optional chain of elseif / else parts:
 *   if (cond) [then] { ... } [elseif (cond) [then] { ... }]* [else { ... }]
 *
 * expectInitialIf is true when the current token is the "if" keyword.
 * It is false on the recursive call made right after an "elseif" has been
 * consumed, in which case parsing starts at the parenthesized condition.
 *
 * An "elseif" part is represented as a nested if statement in the else
 * position. When there is no else part, that position is undefined. */
public _parseStmtIf(expectInitialIf: boolean): ASTNode {
    const ifStart = this._currentToken.startPos;
    if (expectInitialIf) {
        this._match(T_IF);
    }
    this._match(T_LPAREN);
    const condition = this._parseExpression();
    this._match(T_RPAREN);
    if (this._currentToken.tag === T_THEN) {
        /* The 'then' keyword is optional */
        this._match(T_THEN);
    }
    const thenBlock = this._parseStmtBlock();

    let elseBlock: ASTNode = undefined;
    const follow = this._currentToken.tag;
    if (follow === T_ELSEIF) {
        this._match(T_ELSEIF);
        elseBlock = this._parseStmtIf(false /* expectInitialIf */);
    } else if (follow === T_ELSE) {
        this._match(T_ELSE);
        elseBlock = this._parseStmtBlock();
    }
    /* The statement ends with its last part */
    const ifEnd = elseBlock === undefined ? thenBlock.endPos : elseBlock.endPos;

    const node = new ASTStmtIf(condition, thenBlock, elseBlock);
    node.startPos = ifStart;
    node.endPos = ifEnd;
    return node;
}
/* Parse "repeat (times) { ... }". */
public _parseStmtRepeat(): ASTNode {
    const repeatStart = this._currentToken.startPos;
    this._match(T_REPEAT);
    this._match(T_LPAREN);
    const times = this._parseExpression();
    this._match(T_RPAREN);
    const block = this._parseStmtBlock();

    const node = new ASTStmtRepeat(times, block);
    node.startPos = repeatStart;
    node.endPos = block.endPos;
    return node;
}
/* Parse "foreach pattern in expression { ... }".
 * Unlike repeat/while, the header is not enclosed in parentheses. */
public _parseStmtForeach(): ASTNode {
    const foreachStart = this._currentToken.startPos;
    this._match(T_FOREACH);
    const pattern = this._parsePattern();
    this._match(T_IN);
    const range = this._parseExpression();
    const block = this._parseStmtBlock();

    const node = new ASTStmtForeach(pattern, range, block);
    node.startPos = foreachStart;
    node.endPos = block.endPos;
    return node;
}
/* Parse "while (condition) { ... }". */
public _parseStmtWhile(): ASTNode {
    const whileStart = this._currentToken.startPos;
    this._match(T_WHILE);
    this._match(T_LPAREN);
    const condition = this._parseExpression();
    this._match(T_RPAREN);
    const block = this._parseStmtBlock();

    const node = new ASTStmtWhile(condition, block);
    node.startPos = whileStart;
    node.endPos = block.endPos;
    return node;
}
/* Parse "switch (subject) [to] { branch* }".
 * The "to" keyword after the subject is optional. The node ends at the
 * start position of the closing brace. */
public _parseStmtSwitch(): ASTStmtSwitch {
    const switchStart = this._currentToken.startPos;
    this._match(T_SWITCH);
    this._match(T_LPAREN);
    const subject = this._parseExpression();
    this._match(T_RPAREN);
    const hasTo = this._currentToken.tag === T_TO;
    if (hasTo) {
        this._match(T_TO);
    }
    this._match(T_LBRACE);
    const branches = this._parseSwitchBranches();
    const closingBrace = this._currentToken.startPos;
    this._match(T_RBRACE);

    const node = new ASTStmtSwitch(subject, branches);
    node.startPos = switchStart;
    node.endPos = closingBrace;
    return node;
}
/* Parse a let statement, which is one of:
 *   let x := expression
 *   let (x1, ..., xN) := expression
 * The resulting assignment node starts at the "let" keyword.
 * Any token other than a lowercase identifier or a left parenthesis
 * after "let" is reported as a syntax error. */
public _parseStmtLet(): ASTNode {
    const startPos = this._currentToken.startPos;
    this._match(T_LET);
    let result: ASTNode;
    if (this._currentToken.tag === T_LOWERID) {
        result = this._parseStmtAssignVariable();
    } else if (this._currentToken.tag === T_LPAREN) {
        result = this._parseStmtAssignTuple();
    } else {
        /* The '<alternative>' formatter takes a single list of
         * alternatives, as at every other call site in this file;
         * passing the alternatives as separate arguments was a bug. */
        fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [
            toFunc(i18n('<alternative>'))([i18n('T_LOWERID'), i18n('T_LPAREN')]),
            toStr(i18n(Symbol.keyFor(this._currentToken.tag)))
        ]);
    }
    /* The sub-parser set startPos at the variable or parenthesis;
     * move it back to the "let" keyword */
    result.startPos = startPos;
    return result;
}
/* Parse a variable assignment "x := expression".
 * The node spans from the variable to the end of the expression. */
public _parseStmtAssignVariable(): ASTNode {
    const target = this._parseLowerid();
    this._match(T_ASSIGN);
    const assigned = this._parseExpression();

    const node = new ASTStmtAssignVariable(target, assigned);
    node.startPos = target.startPos;
    node.endPos = assigned.endPos;
    return node;
}
/* Parse a tuple assignment "(x1, ..., xN) := expression".
 * A tuple with exactly one variable is rejected with a dedicated error;
 * the check runs before the closing parenthesis is consumed. */
public _parseStmtAssignTuple(): ASTNode {
    const tupleStart = this._currentToken.startPos;
    this._match(T_LPAREN);
    const targets = this._parseLoweridSeq();
    const isSingleton = targets.length === 1;
    if (isSingleton) {
        fail(tupleStart, this._currentToken.endPos, 'assignment-tuple-cannot-be-singleton', []);
    }
    this._match(T_RPAREN);
    this._match(T_ASSIGN);
    const assigned = this._parseExpression();

    const node = new ASTStmtAssignTuple(targets, assigned);
    node.startPos = tupleStart;
    node.endPos = assigned.endPos;
    return node;
}
/* Parse a procedure call "Name(expr1, ..., exprN)".
 * Arguments are comma-separated expressions up to the closing
 * parenthesis. The node ends at the start of that parenthesis. */
public _parseStmtProcedureCall(): ASTNode {
    const callee = this._parseUpperid();
    this._match(T_LPAREN);
    const actuals = this._parseDelimitedSeq(T_RPAREN, T_COMMA, () => this._parseExpression());
    const closingParen = this._currentToken.startPos;
    this._match(T_RPAREN);

    const call = new ASTStmtProcedureCall(callee, actuals);
    call.startPos = callee.startPos;
    call.endPos = closingParen;
    return call;
}
/** Patterns **/
/* Parse a pattern, choosing the specific parser from the tag of the
 * current token. A number pattern may start with a minus sign.
 * Any other token is reported as a syntax error. */
public _parsePattern(): ASTPattern {
    const token = this._currentToken;
    if (token.tag === T_UNDERSCORE) {
        return this._parsePatternWildcard();
    }
    if (token.tag === T_LOWERID) {
        return this._parsePatternVariable();
    }
    if (token.tag === T_NUM || token.tag === T_MINUS) {
        return this._parsePatternNumber();
    }
    if (token.tag === T_UPPERID) {
        return this._parsePatternStructure();
    }
    if (token.tag === T_LPAREN) {
        return this._parsePatternTuple();
    }
    if (token.tag === T_TIMEOUT) {
        return this._parsePatternTimeout();
    }
    fail(token.startPos, token.endPos, 'expected-but-found', [
        i18n('pattern'),
        i18n(Symbol.keyFor(token.tag))
    ]);
}
/* Parse the wildcard pattern "_".
 * Both the start and the end position of the node are set to the
 * start position of the underscore token. */
public _parsePatternWildcard(): ASTPatternWildcard {
    const underscorePos = this._currentToken.startPos;
    this._match(T_UNDERSCORE);

    const wildcard = new ASTPatternWildcard();
    wildcard.startPos = underscorePos;
    wildcard.endPos = underscorePos;
    return wildcard;
}
/* Parse a variable pattern, i.e. a single lowercase identifier. */
public _parsePatternVariable(): ASTPatternVariable {
    const variableStart = this._currentToken.startPos;
    const variable = this._parseLowerid();

    const pattern = new ASTPatternVariable(variable);
    pattern.startPos = variableStart;
    pattern.endPos = variable.endPos;
    return pattern;
}
/* Parse a number pattern: a numeric literal optionally preceded by "-".
 * The sign is folded into the value of a fresh T_NUM token, which keeps
 * the positions of the literal. The pattern "-0" is rejected. */
public _parsePatternNumber(): ASTPatternNumber {
    const patternStart = this._currentToken.startPos;

    const negative = this._currentToken.tag === T_MINUS;
    if (negative) {
        this._match(T_MINUS);
    }

    const literal = this._currentToken;
    this._match(T_NUM);

    const signedValue = (negative ? '-' : '') + literal.value;
    if (signedValue === '-0') {
        fail(patternStart, literal.endPos, 'pattern-number-cannot-be-negative-zero', []);
    }

    const signedToken = new Token(T_NUM, signedValue, literal.startPos, literal.endPos);
    const pattern = new ASTPatternNumber(signedToken);
    pattern.startPos = patternStart;
    pattern.endPos = signedToken.endPos;
    return pattern;
}
/* Parse a structure pattern, which is either a bare constructor "Name"
 * or a constructor with parameters "Name(x1, ..., xN)".
 *
 * With parameters, the node ends at the start of the closing
 * parenthesis; without them, the end position is left equal to the start
 * position of the constructor name. */
public _parsePatternStructure(): ASTPatternStructure {
    const patternStart = this._currentToken.startPos;
    let patternEnd = patternStart;
    const constructor = this._parseUpperid();

    let parameters = [];
    if (this._currentToken.tag === T_LPAREN) {
        this._match(T_LPAREN);
        parameters = this._parseLoweridSeq();
        patternEnd = this._currentToken.startPos;
        this._match(T_RPAREN);
    }

    const pattern = new ASTPatternStructure(constructor, parameters);
    pattern.startPos = patternStart;
    pattern.endPos = patternEnd;
    return pattern;
}
/* Parse a tuple pattern "(x1, ..., xN)".
 * A tuple with exactly one component is rejected with a dedicated error;
 * the check runs before the closing parenthesis is consumed. */
public _parsePatternTuple(): ASTPatternTuple {
    const tupleStart = this._currentToken.startPos;
    this._match(T_LPAREN);
    const components = this._parseLoweridSeq();
    const isSingleton = components.length === 1;
    if (isSingleton) {
        fail(tupleStart, this._currentToken.endPos, 'pattern-tuple-cannot-be-singleton', []);
    }
    const closingParen = this._currentToken.startPos;
    this._match(T_RPAREN);

    const pattern = new ASTPatternTuple(components);
    pattern.startPos = tupleStart;
    pattern.endPos = closingParen;
    return pattern;
}
/* Parse a timeout pattern "TIMEOUT(n)" where n is a numeric literal.
 * The literal token is captured before being matched so that it can be
 * stored in the node. */
public _parsePatternTimeout(): ASTPatternTimeout {
    const timeoutStart = this._currentToken.startPos;
    this._match(T_TIMEOUT);
    this._match(T_LPAREN);
    const amount = this._currentToken;
    this._match(T_NUM);
    const closingParen = this._currentToken.startPos;
    this._match(T_RPAREN);

    const pattern = new ASTPatternTimeout(amount);
    pattern.startPos = timeoutStart;
    pattern.endPos = closingParen;
    return pattern;
}
/** Expressions **/
/* Parse an arbitrary expression.
 * Parsing starts at precedence level 0, the first (lowest precedence)
 * entry of OPERATORS, so every operator is allowed. */
public _parseExpression(): ASTExpr {
return this._parseExprOperator(0);
}
/* Read an expression of the given precedence level.
 *
 * If the list OPERATORS of precedence levels has N elements, then:
 * - Expressions of level 0 are arbitrary expressions.
 * - Expressions of level N are atomic expressions.
 * - In general, an expression of level I may use the operators of
 *   levels I, I+1, ..., N-1 freely; operators of lower levels may only
 *   appear inside parentheses.
 *
 * The fixity of the level selects the parsing strategy. An unknown
 * fixity raises a plain Error (this signals a bug in the table, not a
 * syntax error in the program).
 */
public _parseExprOperator(level: number): ASTExpr {
    if (level === OPERATORS.length) {
        return this._parseExprAtom();
    }
    const fixity = OPERATORS[level].fixity;
    if (fixity === Infix) {
        return this._parseExprOperatorInfix(level);
    }
    if (fixity === InfixL) {
        return this._parseExprOperatorInfixL(level);
    }
    if (fixity === InfixR) {
        return this._parseExprOperatorInfixR(level);
    }
    if (fixity === Prefix) {
        return this._parseExprOperatorPrefix(level);
    }
    throw Error('Invalid operator.');
}
/* Parse an expression at a level of non-associative binary operators:
 *   operand [op operand]
 * where operands belong to the next level. Chaining a second operator of
 * the same level (e.g. "a == b == c") is a syntax error.
 * The operator application is represented as a function call. */
public _parseExprOperatorInfix(level: number): ASTExpr {
    const levelOperators = OPERATORS[level];
    const left = this._parseExprOperator(level + 1);
    if (!levelOperators.isOperator(this._currentToken)) {
        return left;
    }

    const op = this._currentToken;
    this._nextToken();
    const right = this._parseExprOperator(level + 1);

    /* Check that it is not used associatively */
    const next = this._currentToken;
    if (levelOperators.isOperator(next)) {
        fail(next.startPos, next.endPos, 'operators-are-not-associative', [
            i18n(Symbol.keyFor(op.tag)),
            i18n(Symbol.keyFor(next.tag))
        ]);
    }

    const call = new ASTExprFunctionCall(levelOperators.functionName(op), [left, right]);
    call.startPos = left.startPos;
    call.endPos = right.endPos;
    return call;
}
/* Parse an expression at a level of left-associative binary operators:
 *   operand (op operand)*
 * where operands belong to the next level. The applications are nested
 * to the left, so "a - b - c" becomes (a - b) - c. */
public _parseExprOperatorInfixL(level: number): ASTExpr {
    const levelOperators = OPERATORS[level];
    let accumulated = this._parseExprOperator(level + 1);
    while (levelOperators.isOperator(this._currentToken)) {
        const op = this._currentToken;
        this._nextToken();
        const operand = this._parseExprOperator(level + 1);

        const call = new ASTExprFunctionCall(levelOperators.functionName(op), [
            accumulated,
            operand
        ]);
        call.startPos = accumulated.startPos;
        call.endPos = operand.endPos;
        accumulated = call;
    }
    return accumulated;
}
/* Parse an expression at a level of right-associative binary operators:
 *   operand [op expression-of-this-level]
 * The right operand is parsed at the same level, which nests the
 * applications to the right, so "a ^ b ^ c" becomes a ^ (b ^ c). */
public _parseExprOperatorInfixR(level: number): ASTExpr {
    const levelOperators = OPERATORS[level];
    const left = this._parseExprOperator(level + 1);
    if (!levelOperators.isOperator(this._currentToken)) {
        return left;
    }

    const op = this._currentToken;
    this._nextToken();
    const right = this._parseExprOperator(level); /* same level */

    const call = new ASTExprFunctionCall(levelOperators.functionName(op), [left, right]);
    call.startPos = left.startPos;
    call.endPos = right.endPos;
    return call;
}
/* Parse an expression at a level of prefix operators:
 *   op* operand
 * The operand of a prefix operator is parsed at the same level, so the
 * operator may be repeated. Without a prefix operator the expression
 * simply belongs to the next level. */
public _parseExprOperatorPrefix(level: number): ASTExpr {
    const levelOperators = OPERATORS[level];
    if (!levelOperators.isOperator(this._currentToken)) {
        return this._parseExprOperator(level + 1);
    }

    const op = this._currentToken;
    this._nextToken();
    const operand = this._parseExprOperator(level); /* same level */

    const call = new ASTExprFunctionCall(levelOperators.functionName(op), [operand]);
    call.startPos = op.startPos;
    call.endPos = operand.endPos;
    return call;
}
/* Parse an atomic expression, i.e. one in which every operator must be
 * surrounded by parentheses. The kind of atom is decided by the tag of
 * the current token; a token that cannot start an expression is
 * reported as a syntax error. */
public _parseExprAtom(): ASTExpr {
    const token = this._currentToken;
    switch (token.tag) {
        /* Literals */
        case T_NUM:
            return this._parseExprConstantNumber();
        case T_STRING:
            return this._parseExprConstantString();
        /* Identifiers */
        case T_LOWERID:
            return this._parseExprVariableOrFunctionCall();
        case T_UPPERID:
            return this._parseExprStructureOrStructureUpdate();
        /* Bracketed constructions */
        case T_LPAREN:
            return this._parseExprTuple(true /* possiblyEmpty */);
        case T_LBRACK:
            return this._parseExprListOrRange();
        /* Keyword-introduced constructions */
        case T_CHOOSE:
            return this._parseExprChoose(true /* expectInitialChoose */);
        case T_MATCHING:
            return this._parseExprMatching();
        case T_ELLIPSIS:
            return this._parseExprEllipsis();
        default:
            fail(token.startPos, token.endPos, 'expected-but-found', [
                i18n('expression'),
                i18n(Symbol.keyFor(token.tag))
            ]);
    }
}
/* Parse the ellipsis expression "...".
 * It is represented as a call to the function whose name is the i18n
 * entry 'PRIM:boom', with the i18n entry 'errmsg:ellipsis' as its only
 * (string constant) argument. The node ends where the next token starts. */
public _parseExprEllipsis(): ASTExprFunctionCall {
    const ellipsisStart = this._currentToken.startPos;
    this._match(T_ELLIPSIS);

    const functionName = new Token(
        T_LOWERID,
        toStr(i18n('PRIM:boom')),
        ellipsisStart,
        ellipsisStart
    );
    const message = new ASTExprConstantString(
        new Token(T_STRING, toStr(i18n('errmsg:ellipsis')))
    );

    const call = new ASTExprFunctionCall(functionName, [message]);
    call.startPos = ellipsisStart;
    call.endPos = this._currentToken.startPos;
    return call;
}
/* Parse an expression that starts with a lowercase identifier:
 * - "name(expr1, ..., exprN)" is a function call, ending at the start
 *   of the closing parenthesis;
 * - a bare "name" is a variable, ending where the identifier ends. */
public _parseExprVariableOrFunctionCall(): ASTExprVariable | ASTExprFunctionCall {
    const id = this._parseLowerid();

    if (this._currentToken.tag !== T_LPAREN) {
        const variable = new ASTExprVariable(id);
        variable.startPos = id.startPos;
        variable.endPos = id.endPos;
        return variable;
    }

    this._match(T_LPAREN);
    const actuals = this._parseExpressionSeq(T_RPAREN);
    const call = new ASTExprFunctionCall(id, actuals);
    const closingParen = this._currentToken.startPos;
    this._match(T_RPAREN);
    call.startPos = id.startPos;
    call.endPos = closingParen;
    return call;
}
/* Parse a numeric literal. The node takes its positions from the token. */
public _parseExprConstantNumber(): ASTExprConstantNumber {
    const literal = this._currentToken;
    this._match(T_NUM);

    const constant = new ASTExprConstantNumber(literal);
    constant.startPos = literal.startPos;
    constant.endPos = literal.endPos;
    return constant;
}
/* Parse a string literal. The node takes its positions from the token. */
public _parseExprConstantString(): ASTExprConstantString {
    const literal = this._currentToken;
    this._match(T_STRING);

    const constant = new ASTExprConstantString(literal);
    constant.startPos = literal.startPos;
    constant.endPos = literal.endPos;
    return constant;
}
/* Parse a choose expression:
 *   choose expr1 when (cond1) expr2 when (cond2) ... exprN otherwise
 *
 * expectInitialChoose is true when the current token is the "choose"
 * keyword; it is false on the recursive calls that parse the remaining
 * alternatives.
 *
 * Each "when" alternative yields an ASTExprChoose whose last component
 * is the rest of the chain. The final "otherwise" alternative yields the
 * expression itself, with its positions overwritten so that it spans up
 * to the end of the "otherwise" keyword. */
public _parseExprChoose(expectInitialChoose: boolean): ASTExprChoose {
    const chooseStart = this._currentToken.startPos;
    if (expectInitialChoose) {
        this._match(T_CHOOSE);
    }
    const alternative = this._parseExpression();

    if (this._currentToken.tag !== T_WHEN) {
        /* Last alternative: it must be followed by "otherwise" */
        const otherwiseEnd = this._currentToken.endPos;
        this._match(T_OTHERWISE);
        alternative.startPos = chooseStart;
        alternative.endPos = otherwiseEnd;
        return alternative as ASTExprChoose;
    }

    this._match(T_WHEN);
    this._match(T_LPAREN);
    const condition = this._parseExpression();
    this._match(T_RPAREN);
    const rest = this._parseExprChoose(false /* expectInitialChoose */);

    const choose = new ASTExprChoose(condition, alternative, rest);
    choose.startPos = chooseStart;
    choose.endPos = rest.endPos;
    return choose;
}
/* Parse a matching expression "matching (subject) select branches...".
 * Only the start position of the node is assigned here; this method
 * does not set its end position. */
public _parseExprMatching(): ASTExprMatching {
    const matchingStart = this._currentToken.startPos;
    this._match(T_MATCHING);
    this._match(T_LPAREN);
    const subject = this._parseExpression();
    this._match(T_RPAREN);
    this._match(T_SELECT);
    const branches = this._parseMatchingBranches();

    const matching = new ASTExprMatching(subject, branches);
    matching.startPos = matchingStart;
    return matching;
}
/*
 * Parse an expression that starts with an uppercase identifier, which is
 * any of the following constructions:
 * (1) Structure with no arguments: "Norte"
 * (2) Structure with no arguments and explicit parentheses: "Nil()"
 * (3) Structure with arguments: "Coord(x <- 1, y <- 2)"
 * (4) Update structure with arguments: "Coord(expression | x <- 2)"
 *
 * One token of lookahead is not enough to tell (3) from (4), so after
 * reading the constructor and a left parenthesis the following
 * workaround is used:
 *
 * - Parse an expression.
 * - If the next token is GETS ("<-") this is case (3). The expression
 *   must then be just a variable, whose name is the first field name.
 * - If the next token is PIPE ("|") this is case (4).
 * - Anything else is a syntax error.
 */
public _parseExprStructureOrStructureUpdate(): ASTExprStructure | ASTExprStructureUpdate {
    const constructorName = this._parseUpperid();

    const hasParentheses = this._currentToken.tag === T_LPAREN;
    if (!hasParentheses) {
        /* Case (1), e.g. "Norte" */
        const bare = new ASTExprStructure(constructorName, []);
        bare.startPos = constructorName.startPos;
        bare.endPos = constructorName.endPos;
        return bare;
    }

    this._match(T_LPAREN);
    if (this._currentToken.tag === T_RPAREN) {
        /* Case (2), e.g. "Nil()" */
        const empty = new ASTExprStructure(constructorName, []);
        const closingParen = this._currentToken.startPos;
        this._match(T_RPAREN);
        empty.startPos = constructorName.startPos;
        empty.endPos = closingParen;
        return empty;
    }

    const subject = this._parseExpression();
    const next = this._currentToken;

    if (next.tag === T_GETS) {
        /* Case (3): what was read must be a field name */
        if (subject.tag !== N_ExprVariable) {
            fail(next.startPos, next.endPos, 'expected-but-found', [
                i18n('T_PIPE'),
                i18n('T_GETS')
            ]);
            return;
        }
        return this._parseStructure(constructorName, (subject as ASTExprVariable).variableName);
    }

    if (next.tag === T_PIPE) {
        /* Case (4) */
        return this._parseStructureUpdate(constructorName, subject);
    }

    if (next.tag === T_COMMA || next.tag === T_RPAREN) {
        /* Specific error message for a common programming error:
         * calling a procedure where an expression is expected. */
        fail(constructorName.startPos, constructorName.endPos, 'expected-but-found', [
            i18n('expression'),
            i18n('procedure call')
        ]);
        return;
    }

    /* Generic error: "<-" is only a valid continuation after a variable */
    const expected: string =
        subject.tag === N_ExprVariable
            ? toFunc(i18n('<alternative>'))([i18n('T_GETS'), i18n('T_PIPE')])
            : toStr(i18n('T_PIPE'));
    fail(constructorName.startPos, constructorName.endPos, 'expected-but-found', [
        expected,
        i18n(Symbol.keyFor(next.tag))
    ]);
}
/* Parse a structure A(x1 <- expr1, ..., xN <- exprN)
 * where N >= 1,
 * assuming that "A(x1" has already been read.
 *
 * - constructorName is the token for "A".
 * - fieldName1 is the token for "x1".
 *
 * The node ends at the start of the closing parenthesis.
 */
public _parseStructure(constructorName: Token, fieldName1: Token): ASTExprStructure {
    /* First binding: only "<- expr1" is left to read */
    this._match(T_GETS);
    const firstValue = this._parseExpression();
    const firstBinding = new ASTFieldBinding(fieldName1, firstValue);
    firstBinding.startPos = fieldName1.startPos;
    firstBinding.endPos = firstValue.endPos;

    /* Remaining bindings "x2 <- expr2, ..., xN <- exprN" (possibly none) */
    const bindings = this._parseNonEmptyDelimitedSeq(T_RPAREN, T_COMMA, [firstBinding], () =>
        this._parseFieldBinding()
    ) as ASTFieldBinding[];

    /* Closing parenthesis */
    const closingParen = this._currentToken.startPos;
    this._match(T_RPAREN);

    const structure = new ASTExprStructure(constructorName, bindings);
    structure.startPos = constructorName.startPos;
    structure.endPos = closingParen;
    return structure;
}
/* Parse a structure update A(e | x1 <- expr1, ..., xN <- exprN),
 * assuming that "A(e" has already been read.
 *
 * - constructorName is the token for "A".
 * - original is the expression "e" being updated.
 *
 * The node ends at the start of the closing parenthesis.
 */
public _parseStructureUpdate(
    constructorName: Token,
    original: ASTExpr
): ASTExprStructureUpdate {
    /* Separator between the original expression and the bindings */
    this._match(T_PIPE);

    /* Bindings "x1 <- expr1, ..., xN <- exprN" up to the closing parenthesis */
    const bindings = this._parseDelimitedSeq(T_RPAREN, T_COMMA, () =>
        this._parseFieldBinding()
    ) as ASTFieldBinding[];

    /* Closing parenthesis */
    const closingParen = this._currentToken.startPos;
    this._match(T_RPAREN);

    const update = new ASTExprStructureUpdate(constructorName, original, bindings);
    update.startPos = constructorName.startPos;
    update.endPos = closingParen;
    return update;
}
/* Read an expression that starts with "[", which is one of:
 *   a list                          [expr1, ..., exprN]
 *   a range expression              [first .. last]
 *   a range expression with step    [first, second .. last]
 *
 * The first one or two expressions are read here; the token that follows
 * them decides which construction this is, and the remainder is handed
 * over to the list or range parser.
 */
public _parseExprListOrRange(): ASTExprList | ASTExprRange {
    /* Shared error: after the first or second element only ",", ".."
     * or "]" may follow */
    const failUnexpected = (): void => {
        const found = this._currentToken;
        fail(found.startPos, found.endPos, 'expected-but-found', [
            toFunc(i18n('<alternative>'))([
                i18n('T_COMMA'),
                i18n('T_RANGE'),
                i18n('T_RBRACK')
            ]),
            i18n(Symbol.keyFor(found.tag))
        ]);
    };

    const bracketStart = this._currentToken.startPos;
    this._match(T_LBRACK);
    if (this._currentToken.tag === T_RBRACK) {
        /* Empty list */
        return this._parseExprListRemainder(bracketStart, []);
    }

    const first = this._parseExpression();
    const afterFirst = this._currentToken.tag;
    if (afterFirst === T_RBRACK) {
        return this._parseExprListRemainder(bracketStart, [first]);
    }
    if (afterFirst === T_RANGE) {
        return this._parseExprRange(bracketStart, first);
    }
    if (afterFirst !== T_COMMA) {
        failUnexpected();
        return;
    }

    this._match(T_COMMA);
    const second = this._parseExpression();
    const afterSecond = this._currentToken.tag;
    if (afterSecond === T_RBRACK || afterSecond === T_COMMA) {
        return this._parseExprListRemainder(bracketStart, [first, second]);
    }
    if (afterSecond === T_RANGE) {
        return this._parseExprRange(bracketStart, first, second);
    }
    failUnexpected();
    return;
}
/* Read the end of a list "[expr1, ..., exprN]" assuming we have
 * already read "[expr1, ..., exprK" up to some point K >= 0.
 * - startPos is the position of "["
 * - prefix is the list of elements we have already read (it is empty
 *   for the empty list "[]")
 * The node ends at the start of the closing bracket.
 */
public _parseExprListRemainder(startPos: SourceReader, prefix: ASTExpr[]): ASTExprList {
    const allElements = this._parseNonEmptyDelimitedSeq(T_RBRACK, T_COMMA, prefix, () =>
        this._parseExpression()
    );
    const closingBracket = this._currentToken.startPos;
    this._match(T_RBRACK);

    const list = new ASTExprList(allElements);
    list.startPos = startPos;
    list.endPos = closingBracket;
    return list;
}
/* Read a range "[first..last]" or "[first,second..last]"
* assuming we are left to read "..last]"
* - startPos is the position of "[".
* - second may be omitted */
public _parseExprRange(startPos: SourceReader, first: ASTExpr, second?: ASTExpr): ASTExprRange {
    /* Consume ".." and then the upper bound of the range */
    this._match(T_RANGE);
    const upperBound = this._parseExpression();

    /* Remember where "]" starts before consuming it */
    const closingPos = this._currentToken.startPos;
    this._match(T_RBRACK);

    const rangeNode = new ASTExprRange(first, second, upperBound);
    rangeNode.startPos = startPos;
    rangeNode.endPos = closingPos;
    return rangeNode;
}
/* Read a list of expressions separated by commas and delimited
* by parentheses. If there is a single expression, return the
* expression itself. If there are 0 or >=2 expressions, return
* a tuple.
*/
public _parseExprTuple(possiblyEmpty: boolean): ASTExprTuple {
    const openingPos = this._currentToken.startPos;
    this._match(T_LPAREN);
    const items = this._parseExpressionSeq(T_RPAREN);
    const closingPos = this._currentToken.startPos;
    this._match(T_RPAREN);

    /* "()" is only accepted when the caller allows an empty tuple */
    if (items.length === 0 && !possiblyEmpty) {
        fail(openingPos, closingPos, 'return-tuple-cannot-be-empty', []);
    }

    /* A single parenthesized expression is returned as is, not wrapped in a tuple */
    const node = items.length === 1 ? (items[0] as ASTExprTuple) : new ASTExprTuple(items);

    /* In both cases the node takes the positions of the parentheses */
    node.startPos = openingPos;
    node.endPos = closingPos;
    return node;
}
/** SwitchBranch **/
/* Read switch branches one after another until the current token is "}".
 * The closing brace itself is not consumed here. */
public _parseSwitchBranches(): ASTNodeWithPattern[] {
    const collected = [];
    for (;;) {
        if (this._currentToken.tag === T_RBRACE) {
            return collected;
        }
        collected.push(this._parseSwitchBranch());
    }
}
/* Read a single switch branch: a pattern, an arrow and a block of statements.
 * The branch spans from the start of the pattern to the end of the block. */
public _parseSwitchBranch(): ASTSwitchBranch {
    const branchPattern = this._parsePattern();
    this._match(T_ARROW);
    const branchBody = this._parseStmtBlock();

    const branch = new ASTSwitchBranch(branchPattern, branchBody);
    branch.startPos = branchPattern.startPos;
    branch.endPos = branchBody.endPos;
    return branch;
}
/** MatchingBranch **/
/* Read matching branches until the current token is "otherwise",
 * then consume that "otherwise" token. */
public _parseMatchingBranches(): ASTNode[] {
    const collected = [];
    for (;;) {
        if (this._currentToken.tag === T_OTHERWISE) {
            break;
        }
        collected.push(this._parseMatchingBranch());
    }
    this._match(T_OTHERWISE);
    return collected;
}
/* Read a single matching branch: either "body on pattern" or the final
 * "body otherwise", which is represented with a wildcard pattern. */
public _parseMatchingBranch(): ASTNode {
    /* Every branch begins with the expression it yields */
    const body = this._parseExpression();
    const followingTag = this._currentToken.tag;

    if (followingTag === T_ON) {
        this._match(T_ON);
        const branchPattern = this._parsePattern();
        const branch = new ASTMatchingBranch(branchPattern, body);
        branch.startPos = body.startPos;
        branch.endPos = branchPattern.endPos;
        return branch;
    }

    if (followingTag === T_OTHERWISE) {
        /* The "otherwise" token is not consumed here; it only lends its
         * positions to the wildcard pattern and to the end of the branch. */
        const wildcard = new ASTPatternWildcard();
        wildcard.startPos = this._currentToken.startPos;
        wildcard.endPos = this._currentToken.endPos;
        const branch = new ASTMatchingBranch(wildcard, body);
        branch.startPos = body.startPos;
        branch.endPos = this._currentToken.endPos;
        return branch;
    }

    /* Neither "on" nor "otherwise" follows the body */
    fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [
        toFunc(i18n('<alternative>'))([i18n('T_ON'), i18n('T_OTHERWISE')]),
        i18n(Symbol.keyFor(this._currentToken.tag))
    ]);
}
/** FieldBinding **/
/* Read a field binding: a lowercase field name, the T_GETS token and the
 * expression bound to the field. */
public _parseFieldBinding(): ASTNode {
    const nameToken = this._parseLowerid();
    this._match(T_GETS);
    const boundExpr = this._parseExpression();

    /* The binding spans from the field name to the end of its value */
    const binding = new ASTFieldBinding(nameToken, boundExpr);
    binding.startPos = nameToken.startPos;
    binding.endPos = boundExpr.endPos;
    return binding;
}
/** Helpers **/
/* Advance to the next token */
public _nextToken(): void {
    /* Ask the lexer for the following token and make it the current one */
    const upcoming = this._lexer.nextToken();
    this._currentToken = upcoming;
}
/* Check that the current token has the expected tag.
* Then advance to the next token. */
public _match(tokenTag: symbol): void {
    const found = this._currentToken;
    const matches = found.tag === tokenTag;
    if (!matches) {
        /* Report the expected tag against the one actually found */
        fail(found.startPos, found.endPos, 'expected-but-found', [
            i18n(Symbol.keyFor(tokenTag)),
            i18n(Symbol.keyFor(found.tag))
        ]);
    }
    this._nextToken();
}
/* Check that the current token has the expected tag.
* Then advance to the next token.
* Otherwise report that any of the alternatives in the tagList
* was expected.
*/
public _matchExpected(tokenTag: symbol, tagList: symbol[]): void {
    const found = this._currentToken;
    if (found.tag !== tokenTag) {
        /* The message lists every tag in tagList, not just tokenTag */
        const describeAlternatives = toFunc(i18n('<alternative>'));
        const expectedNames = tagList.map((tag) => i18n(Symbol.keyFor(tag)));
        fail(found.startPos, found.endPos, 'expected-but-found', [
            describeAlternatives(expectedNames),
            i18n(Symbol.keyFor(found.tag))
        ]);
    }
    this._nextToken();
}
/* Parse a delimited list:
* rightDelimiter: token tag for the right delimiter
* separator: token tag for the separator
* parseElement: function that parses one element */
public _parseDelimitedSeq<T>(
    rightDelimiter: symbol,
    separator: symbol,
    parseElement: () => T
): T[] {
    if (this._currentToken.tag !== rightDelimiter) {
        /* At least one element: read it, then delegate the rest of the sequence */
        const head = parseElement();
        return this._parseNonEmptyDelimitedSeq(rightDelimiter, separator, [head], parseElement);
    }
    /* The right delimiter comes first: the sequence is empty */
    return [];
}
/* Parse a delimited list, assuming the first elements are already given.
* rightDelimiter: token tag for the right delimiter
* separator: token tag for the separator
* prefix: non-empty list of all the first elements (already given)
* parseElement: function that parses one element */
public _parseNonEmptyDelimitedSeq<T>(
    rightDelimiter: symbol,
    separator: symbol,
    prefix: T[],
    parseElement: () => T
): T[] {
    /* Each separator must be followed by one more element. New elements are
     * appended to `prefix` itself, which is also the array that is returned. */
    while (this._currentToken.tag === separator) {
        this._match(separator);
        const element = parseElement();
        prefix.push(element);
    }

    /* The sequence has to stop at the right delimiter, which is not consumed */
    if (this._currentToken.tag === rightDelimiter) {
        return prefix;
    }
    fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [
        toFunc(i18n('<alternative>'))([
            i18n(Symbol.keyFor(separator)),
            i18n(Symbol.keyFor(rightDelimiter))
        ]),
        i18n(Symbol.keyFor(this._currentToken.tag))
    ]);
    return prefix;
}
/* Check that the current token is a T_LOWERID, advance past it and
 * return that token. */
public _parseLowerid(): Token {
    const identifierToken = this._currentToken;
    this._match(T_LOWERID);
    return identifierToken;
}
/* Check that the current token is a T_UPPERID, advance past it and
 * return that token. */
public _parseUpperid(): Token {
    const identifierToken = this._currentToken;
    this._match(T_UPPERID);
    return identifierToken;
}
/* Parse a comma-separated list of lowercase identifiers that ends at a
 * right parenthesis, without consuming the parenthesis. */
public _parseLoweridSeq(): Token[] {
    const readName = (): Token => this._parseLowerid();
    return this._parseDelimitedSeq(T_RPAREN, T_COMMA, readName);
}
/* Parse a list of expressions delimited by the given right delimiter
* e.g. T_RPAREN or T_RBRACK, without consuming the delimiter. */
public _parseExpressionSeq(rightDelimiter: symbol): ASTExpr[] {
    /* Elements are expressions and the separator is always a comma */
    const readExpression = () => this._parseExpression();
    return this._parseDelimitedSeq(rightDelimiter, T_COMMA, readExpression);
}
}