UNPKG

@gobstones/gobstones-parser

Version:
1,413 lines (1,318 loc) 49.7 kB
/* eslint-disable camelcase */
/* eslint-disable no-underscore-dangle */
import {
    ASTConstructorDeclaration,
    ASTDefFunction,
    ASTDefInteractiveProgram,
    ASTDefProcedure,
    ASTDefProgram,
    ASTDefType,
    ASTExpr,
    ASTExprChoose,
    ASTExprConstantNumber,
    ASTExprConstantString,
    ASTExprFunctionCall,
    ASTExprList,
    ASTExprMatching,
    ASTExprRange,
    ASTExprStructure,
    ASTExprStructureUpdate,
    ASTExprTuple,
    ASTExprVariable,
    ASTFieldBinding,
    ASTMain,
    ASTMatchingBranch,
    ASTNode,
    ASTNodeWithPattern,
    ASTPattern,
    ASTPatternNumber,
    ASTPatternStructure,
    ASTPatternTimeout,
    ASTPatternTuple,
    ASTPatternVariable,
    ASTPatternWildcard,
    ASTStmtAssignTuple,
    ASTStmtAssignVariable,
    ASTStmtBlock,
    ASTStmtForeach,
    ASTStmtIf,
    ASTStmtProcedureCall,
    ASTStmtRepeat,
    ASTStmtReturn,
    ASTStmtSwitch,
    ASTStmtWhile,
    ASTSwitchBranch,
    N_ExprVariable
} from './ast';
import { Input, SourceReader } from './reader';
import {
    T_ARROW,
    T_ASSIGN,
    T_CASE,
    T_CHOOSE,
    T_COMMA,
    T_ELLIPSIS,
    T_ELSE,
    T_ELSEIF,
    T_EOF,
    T_FIELD,
    T_FOREACH,
    T_FUNCTION,
    T_GETS,
    T_IF,
    T_IN,
    T_INTERACTIVE,
    T_IS,
    T_LBRACE,
    T_LBRACK,
    T_LET,
    T_LOWERID,
    T_LPAREN,
    T_MATCHING,
    T_MINUS,
    T_NUM,
    T_ON,
    T_OTHERWISE,
    T_PIPE,
    T_PROCEDURE,
    T_PROGRAM,
    T_RANGE,
    T_RBRACE,
    T_RBRACK,
    T_RECORD,
    T_REPEAT,
    T_RETURN,
    T_RPAREN,
    T_SELECT,
    T_SEMICOLON,
    T_STRING,
    T_SWITCH,
    T_THEN,
    T_TIMEOUT,
    T_TO,
    T_TYPE,
    T_UNDERSCORE,
    T_UPPERID,
    T_VARIANT,
    T_WHEN,
    T_WHILE,
    Token
} from './token';
import { GbsSyntaxError } from './exceptions';
import { Lexer } from './lexer';
import { i18n } from './i18n';

/* Narrow a 'string | Function' value to Function.
 * Only for typing purposes: this is a cast, no runtime check is made. */
// eslint-disable-next-line @typescript-eslint/ban-types
const toFunc = (x: string | Function): Function => x as Function;

/* Narrow a 'string | Function' value to string.
 * Only for typing purposes: this is a cast, no runtime check is made. */
// eslint-disable-next-line @typescript-eslint/ban-types
const toStr = (x: string | Function): string => x as string;

/* Fixity markers used to tag each precedence level. */
const Infix = Symbol.for('Infix');
const InfixL = Symbol.for('InfixL');
const InfixR = Symbol.for('InfixR');
const Prefix = Symbol.for('Prefix');

/* A precedence level: a fixity together with the operators
 * that belong to that level. */
class PrecedenceLevel {
    private readonly _fixity: symbol;
    private readonly _operators: Record<string, string>;

    /* Operators should be a dictionary mapping operator tags to
     * their function names */
    public constructor(fixity: symbol, operators: Record<string, string>) {
        this._fixity = fixity;
        this._operators = operators;
    }

    /* The fixity symbol this level was constructed with. */
    public get fixity(): symbol {
        return this._fixity;
    }

    /* Tell whether the tag of the given token names an operator of this
     * level. Only the dictionary's own keys are considered, so a tag whose
     * key happens to match an inherited Object.prototype member
     * (e.g. 'toString') is not mistaken for an operator. */
    public isOperator(token: Token): boolean {
        const key = Symbol.keyFor(token.tag);
        return key !== undefined && Object.prototype.hasOwnProperty.call(this._operators, key);
    }

    /* Build a T_LOWERID token carrying the function name registered for
     * the operator token, at the same source positions as the operator. */
    public functionName(token: Token): Token {
        return new Token(
            T_LOWERID,
            this._operators[Symbol.keyFor(token.tag)],
            token.startPos,
            token.endPos
        );
    }
}

/* OPERATORS is a list of precedence levels.
 * Precedence levels are ordered from lesser to greater precedence.
 */
const OPERATORS = [
    /* Logical operators */
    new PrecedenceLevel(InfixR, { T_OR: '||' }),
    new PrecedenceLevel(InfixR, { T_AND: '&&' }),
    new PrecedenceLevel(Prefix, { T_NOT: 'not' }),
    /* Relational operators */
    new PrecedenceLevel(Infix, {
        T_EQ: '==',
        T_NE: '/=',
        T_LE: '<=',
        T_GE: '>=',
        T_LT: '<',
        T_GT: '>'
    }),
    /* List concatenation */
    new PrecedenceLevel(InfixL, { T_CONCAT: '++' }),
    /* Additive operators */
    new PrecedenceLevel(InfixL, { T_PLUS: '+', T_MINUS: '-' }),
    /* Multiplicative operators */
    new PrecedenceLevel(InfixL, { T_TIMES: '*' }),
    /* Division operators */
    new PrecedenceLevel(InfixL, { T_DIV: 'div', T_MOD: 'mod' }),
    /* Exponential operators */
    new PrecedenceLevel(InfixR, { T_POW: '^' }),
    /* Unary minus */
    new PrecedenceLevel(Prefix, { T_MINUS: '-(unary)' })
];

/* Report a syntax error spanning startPos..endPos.
 * Always throws a GbsSyntaxError built from the given reason and
 * arguments; it never returns normally. */
function fail(startPos: SourceReader, endPos: SourceReader, reason: string, args: any[]): void {
    throw new GbsSyntaxError(startPos, endPos, reason, args);
}

/* Represents a parser for a Gobstones/XGobstones program.
 * It is structured as a straightforward recursive-descent parser.
 *
 * The parameter 'input' may be either a string or a dictionary
 * mapping filenames to strings.
* * All the "parseFoo" methods agree to the following convention: * - parseFoo returns an AST for a Foo construction, * - parseFoo consumes a fragment of the input by successively requesting * the next token from the lexer, * - when calling parseFoo, the current token should already be located * on the first token of the corresponding construction, * - when parseFoo returns, the current token is already located on * the following token, after the corresponding construction. */ export class Parser { private _lexer: Lexer; private _currentToken: Token; public constructor(input: Input) { this._lexer = new Lexer(input); this._nextToken(); } /* Return the AST that results from parsing a full program */ public parse(): ASTMain { const definitions = []; while (this._currentToken.tag !== T_EOF) { definitions.push(this._parseDefinition()); } return new ASTMain(definitions); } /* Return the list of all language options collected by the tokenizer. * Language options are set by the LANGUAGE pragma. 
*/ public getLanguageOptions(): string[] { return this._lexer.getLanguageOptions(); } /** Definitions **/ public _parseDefinition(): ASTNode { switch (this._currentToken.tag) { case T_PROGRAM: return this._parseDefProgram(); case T_INTERACTIVE: return this._parseDefInteractiveProgram(); case T_PROCEDURE: return this._parseDefProcedure(); case T_FUNCTION: return this._parseDefFunction(); case T_TYPE: return this._parseDefType(); default: fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [ i18n('definition'), i18n(Symbol.keyFor(this._currentToken.tag)) ]); } } public _parseDefProgram(): ASTNode { const startPos = this._currentToken.startPos; this._match(T_PROGRAM); const attributes = this._lexer.getPendingAttributes(); const block = this._parseStmtBlock(); const result = new ASTDefProgram(block); result.startPos = startPos; result.endPos = block.endPos; result.attributes = attributes; return result; } public _parseDefInteractiveProgram(): ASTNode { const startPos = this._currentToken.startPos; this._match(T_INTERACTIVE); this._match(T_PROGRAM); const attributes = this._lexer.getPendingAttributes(); this._match(T_LBRACE); const branches = this._parseSwitchBranches(); const endPos = this._currentToken.startPos; this._match(T_RBRACE); const result = new ASTDefInteractiveProgram(branches); result.startPos = startPos; result.endPos = endPos; result.attributes = attributes; return result; } public _parseDefProcedure(): ASTNode { const startPos = this._currentToken.startPos; this._match(T_PROCEDURE); const name = this._parseUpperid(); this._match(T_LPAREN); const parameters = this._parseLoweridSeq(); this._match(T_RPAREN); const attributes = this._lexer.getPendingAttributes(); const block = this._parseStmtBlock(); const result = new ASTDefProcedure(name, parameters, block); result.startPos = startPos; result.endPos = block.endPos; result.attributes = attributes; return result; } public _parseDefFunction(): ASTNode { const startPos = 
this._currentToken.startPos; this._match(T_FUNCTION); const name = this._currentToken; this._match(T_LOWERID); this._match(T_LPAREN); const parameters = this._parseLoweridSeq(); this._match(T_RPAREN); const attributes = this._lexer.getPendingAttributes(); const block = this._parseStmtBlock(); const result = new ASTDefFunction(name, parameters, block); result.startPos = startPos; result.endPos = block.endPos; result.attributes = attributes; return result; } public _parseDefType(): ASTNode { const startPos = this._currentToken.startPos; this._match(T_TYPE); const typeName = this._parseUpperid(); this._match(T_IS); switch (this._currentToken.tag) { case T_RECORD: return this._parseDefTypeRecord(startPos, typeName); case T_VARIANT: return this._parseDefTypeVariant(startPos, typeName); default: fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [ toFunc(i18n('<alternative>'))([i18n('T_RECORD'), i18n('T_VARIANT')]), i18n(Symbol.keyFor(this._currentToken.tag)) ]); } } public _parseDefTypeRecord(startPos: SourceReader, typeName: Token): ASTNode { this._match(T_RECORD); const attributes = this._lexer.getPendingAttributes(); this._match(T_LBRACE); const fieldNames = this._parseFieldNames(); const endPos = this._currentToken.startPos; this._matchExpected(T_RBRACE, [T_FIELD, T_RBRACE]); const result = new ASTDefType(typeName, [ new ASTConstructorDeclaration(typeName, fieldNames) ]); result.startPos = startPos; result.endPos = endPos; result.attributes = attributes; return result; } public _parseDefTypeVariant(startPos: SourceReader, typeName: Token): ASTNode { const constructorDeclarations = []; this._match(T_VARIANT); const attributes = this._lexer.getPendingAttributes(); this._match(T_LBRACE); while (this._currentToken.tag === T_CASE) { constructorDeclarations.push(this._parseConstructorDeclaration()); } const endPos = this._currentToken.startPos; this._matchExpected(T_RBRACE, [T_CASE, T_RBRACE]); const result = new ASTDefType(typeName, 
constructorDeclarations); result.startPos = startPos; result.endPos = endPos; result.attributes = attributes; return result; } public _parseConstructorDeclaration(): ASTNode { const startPos = this._currentToken.startPos; this._match(T_CASE); const constructorName = this._parseUpperid(); this._match(T_LBRACE); const fieldNames = this._parseFieldNames(); const endPos = this._currentToken.startPos; this._matchExpected(T_RBRACE, [T_FIELD, T_RBRACE]); const result = new ASTConstructorDeclaration(constructorName, fieldNames); result.startPos = startPos; result.endPos = endPos; return result; } public _parseFieldNames(): Token[] { const fieldNames = []; while (this._currentToken.tag === T_FIELD) { this._match(T_FIELD); fieldNames.push(this._parseLowerid()); } return fieldNames; } /** Statements **/ /* Statement, optionally followed by semicolon */ public _parseStatement(): ASTNode { const statement = this._parsePureStatement(); if (this._currentToken.tag === T_SEMICOLON) { this._match(T_SEMICOLON); } return statement; } /* Statement (not followed by semicolon) */ public _parsePureStatement(): ASTNode { switch (this._currentToken.tag) { case T_ELLIPSIS: return this._parseStmtEllipsis(); case T_RETURN: return this._parseStmtReturn(); case T_IF: return this._parseStmtIf(true /* expectInitialIf */); case T_REPEAT: return this._parseStmtRepeat(); case T_FOREACH: return this._parseStmtForeach(); case T_WHILE: return this._parseStmtWhile(); case T_SWITCH: return this._parseStmtSwitch(); case T_LET: return this._parseStmtLet(); case T_LBRACE: return this._parseStmtBlock(); case T_LOWERID: return this._parseStmtAssignVariable(); case T_UPPERID: return this._parseStmtProcedureCall(); case T_LPAREN: /* Special error for rejecting tuple assignments * (x1, ..., xN) := expression * in favour of * let (x1, ..., xN) := expression */ fail( this._currentToken.startPos, this._currentToken.endPos, 'obsolete-tuple-assignment', [] ); return; default: fail(this._currentToken.startPos, 
this._currentToken.endPos, 'expected-but-found', [ i18n('statement'), i18n(Symbol.keyFor(this._currentToken.tag)) ]); } } public _parseStmtBlock(): ASTStmtBlock { const startPos = this._currentToken.startPos; const statements = []; this._match(T_LBRACE); while (this._currentToken.tag !== T_RBRACE) { statements.push(this._parseStatement()); if (this._currentToken.tag === T_SEMICOLON) { this._match(T_SEMICOLON); } } const endPos = this._currentToken.startPos; this._match(T_RBRACE); const result = new ASTStmtBlock(statements); result.startPos = startPos; result.endPos = endPos; return result; } public _parseStmtEllipsis(): ASTNode { const startPos = this._currentToken.startPos; this._match(T_ELLIPSIS); const result = new ASTStmtProcedureCall( new Token(T_UPPERID, toStr(i18n('PRIM:BOOM')), startPos, startPos), [new ASTExprConstantString(new Token(T_STRING, toStr(i18n('errmsg:ellipsis'))))] ); result.startPos = startPos; result.endPos = this._currentToken.startPos; return result; } public _parseStmtReturn(): ASTNode { const startPos = this._currentToken.startPos; this._match(T_RETURN); const tuple = this._parseExprTuple(false /* possiblyEmpty */); const result = new ASTStmtReturn(tuple); result.startPos = startPos; result.endPos = tuple.endPos; return result; } public _parseStmtIf(expectInitialIf: boolean): ASTNode { const startPos = this._currentToken.startPos; if (expectInitialIf) { this._match(T_IF); } this._match(T_LPAREN); const condition = this._parseExpression(); this._match(T_RPAREN); /* Optional 'then' */ if (this._currentToken.tag === T_THEN) { this._match(T_THEN); } const thenBlock = this._parseStmtBlock(); let endPos: SourceReader; let elseBlock: ASTNode; if (this._currentToken.tag === T_ELSEIF) { this._match(T_ELSEIF); elseBlock = this._parseStmtIf(false /* expectInitialIf */); endPos = elseBlock.endPos; } else if (this._currentToken.tag === T_ELSE) { this._match(T_ELSE); elseBlock = this._parseStmtBlock(); endPos = elseBlock.endPos; } else { elseBlock = 
undefined;
            endPos = thenBlock.endPos;
        }
        const result = new ASTStmtIf(condition, thenBlock, elseBlock);
        result.startPos = startPos;
        result.endPos = endPos;
        return result;
    }

    /* Parse "repeat (<expression>) <block>".
     * The node spans from the 'repeat' keyword to the end of the block. */
    public _parseStmtRepeat(): ASTNode {
        const startPos = this._currentToken.startPos;
        this._match(T_REPEAT);
        this._match(T_LPAREN);
        const times = this._parseExpression();
        this._match(T_RPAREN);
        const body = this._parseStmtBlock();
        const result = new ASTStmtRepeat(times, body);
        result.startPos = startPos;
        result.endPos = body.endPos;
        return result;
    }

    /* Parse "foreach <pattern> in <expression> <block>".
     * The node spans from the 'foreach' keyword to the end of the block. */
    public _parseStmtForeach(): ASTNode {
        const startPos = this._currentToken.startPos;
        this._match(T_FOREACH);
        const pattern = this._parsePattern();
        this._match(T_IN);
        const range = this._parseExpression();
        const body = this._parseStmtBlock();
        const result = new ASTStmtForeach(pattern, range, body);
        result.startPos = startPos;
        result.endPos = body.endPos;
        return result;
    }

    /* Parse "while (<expression>) <block>".
     * The node spans from the 'while' keyword to the end of the block. */
    public _parseStmtWhile(): ASTNode {
        const startPos = this._currentToken.startPos;
        this._match(T_WHILE);
        this._match(T_LPAREN);
        const condition = this._parseExpression();
        this._match(T_RPAREN);
        const body = this._parseStmtBlock();
        const result = new ASTStmtWhile(condition, body);
        result.startPos = startPos;
        result.endPos = body.endPos;
        return result;
    }

    /* Parse "switch (<expression>) [to] { <branches> }".
     * The node ends at the start of the closing brace. */
    public _parseStmtSwitch(): ASTStmtSwitch {
        const startPos = this._currentToken.startPos;
        this._match(T_SWITCH);
        this._match(T_LPAREN);
        const subject = this._parseExpression();
        this._match(T_RPAREN);
        /* Optional 'to' */
        if (this._currentToken.tag === T_TO) {
            this._match(T_TO);
        }
        this._match(T_LBRACE);
        const branches = this._parseSwitchBranches();
        const endPos = this._currentToken.startPos;
        this._match(T_RBRACE);
        const result = new ASTStmtSwitch(subject, branches);
        result.startPos = startPos;
        result.endPos = endPos;
        return result;
    }

    /* Parse a statement introduced by 'let':
     *   let x := expression
     *   let (x1, ..., xN) := expression
     * The resulting assignment node starts at the 'let' keyword. */
    public _parseStmtLet(): ASTNode {
        const startPos = this._currentToken.startPos;
        this._match(T_LET);
        let result: ASTNode;
        if (this._currentToken.tag === T_LOWERID) {
            result = this._parseStmtAssignVariable();
        } else if (this._currentToken.tag === T_LPAREN) {
            result = this._parseStmtAssignTuple();
        } else {
            /* The '<alternative>' formatter receives a single array of
             * alternatives, as at every other call site in this parser. */
            fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [
                toFunc(i18n('<alternative>'))([i18n('T_LOWERID'), i18n('T_LPAREN')]),
                toStr(i18n(Symbol.keyFor(this._currentToken.tag)))
            ]);
        }
        /* Override the start so that the node includes the 'let' keyword */
        result.startPos = startPos;
        return result;
    }

    /* Parse "x := expression".
     * The node spans from the variable to the end of the expression. */
    public _parseStmtAssignVariable(): ASTNode {
        const variable = this._parseLowerid();
        this._match(T_ASSIGN);
        const value = this._parseExpression();
        const result = new ASTStmtAssignVariable(variable, value);
        result.startPos = variable.startPos;
        result.endPos = value.endPos;
        return result;
    }

    /* Parse "(x1, ..., xN) := expression".
     * A tuple with exactly one variable is rejected with the
     * 'assignment-tuple-cannot-be-singleton' error. */
    public _parseStmtAssignTuple(): ASTNode {
        const startPos = this._currentToken.startPos;
        this._match(T_LPAREN);
        const variables = this._parseLoweridSeq();
        if (variables.length === 1) {
            fail(startPos, this._currentToken.endPos, 'assignment-tuple-cannot-be-singleton', []);
        }
        this._match(T_RPAREN);
        this._match(T_ASSIGN);
        const value = this._parseExpression();
        const result = new ASTStmtAssignTuple(variables, value);
        result.startPos = startPos;
        result.endPos = value.endPos;
        return result;
    }

    /* Parse "P(expr1, ..., exprN)".
     * The node spans from the procedure name to the start of the
     * closing parenthesis. */
    public _parseStmtProcedureCall(): ASTNode {
        const procedureName = this._parseUpperid();
        this._match(T_LPAREN);
        const args = this._parseDelimitedSeq(T_RPAREN, T_COMMA, () => this._parseExpression());
        const endPos = this._currentToken.startPos;
        this._match(T_RPAREN);
        const result = new ASTStmtProcedureCall(procedureName, args);
        result.startPos = procedureName.startPos;
        result.endPos = endPos;
        return result;
    }

    /** Patterns **/

    /* Parse a pattern, dispatching on the tag of the current token. */
    public _parsePattern(): ASTPattern {
        switch (this._currentToken.tag) {
            case T_UNDERSCORE:
                return this._parsePatternWildcard();
            case T_LOWERID:
                return this._parsePatternVariable();
            case T_NUM:
            case T_MINUS:
                return this._parsePatternNumber();
            case T_UPPERID:
                return this._parsePatternStructure();
            case T_LPAREN:
                return this._parsePatternTuple();
            case T_TIMEOUT:
                return this._parsePatternTimeout();
            default:
                fail(this._currentToken.startPos,
this._currentToken.endPos, 'expected-but-found', [ i18n('pattern'), i18n(Symbol.keyFor(this._currentToken.tag)) ]); } } public _parsePatternWildcard(): ASTPatternWildcard { const startPos = this._currentToken.startPos; this._match(T_UNDERSCORE); const result = new ASTPatternWildcard(); const endPos = startPos; result.startPos = startPos; result.endPos = endPos; return result; } public _parsePatternVariable(): ASTPatternVariable { const startPos = this._currentToken.startPos; const id = this._parseLowerid(); const result = new ASTPatternVariable(id); result.startPos = startPos; result.endPos = id.endPos; return result; } public _parsePatternNumber(): ASTPatternNumber { const startPos = this._currentToken.startPos; let sign = ''; if (this._currentToken.tag === T_MINUS) { this._match(T_MINUS); sign = '-'; } let number = this._currentToken; this._match(T_NUM); const value = sign + number.value; if (value === '-0') { fail(startPos, number.endPos, 'pattern-number-cannot-be-negative-zero', []); } number = new Token(T_NUM, value, number.startPos, number.endPos); const result = new ASTPatternNumber(number); result.startPos = startPos; result.endPos = number.endPos; return result; } public _parsePatternStructure(): ASTPatternStructure { const startPos = this._currentToken.startPos; let endPos = this._currentToken.startPos; const constructor = this._parseUpperid(); let parameters; if (this._currentToken.tag === T_LPAREN) { this._match(T_LPAREN); parameters = this._parseLoweridSeq(); endPos = this._currentToken.startPos; this._match(T_RPAREN); } else { parameters = []; } const result = new ASTPatternStructure(constructor, parameters); result.startPos = startPos; result.endPos = endPos; return result; } public _parsePatternTuple(): ASTPatternTuple { const startPos = this._currentToken.startPos; this._match(T_LPAREN); const parameters = this._parseLoweridSeq(); if (parameters.length === 1) { fail(startPos, this._currentToken.endPos, 'pattern-tuple-cannot-be-singleton', []); } 
const endPos = this._currentToken.startPos; this._match(T_RPAREN); const result = new ASTPatternTuple(parameters); result.startPos = startPos; result.endPos = endPos; return result; } public _parsePatternTimeout(): ASTPatternTimeout { const startPos = this._currentToken.startPos; this._match(T_TIMEOUT); this._match(T_LPAREN); const timeout = this._currentToken; this._match(T_NUM); const endPos = this._currentToken.startPos; this._match(T_RPAREN); const result = new ASTPatternTimeout(timeout); result.startPos = startPos; result.endPos = endPos; return result; } /** Expressions **/ public _parseExpression(): ASTExpr { return this._parseExprOperator(0); } /* Read an expression of the given level. * * If the list OPERATORS of precedence levels has N elements, then: * - Expressions of level 0 are arbitrary expressions. * - Expressions of level N are atomic expressions. * - In general, expressions of level I involve operators * of levels I, I+1, ..., N-1, * and they can only include operators of the lower levels * by surrounding them in parentheses. 
*/ public _parseExprOperator(level: number): ASTExpr { if (level === OPERATORS.length) { return this._parseExprAtom(); } switch (OPERATORS[level].fixity) { case Infix: return this._parseExprOperatorInfix(level); case InfixL: return this._parseExprOperatorInfixL(level); case InfixR: return this._parseExprOperatorInfixR(level); case Prefix: return this._parseExprOperatorPrefix(level); default: throw Error('Invalid operator.'); } } public _parseExprOperatorInfix(level: number): ASTExpr { const left = this._parseExprOperator(level + 1); if (OPERATORS[level].isOperator(this._currentToken)) { const op = this._currentToken; this._nextToken(); const right = this._parseExprOperator(level + 1); /* Check that it is not used associatively */ if (OPERATORS[level].isOperator(this._currentToken)) { fail( this._currentToken.startPos, this._currentToken.endPos, 'operators-are-not-associative', [i18n(Symbol.keyFor(op.tag)), i18n(Symbol.keyFor(this._currentToken.tag))] ); } const result = new ASTExprFunctionCall(OPERATORS[level].functionName(op), [ left, right ]); result.startPos = left.startPos; result.endPos = right.endPos; return result; } else { return left; } } public _parseExprOperatorInfixL(level: number): ASTExpr { let result = this._parseExprOperator(level + 1); while (OPERATORS[level].isOperator(this._currentToken)) { const op = this._currentToken; this._nextToken(); const right = this._parseExprOperator(level + 1); const result2 = new ASTExprFunctionCall(OPERATORS[level].functionName(op), [ result, right ]); result2.startPos = result.startPos; result2.endPos = right.endPos; result = result2; } return result; } public _parseExprOperatorInfixR(level: number): ASTExpr { const left = this._parseExprOperator(level + 1); if (OPERATORS[level].isOperator(this._currentToken)) { const op = this._currentToken; this._nextToken(); const right = this._parseExprOperator(level); /* same level */ const result = new ASTExprFunctionCall(OPERATORS[level].functionName(op), [ left, right ]); 
result.startPos = left.startPos; result.endPos = right.endPos; return result; } else { return left; } } public _parseExprOperatorPrefix(level: number): ASTExpr { if (OPERATORS[level].isOperator(this._currentToken)) { const op = this._currentToken; this._nextToken(); const inner = this._parseExprOperator(level); /* same level */ const result = new ASTExprFunctionCall(OPERATORS[level].functionName(op), [inner]); result.startPos = op.startPos; result.endPos = inner.endPos; return result; } else { return this._parseExprOperator(level + 1); } } /* Parse an atomic expression. * I.e. all the operators must be surrounded by parentheses */ public _parseExprAtom(): ASTExpr { switch (this._currentToken.tag) { case T_ELLIPSIS: return this._parseExprEllipsis(); case T_LOWERID: return this._parseExprVariableOrFunctionCall(); case T_NUM: return this._parseExprConstantNumber(); case T_STRING: return this._parseExprConstantString(); case T_CHOOSE: return this._parseExprChoose(true /* expectInitialChoose */); case T_MATCHING: return this._parseExprMatching(); case T_UPPERID: return this._parseExprStructureOrStructureUpdate(); case T_LPAREN: return this._parseExprTuple(true /* possiblyEmpty */); case T_LBRACK: return this._parseExprListOrRange(); default: fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [ i18n('expression'), i18n(Symbol.keyFor(this._currentToken.tag)) ]); } } public _parseExprEllipsis(): ASTExprFunctionCall { const startPos = this._currentToken.startPos; this._match(T_ELLIPSIS); const result = new ASTExprFunctionCall( new Token(T_LOWERID, toStr(i18n('PRIM:boom')), startPos, startPos), [new ASTExprConstantString(new Token(T_STRING, toStr(i18n('errmsg:ellipsis'))))] ); result.startPos = startPos; result.endPos = this._currentToken.startPos; return result; } public _parseExprVariableOrFunctionCall(): ASTExprVariable | ASTExprFunctionCall { const id = this._parseLowerid(); let result; let endPos; if (this._currentToken.tag === T_LPAREN) 
{ this._match(T_LPAREN); const args = this._parseExpressionSeq(T_RPAREN); result = new ASTExprFunctionCall(id, args); endPos = this._currentToken.startPos; this._match(T_RPAREN); } else { result = new ASTExprVariable(id); endPos = id.endPos; } result.startPos = id.startPos; result.endPos = endPos; return result; } public _parseExprConstantNumber(): ASTExprConstantNumber { const number = this._currentToken; this._match(T_NUM); const result = new ASTExprConstantNumber(number); result.startPos = number.startPos; result.endPos = number.endPos; return result; } public _parseExprConstantString(): ASTExprConstantString { const string = this._currentToken; this._match(T_STRING); const result = new ASTExprConstantString(string); result.startPos = string.startPos; result.endPos = string.endPos; return result; } public _parseExprChoose(expectInitialChoose: boolean): ASTExprChoose { const startPos = this._currentToken.startPos; if (expectInitialChoose) { this._match(T_CHOOSE); } const expr1 = this._parseExpression(); if (this._currentToken.tag === T_WHEN) { this._match(T_WHEN); this._match(T_LPAREN); const condition = this._parseExpression(); this._match(T_RPAREN); const expr2 = this._parseExprChoose(false /* expectInitialChoose */); const result = new ASTExprChoose(condition, expr1, expr2); result.startPos = startPos; result.endPos = expr2.endPos; return result; } else { const endPos = this._currentToken.endPos; this._match(T_OTHERWISE); expr1.startPos = startPos; expr1.endPos = endPos; return expr1 as ASTExprChoose; } } public _parseExprMatching(): ASTExprMatching { const startPos = this._currentToken.startPos; this._match(T_MATCHING); this._match(T_LPAREN); const subject = this._parseExpression(); this._match(T_RPAREN); this._match(T_SELECT); const branches = this._parseMatchingBranches(); const result = new ASTExprMatching(subject, branches); result.startPos = startPos; // result.endPos = result.endPos; return result; } /* * Parse any of the following constructions: * (1) 
Structure with no arguments: "Norte" * (2) Structure with no arguments and explicit parentheses: "Nil()" * (3) Structure with arguments: "Coord(x <- 1, y <- 2)" * (4) Update structure with arguments: "Coord(expression | x <- 2)" * * Deciding between (3) and (4) unfortunately cannot be done with one * token of lookahead, so after reading the constructor and a left * parenthesis we resort to the following workaround: * * - Parse an expression. * - If the next token is GETS ("<-") we are in case (3). * We must then ensure that the expression is just a variable * and recover its name. * - If the next token is PIPE ("|") we are in case (4), and we go on. */ public _parseExprStructureOrStructureUpdate(): ASTExprStructure | ASTExprStructureUpdate { const constructorName = this._parseUpperid(); if (this._currentToken.tag !== T_LPAREN) { /* Structure with no arguments, e.g. "Norte" */ const result = new ASTExprStructure(constructorName, []); result.startPos = constructorName.startPos; result.endPos = constructorName.endPos; return result; } this._match(T_LPAREN); if (this._currentToken.tag === T_RPAREN) { /* Structure with no arguments with explicit parentheses, * e.g. "Nil()" */ const result = new ASTExprStructure(constructorName, []); const endPos = this._currentToken.startPos; this._match(T_RPAREN); result.startPos = constructorName.startPos; result.endPos = endPos; return result; } const subject = this._parseExpression(); switch (this._currentToken.tag) { case T_GETS: if (subject.tag !== N_ExprVariable) { fail( this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [i18n('T_PIPE'), i18n('T_GETS')] ); return; } return this._parseStructure( constructorName, (subject as ASTExprVariable).variableName ); case T_PIPE: return this._parseStructureUpdate(constructorName, subject); case T_COMMA: case T_RPAREN: /* Issue a specific error message to deal with a common * programming error, namely calling a procedure name * where an expression is expected. 
*/ fail(constructorName.startPos, constructorName.endPos, 'expected-but-found', [ i18n('expression'), i18n('procedure call') ]); return; default: { let expected: string; if (subject.tag === N_ExprVariable) { expected = toFunc(i18n('<alternative>'))([i18n('T_GETS'), i18n('T_PIPE')]); } else { expected = toStr(i18n('T_PIPE')); } fail(constructorName.startPos, constructorName.endPos, 'expected-but-found', [ expected, i18n(Symbol.keyFor(this._currentToken.tag)) ]); } } } /* Parse a structure A(x1 <- expr1, ..., xN <- exprN) * where N >= 1, * assuming that "A(x1" has already been read. * * - constructorName and fieldName1 correspond to "A" and "x1" * respectively. */ public _parseStructure(constructorName: Token, fieldName1: Token): ASTExprStructure { /* Read "<- expr1" */ this._match(T_GETS); const value1 = this._parseExpression(); const fieldBinding1 = new ASTFieldBinding(fieldName1, value1); fieldBinding1.startPos = fieldName1.startPos; fieldBinding1.endPos = value1.endPos; /* Read "x2 <- expr2, ..., xN <- exprN" (this might be empty) */ const fieldBindings = this._parseNonEmptyDelimitedSeq( T_RPAREN, T_COMMA, [fieldBinding1], () => this._parseFieldBinding() ) as ASTFieldBinding[]; /* Read ")" */ const endPos = this._currentToken.startPos; this._match(T_RPAREN); /* Return an ExprStructure node */ const result = new ASTExprStructure(constructorName, fieldBindings); result.startPos = constructorName.startPos; result.endPos = endPos; return result; } /* Parse a structure update A(e | x1 <- expr1, ..., xN <- exprN) * where N >= 1, * assuming that "A(e" has already been read. * * constructorName and original correspond to "A" and "e" * respectively. 
*/ public _parseStructureUpdate( constructorName: Token, original: ASTExpr ): ASTExprStructureUpdate { /* Read "|" */ this._match(T_PIPE); /* Read "x2 <- expr2, ..., xN <- exprN" (this might be empty) */ const fieldBindings = this._parseDelimitedSeq(T_RPAREN, T_COMMA, () => this._parseFieldBinding() ) as ASTFieldBinding[]; /* Read ")" */ const endPos = this._currentToken.startPos; this._match(T_RPAREN); /* Return an ExprStructureUpdate node */ const result = new ASTExprStructureUpdate(constructorName, original, fieldBindings); result.startPos = constructorName.startPos; result.endPos = endPos; return result; } /* Read a list * [expr1, ..., exprN] * a range expression * [first .. last] * or a range expression with step * [first, second .. last] */ public _parseExprListOrRange(): ASTExprList | ASTExprRange { const startPos = this._currentToken.startPos; this._match(T_LBRACK); if (this._currentToken.tag === T_RBRACK) { return this._parseExprListRemainder(startPos, []); } const first = this._parseExpression(); switch (this._currentToken.tag) { case T_RBRACK: return this._parseExprListRemainder(startPos, [first]); case T_RANGE: return this._parseExprRange(startPos, first); case T_COMMA: { this._match(T_COMMA); const second = this._parseExpression(); switch (this._currentToken.tag) { case T_RBRACK: case T_COMMA: return this._parseExprListRemainder(startPos, [first, second]); case T_RANGE: return this._parseExprRange(startPos, first, second); default: fail( this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [ toFunc(i18n('<alternative>'))([ i18n('T_COMMA'), i18n('T_RANGE'), i18n('T_RBRACK') ]), i18n(Symbol.keyFor(this._currentToken.tag)) ] ); return; } } default: fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [ toFunc(i18n('<alternative>'))([ i18n('T_COMMA'), i18n('T_RANGE'), i18n('T_RBRACK') ]), i18n(Symbol.keyFor(this._currentToken.tag)) ]); return; } } /* Read the end of a list "[expr1, ..., exprN]" 
assumming we have * already read "[expr1, ..., exprK" up to some point K >= 1. * - startPos is the position of "[" * - prefix is the list of elements we have already read */ public _parseExprListRemainder(startPos: SourceReader, prefix: ASTExpr[]): ASTExprList { const elements = this._parseNonEmptyDelimitedSeq(T_RBRACK, T_COMMA, prefix, () => this._parseExpression() ); const endPos = this._currentToken.startPos; this._match(T_RBRACK); const result = new ASTExprList(elements); result.startPos = startPos; result.endPos = endPos; return result; } /* Read a range "[first..last]" or "[first,second..last]" * assumming we are left to read "..last]" * - startPos is the position of "[". * - second may be null */ public _parseExprRange(startPos: SourceReader, first: ASTExpr, second?: ASTExpr): ASTExprRange { this._match(T_RANGE); const last = this._parseExpression(); const endPos = this._currentToken.startPos; this._match(T_RBRACK); const result = new ASTExprRange(first, second, last); result.startPos = startPos; result.endPos = endPos; return result; } /* Read a list of expressions separated by commas and delimited * by parentheses. If there is a single expression, return the * expression itself. If there are 0 or >=2 expressions, return * a tuple. 
*/
    public _parseExprTuple(possiblyEmpty: boolean): ASTExprTuple {
        const openParenPos = this._currentToken.startPos;
        this._match(T_LPAREN);
        const components = this._parseExpressionSeq(T_RPAREN);
        const closeParenPos = this._currentToken.startPos;
        this._match(T_RPAREN);

        /* "()" is rejected when the caller does not allow an empty tuple */
        if (components.length === 0 && !possiblyEmpty) {
            fail(openParenPos, closeParenPos, 'return-tuple-cannot-be-empty', []);
        }

        /* A single parenthesized expression is returned as is (not wrapped
         * in a tuple); note that its positions are overwritten below so
         * that they span the parentheses. */
        const node: ASTExprTuple =
            components.length === 1
                ? (components[0] as ASTExprTuple)
                : new ASTExprTuple(components);
        node.startPos = openParenPos;
        node.endPos = closeParenPos;
        return node;
    }

    /** SwitchBranch **/

    /* Read switch branches one after another until the current token
     * is "}". The "}" itself is not consumed. */
    public _parseSwitchBranches(): ASTNodeWithPattern[] {
        const branches = [];
        let atClosingBrace = this._currentToken.tag === T_RBRACE;
        while (!atClosingBrace) {
            branches.push(this._parseSwitchBranch());
            atClosingBrace = this._currentToken.tag === T_RBRACE;
        }
        return branches;
    }

    /* Read a single switch branch: a pattern, an arrow and a block.
     * The node spans from the start of the pattern to the end of the block. */
    public _parseSwitchBranch(): ASTSwitchBranch {
        const branchPattern = this._parsePattern();
        this._match(T_ARROW);
        const branchBody = this._parseStmtBlock();

        const branch = new ASTSwitchBranch(branchPattern, branchBody);
        branch.startPos = branchPattern.startPos;
        branch.endPos = branchBody.endPos;
        return branch;
    }

    /** MatchingBranch **/

    /* Read matching branches until the current token is T_OTHERWISE,
     * then consume that token. The last branch parsed (the one directly
     * followed by T_OTHERWISE) leaves the token in place, which is what
     * ends the loop. */
    public _parseMatchingBranches(): ASTNode[] {
        const branches = [];
        let atOtherwise = this._currentToken.tag === T_OTHERWISE;
        while (!atOtherwise) {
            branches.push(this._parseMatchingBranch());
            atOtherwise = this._currentToken.tag === T_OTHERWISE;
        }
        this._match(T_OTHERWISE);
        return branches;
    }

    /* Read a single matching branch. The body expression comes first and
     * must be followed by either
     *   T_ON and a pattern  -- a branch guarded by that pattern, or
     *   T_OTHERWISE         -- a branch with a wildcard pattern located at
     *                          the T_OTHERWISE token (which is NOT consumed here).
     * Any other token is reported through fail. */
    public _parseMatchingBranch(): ASTNode {
        const body = this._parseExpression();
        const followingTag = this._currentToken.tag;

        if (followingTag === T_ON) {
            this._match(T_ON);
            const pattern = this._parsePattern();
            const branch = new ASTMatchingBranch(pattern, body);
            branch.startPos = body.startPos;
            branch.endPos = pattern.endPos;
            return branch;
        }

        if (followingTag === T_OTHERWISE) {
            const wildcard = new ASTPatternWildcard();
            wildcard.startPos = this._currentToken.startPos;
            wildcard.endPos = this._currentToken.endPos;
            const branch = new ASTMatchingBranch(wildcard, body);
            branch.startPos = body.startPos;
            branch.endPos = this._currentToken.endPos;
            return branch;
        }

        fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [
            toFunc(i18n('<alternative>'))([i18n('T_ON'), i18n('T_OTHERWISE')]),
            i18n(Symbol.keyFor(this._currentToken.tag))
        ]);
    }

    /** FieldBinding **/

    /* Read a field binding: a lowercase identifier, T_GETS and an
     * expression. The node spans from the field name to the end of the
     * expression. */
    public _parseFieldBinding(): ASTNode {
        const field = this._parseLowerid();
        this._match(T_GETS);
        const boundValue = this._parseExpression();

        const binding = new ASTFieldBinding(field, boundValue);
        binding.startPos = field.startPos;
        binding.endPos = boundValue.endPos;
        return binding;
    }

    /** Helpers **/

    /* Advance to the next token */
    public _nextToken(): void {
        const upcoming = this._lexer.nextToken();
        this._currentToken = upcoming;
    }

    /* Check that the current token has the expected tag.
     * Then advance to the next token.
     * A mismatch is reported through fail as 'expected-but-found', with the
     * expected tag's name followed by the name of the tag actually found. */
    public _match(tokenTag: symbol): void {
        const found = this._currentToken;
        if (found.tag !== tokenTag) {
            fail(found.startPos, found.endPos, 'expected-but-found', [
                i18n(Symbol.keyFor(tokenTag)),
                i18n(Symbol.keyFor(found.tag))
            ]);
        }
        this._nextToken();
    }

    /* Check that the current token has the expected tag.
     * Then advance to the next token.
     * Otherwise report that any of the alternatives in the tagList
     * was expected.
*/
    public _matchExpected(tokenTag: symbol, tagList: symbol[]): void {
        const found = this._currentToken;
        if (found.tag !== tokenTag) {
            /* The message lists every tag in tagList as an alternative,
             * followed by the tag that was actually found */
            const formatAlternatives = toFunc(i18n('<alternative>'));
            const expectedNames = tagList.map((expected) => i18n(Symbol.keyFor(expected)));
            fail(found.startPos, found.endPos, 'expected-but-found', [
                formatAlternatives(expectedNames),
                i18n(Symbol.keyFor(found.tag))
            ]);
        }
        this._nextToken();
    }

    /* Parse a possibly empty delimited list, without consuming the
     * right delimiter:
     *   rightDelimiter: token tag for the right delimiter
     *   separator: token tag for the separator
     *   parseElement: function that parses one element
     * If the current token already is the right delimiter the result is
     * an empty list; otherwise one element is parsed and the rest is
     * delegated to _parseNonEmptyDelimitedSeq.
     */
    public _parseDelimitedSeq<T>(
        rightDelimiter: symbol,
        separator: symbol,
        parseElement: () => T
    ): T[] {
        const isEmpty = this._currentToken.tag === rightDelimiter;
        return isEmpty
            ? []
            : this._parseNonEmptyDelimitedSeq(
                  rightDelimiter,
                  separator,
                  [parseElement()],
                  parseElement
              );
    }

    /* Parse the rest of a delimited list whose first elements are already given.
     *   rightDelimiter: token tag for the right delimiter
     *   separator: token tag for the separator
     *   prefix: the elements already read; new elements are appended to
     *           this very array, which is also the value returned
     *           (its length is not checked here)
     *   parseElement: function that parses one element
     * While the current token is the separator, it is consumed and one
     * more element is parsed. Afterwards the current token must be the
     * right delimiter (it is checked but not consumed); anything else is
     * reported through fail, naming the separator and the delimiter as
     * the expected alternatives.
     */
    public _parseNonEmptyDelimitedSeq<T>(
        rightDelimiter: symbol,
        separator: symbol,
        prefix: T[],
        parseElement: () => T
    ): T[] {
        const elements = prefix;
        for (
            let tag = this._currentToken.tag;
            tag === separator;
            tag = this._currentToken.tag
        ) {
            this._match(separator);
            elements.push(parseElement());
        }

        const stoppedAtDelimiter = this._currentToken.tag === rightDelimiter;
        if (!stoppedAtDelimiter) {
            fail(this._currentToken.startPos, this._currentToken.endPos, 'expected-but-found', [
                toFunc(i18n('<alternative>'))([
                    i18n(Symbol.keyFor(separator)),
                    i18n(Symbol.keyFor(rightDelimiter))
                ]),
                i18n(Symbol.keyFor(this._currentToken.tag))
            ]);
        }
        return elements;
    }

    /* Consume a T_LOWERID token and return it. The token is captured
     * before _match checks its tag and advances. */
    public _parseLowerid(): Token {
        const candidate = this._currentToken;
        this._match(T_LOWERID);
        return candidate;
    }

    /* Consume a T_UPPERID token and return it. The token is captured
     * before _match checks its tag and advances. */
    public _parseUpperid(): Token {
        const candidate = this._currentToken;
        this._match(T_UPPERID);
        return candidate;
    }

    /* Parse a possibly empty, comma-separated list of lowercase
     * identifiers up to (not including) a ")". */
    public _parseLoweridSeq(): Token[] {
        const parseName = () => this._parseLowerid();
        return this._parseDelimitedSeq(T_RPAREN, T_COMMA, parseName);
    }
/* Parse a possibly empty, comma-separated list of expressions that ends
     * at the given right delimiter (e.g. T_RPAREN or T_RBRACK).
     * The delimiter itself is left for the caller to consume.
     */
    public _parseExpressionSeq(rightDelimiter: symbol): ASTExpr[] {
        const parseOne = () => this._parseExpression();
        return this._parseDelimitedSeq(rightDelimiter, T_COMMA, parseOne);
    }
}