/*
 * Retrieved via UNPKG.
 * Package: java-parser — "Java Parser in JavaScript"
 * Version: (not captured)
 * File size: 696 lines (626 loc), 20.2 kB
 */
"use strict"; const { tokenMatcher } = require("chevrotain"); function defineRules($, t) { $.RULE("expression", () => { $.OR([ { GATE: () => this.BACKTRACK_LOOKAHEAD($.isLambdaExpression), ALT: () => $.SUBRULE($.lambdaExpression) }, { ALT: () => $.SUBRULE($.ternaryExpression) } ]); }); // https://docs.oracle.com/javase/specs/jls/se11/html/jls-15.html#jls-LambdaExpression $.RULE("lambdaExpression", () => { $.SUBRULE($.lambdaParameters); $.CONSUME(t.Arrow); $.SUBRULE($.lambdaBody); }); $.RULE("lambdaParameters", () => { $.OR([ { ALT: () => $.SUBRULE($.lambdaParametersWithBraces) }, { ALT: () => $.CONSUME(t.Identifier) } ]); }); $.RULE("lambdaParametersWithBraces", () => { $.CONSUME(t.LBrace); $.OPTION(() => { $.SUBRULE($.lambdaParameterList); }); $.CONSUME(t.RBrace); }); $.RULE("lambdaParameterList", () => { $.OR([ { GATE: () => { const nextTokType = this.LA(1).tokenType; const nextNextTokType = this.LA(2).tokenType; return ( tokenMatcher(nextTokType, t.Identifier) && (tokenMatcher(nextNextTokType, t.RBrace) || tokenMatcher(nextNextTokType, t.Comma)) ); }, ALT: () => $.SUBRULE($.inferredLambdaParameterList) }, { ALT: () => $.SUBRULE($.explicitLambdaParameterList) } ]); }); $.RULE("inferredLambdaParameterList", () => { $.CONSUME(t.Identifier); $.MANY(() => { $.CONSUME(t.Comma); $.CONSUME2(t.Identifier); }); }); $.RULE("explicitLambdaParameterList", () => { $.SUBRULE($.lambdaParameter); $.MANY(() => { $.CONSUME(t.Comma); $.SUBRULE2($.lambdaParameter); }); }); $.RULE("lambdaParameter", () => { // TODO: performance, investigate the performance boost that could // be gained by refactoring out the backtracking. 
$.OR([ { GATE: $.BACKTRACK($.regularLambdaParameter), ALT: () => $.SUBRULE($.regularLambdaParameter) }, { ALT: () => $.SUBRULE($.variableArityParameter) } ]); }); $.RULE("regularLambdaParameter", () => { $.MANY(() => { $.SUBRULE($.variableModifier); }); $.SUBRULE($.lambdaParameterType); $.SUBRULE($.variableDeclaratorId); }); $.RULE("lambdaParameterType", () => { $.OR({ DEF: [ { ALT: () => $.SUBRULE($.unannType) }, { ALT: () => $.CONSUME(t.Var) } ], IGNORE_AMBIGUITIES: true }); }); $.RULE("lambdaBody", () => { $.OR([ { ALT: () => $.SUBRULE($.expression) }, { ALT: () => $.SUBRULE($.block) } ]); }); $.RULE("ternaryExpression", () => { $.SUBRULE($.binaryExpression); $.OPTION(() => { $.CONSUME(t.QuestionMark); $.SUBRULE($.expression); $.CONSUME(t.Colon); // TODO: in the grammar this is limited to "lambdaExpression: or "conditionalExpression" $.SUBRULE2($.expression); }); }); $.RULE("binaryExpression", () => { $.SUBRULE($.unaryExpression); $.MANY(() => { $.OR({ DEF: [ { ALT: () => { $.CONSUME(t.Instanceof); $.SUBRULE($.referenceType); } }, { ALT: () => { $.CONSUME(t.AssignmentOperator); $.SUBRULE2($.expression); } }, // This is an example of why Java does not have a well designed grammar // See: https://manas.tech/blog/2008/10/12/why-java-generics-dont-have-problems-with-right-shift-operator.html // TODO: ensure the LT/GT sequences have no whitespace between each other. { // TODO: this is a bug in Chevrotain lookahead calculation. the "BinaryOperator" token can match "Less" or "Greater" // as well, but because it is a **token Category** Chevrotain does not understand it need to looks two tokens ahead. 
GATE: () => tokenMatcher($.LA(2).tokenType, t.Less) || tokenMatcher($.LA(2).tokenType, t.Greater), ALT: () => { $.OR2([ { GATE: () => $.LA(1).startOffset + 1 === $.LA(2).startOffset, ALT: () => { $.CONSUME(t.Less); $.CONSUME2(t.Less); } }, { GATE: () => $.LA(1).startOffset + 1 === $.LA(2).startOffset, ALT: () => { $.CONSUME(t.Greater); $.CONSUME2(t.Greater); $.OPTION({ GATE: () => $.LA(0).startOffset + 1 === $.LA(1).startOffset, DEF: () => $.CONSUME3(t.Greater) }); } } ]); $.SUBRULE2($.unaryExpression); } }, { ALT: () => { $.CONSUME(t.BinaryOperator); $.SUBRULE3($.unaryExpression); } } ], IGNORE_AMBIGUITIES: true // the ambiguity between 1 and 4 options is resolved by the order (instanceOf is first) }); }); }); $.RULE("unaryExpression", () => { $.MANY(() => { $.CONSUME(t.UnaryPrefixOperator); }); $.SUBRULE($.primary); $.MANY2(() => { $.CONSUME(t.UnarySuffixOperator); }); }); $.RULE("unaryExpressionNotPlusMinus", () => { $.MANY(() => { $.CONSUME(t.UnaryPrefixOperatorNotPlusMinus); }); $.SUBRULE($.primary); $.MANY2(() => { $.CONSUME(t.UnarySuffixOperator); }); }); $.RULE("primary", () => { $.SUBRULE($.primaryPrefix); $.MANY(() => { $.SUBRULE($.primarySuffix); }); }); $.RULE("primaryPrefix", () => { let isCastExpression = false; if (tokenMatcher($.LA(1).tokenType, t.LBrace)) { isCastExpression = this.BACKTRACK_LOOKAHEAD($.isCastExpression); } $.OR([ { ALT: () => $.SUBRULE($.literal) }, { ALT: () => $.CONSUME(t.This) }, { ALT: () => $.CONSUME(t.Void) }, { ALT: () => $.SUBRULE($.unannPrimitiveTypeWithOptionalDimsSuffix) }, { ALT: () => $.SUBRULE($.fqnOrRefType) }, { GATE: () => isCastExpression, ALT: () => $.SUBRULE($.castExpression) }, { ALT: () => $.SUBRULE($.parenthesisExpression) }, { ALT: () => $.SUBRULE($.newExpression) }, { ALT: () => $.SUBRULE($.switchStatement) } ]); }); $.RULE("primarySuffix", () => { $.OR({ DEF: [ { ALT: () => { $.CONSUME(t.Dot); $.OR2([ { ALT: () => $.CONSUME(t.This) }, { ALT: () => $.SUBRULE($.unqualifiedClassInstanceCreationExpression) }, 
{ ALT: () => { $.OPTION(() => { $.SUBRULE($.typeArguments); }); $.CONSUME(t.Identifier); } } ]); } }, { ALT: () => $.SUBRULE($.methodInvocationSuffix) }, { ALT: () => $.SUBRULE($.classLiteralSuffix) }, { ALT: () => $.SUBRULE($.arrayAccessSuffix) }, { ALT: () => $.SUBRULE($.methodReferenceSuffix) } ], MAX_LOOKAHEAD: 2 }); }); // See https://github.com/jhipster/prettier-java/pull/154 to understand // why fqnOrRefTypePart is split in two rules (First and Rest) $.RULE("fqnOrRefType", () => { $.SUBRULE($.fqnOrRefTypePartFirst); $.MANY2({ // ".class" is a classLiteralSuffix GATE: () => // avoids ambiguity with ".this" and ".new" which are parsed as a primary suffix. tokenMatcher(this.LA(2).tokenType, t.Class) === false && tokenMatcher(this.LA(2).tokenType, t.This) === false && tokenMatcher(this.LA(2).tokenType, t.New) === false, DEF: () => { $.CONSUME(t.Dot); $.SUBRULE2($.fqnOrRefTypePartRest); } }); // in case of an arrayType $.OPTION({ // it is not enough to check only the opening "[", we must avoid conflict with // arrayAccessSuffix GATE: () => tokenMatcher($.LA(1).tokenType, t.At) || tokenMatcher($.LA(2).tokenType, t.RSquare), DEF: () => { $.SUBRULE($.dims); } }); }); // TODO: validation: // 1. "annotation" cannot be mixed with "methodTypeArguments" or "Super". // 2. "methodTypeArguments" cannot be mixed with "classTypeArguments" or "annotation". // 3. "Super" cannot be mixed with "classTypeArguments" or "annotation". // 4. At most one "Super" may be used. // 5. "Super" may be last or one before last (last may also be first if there is only a single part). 
$.RULE("fqnOrRefTypePartRest", () => { $.MANY(() => { $.SUBRULE($.annotation); }); $.OPTION({ NAME: "$methodTypeArguments", DEF: () => { $.SUBRULE2($.typeArguments); } }); $.SUBRULE($.fqnOrRefTypePartCommon); }); $.RULE("fqnOrRefTypePartCommon", () => { $.OR([ { ALT: () => $.CONSUME(t.Identifier) }, { ALT: () => $.CONSUME(t.Super) } ]); let isRefTypeInMethodRef = false; // Performance optimization, only perform this backtracking when a '<' is found // TODO: performance optimization evaluation: avoid doing this backtracking for every "<" encountered. // we could do it once (using global state) per "fqnOrRefType" // We could do it only once for if (tokenMatcher($.LA(1).tokenType, t.Less)) { isRefTypeInMethodRef = this.BACKTRACK_LOOKAHEAD($.isRefTypeInMethodRef); } $.OPTION2({ NAME: "$classTypeArguments", // unrestricted typeArguments here would create an ambiguity with "LessThan" operator // e.g: "var x = a < b;" // The "<" would be parsed as the beginning of a "typeArguments" // and we will get an error: "expecting '>' but found: ';'" GATE: () => isRefTypeInMethodRef, DEF: () => { $.SUBRULE3($.typeArguments); } }); }); $.RULE("fqnOrRefTypePartFirst", () => { $.MANY(() => { $.SUBRULE($.annotation); }); $.SUBRULE($.fqnOrRefTypePartCommon); }); $.RULE("parenthesisExpression", () => { $.CONSUME(t.LBrace); $.SUBRULE($.expression); $.CONSUME(t.RBrace); }); $.RULE("castExpression", () => { $.OR([ { // TODO: performance: can avoid backtracking again here, parent rule could have this information // when it checks isCastExpression (refactor needed) GATE: () => this.BACKTRACK_LOOKAHEAD($.isPrimitiveCastExpression), ALT: () => $.SUBRULE($.primitiveCastExpression) }, { ALT: () => $.SUBRULE($.referenceTypeCastExpression) } ]); }); $.RULE("primitiveCastExpression", () => { $.CONSUME(t.LBrace); $.SUBRULE($.primitiveType); $.CONSUME(t.RBrace); $.SUBRULE($.unaryExpression); }); $.RULE("referenceTypeCastExpression", () => { $.CONSUME(t.LBrace); $.SUBRULE($.referenceType); $.MANY(() => 
{ $.SUBRULE($.additionalBound); }); $.CONSUME(t.RBrace); $.OR([ { GATE: () => this.BACKTRACK_LOOKAHEAD($.isLambdaExpression), ALT: () => $.SUBRULE($.lambdaExpression) }, { ALT: () => $.SUBRULE($.unaryExpressionNotPlusMinus) } ]); }); const newExpressionTypes = { arrayCreationExpression: 1, unqualifiedClassInstanceCreationExpression: 2 }; $.RULE("newExpression", () => { const type = this.BACKTRACK_LOOKAHEAD($.identifyNewExpressionType); $.OR([ { GATE: () => type === newExpressionTypes.arrayCreationExpression, ALT: () => $.SUBRULE($.arrayCreationExpression) }, { GATE: () => type === newExpressionTypes.unqualifiedClassInstanceCreationExpression, ALT: () => $.SUBRULE($.unqualifiedClassInstanceCreationExpression) } ]); }); // https://docs.oracle.com/javase/specs/jls/se11/html/jls-15.html#jls-UnqualifiedClassInstanceCreationExpression $.RULE("unqualifiedClassInstanceCreationExpression", () => { $.CONSUME(t.New); $.OPTION(() => { $.SUBRULE($.typeArguments); }); $.SUBRULE($.classOrInterfaceTypeToInstantiate); $.CONSUME(t.LBrace); $.OPTION2(() => { $.SUBRULE($.argumentList); }); $.CONSUME(t.RBrace); $.OPTION3(() => { $.SUBRULE($.classBody); }); }); $.RULE("classOrInterfaceTypeToInstantiate", () => { $.MANY(() => { $.SUBRULE($.annotation); }); $.CONSUME(t.Identifier); $.MANY2(() => { $.CONSUME(t.Dot); $.MANY3(() => { $.SUBRULE2($.annotation); }); $.CONSUME2(t.Identifier); }); $.OPTION(() => { $.SUBRULE($.typeArgumentsOrDiamond); }); }); $.RULE("typeArgumentsOrDiamond", () => { $.OR({ DEF: [ { ALT: () => $.SUBRULE($.diamond) }, { ALT: () => $.SUBRULE($.typeArguments) } ], MAX_LOOKAHEAD: 2 }); }); $.RULE("diamond", () => { $.CONSUME(t.Less); $.CONSUME(t.Greater); }); $.RULE("methodInvocationSuffix", () => { $.CONSUME(t.LBrace); $.OPTION2(() => { $.SUBRULE($.argumentList); }); $.CONSUME(t.RBrace); }); $.RULE("argumentList", () => { $.SUBRULE($.expression); $.MANY(() => { $.CONSUME(t.Comma); $.SUBRULE2($.expression); }); }); // 
https://docs.oracle.com/javase/specs/jls/se11/html/jls-15.html#jls-15.10.1 $.RULE("arrayCreationExpression", () => { $.CONSUME(t.New); $.OR([ { GATE: $.BACKTRACK($.primitiveType), ALT: () => $.SUBRULE($.primitiveType) }, { ALT: () => $.SUBRULE($.classOrInterfaceType) } ]); $.OR2([ { GATE: $.BACKTRACK($.arrayCreationDefaultInitSuffix), ALT: () => $.SUBRULE($.arrayCreationDefaultInitSuffix) }, { ALT: () => $.SUBRULE($.arrayCreationExplicitInitSuffix) } ]); }); $.RULE("arrayCreationDefaultInitSuffix", () => { $.SUBRULE($.dimExprs); $.OPTION(() => { $.SUBRULE($.dims); }); }); $.RULE("arrayCreationExplicitInitSuffix", () => { $.SUBRULE($.dims); $.SUBRULE($.arrayInitializer); }); // https://docs.oracle.com/javase/specs/jls/se11/html/jls-15.html#jls-DimExprs $.RULE("dimExprs", () => { $.SUBRULE($.dimExpr); $.MANY({ // The GATE is to distinguish DimExpr from Dims : // the only difference between these two is the presence of an expression in the DimExpr // Example: If the GATE is not present double[3][] won't be parsed as the parser will try to parse "[]" // as a dimExpr instead of a dims GATE: () => tokenMatcher($.LA(2).tokenType, t.RSquare) === false, DEF: () => $.SUBRULE2($.dimExpr) }); }); // https://docs.oracle.com/javase/specs/jls/se11/html/jls-15.html#jls-DimExpr $.RULE("dimExpr", () => { $.MANY(() => { $.SUBRULE($.annotation); }); $.CONSUME(t.LSquare); $.SUBRULE($.expression); $.CONSUME(t.RSquare); }); $.RULE("classLiteralSuffix", () => { $.MANY(() => { $.CONSUME(t.LSquare); $.CONSUME(t.RSquare); }); $.CONSUME(t.Dot); $.CONSUME(t.Class); }); $.RULE("arrayAccessSuffix", () => { $.CONSUME(t.LSquare); $.SUBRULE($.expression); $.CONSUME(t.RSquare); }); $.RULE("methodReferenceSuffix", () => { $.CONSUME(t.ColonColon); $.OPTION(() => { $.SUBRULE($.typeArguments); }); $.OR([ { ALT: () => $.CONSUME(t.Identifier) }, // TODO: a constructor method reference ("new") can only be used // in specific contexts, but perhaps this verification is best left // for a semantic analysis 
phase { ALT: () => $.CONSUME(t.New) } ]); }); // backtracking lookahead logic $.RULE("identifyNewExpressionType", () => { $.CONSUME(t.New); const firstTokenAfterNew = this.LA(1).tokenType; // not an array initialization due to the prefix "TypeArguments" if (tokenMatcher(firstTokenAfterNew, t.Less)) { return newExpressionTypes.unqualifiedClassInstanceCreationExpression; } try { $.SUBRULE($.classOrInterfaceTypeToInstantiate); } catch (e) { // if it is not a "classOrInterfaceTypeToInstantiate" then // (assuming a valid input) we are looking at an "arrayCreationExpression" return newExpressionTypes.arrayCreationExpression; } const firstTokenAfterClassType = this.LA(1).tokenType; if (tokenMatcher(firstTokenAfterClassType, t.LBrace)) { return newExpressionTypes.unqualifiedClassInstanceCreationExpression; } // The LBrace above is mandatory in "classInstanceCreation..." so // it must be an "arrayCreationExp" (if the input is valid) // TODO: upgrade the logic to return "unknown" type if at this // point it does not match "arrayCreation" either. // - This will provide a better error message to the user // in case of invalid inputs return newExpressionTypes.arrayCreationExpression; }); // Optimized backtracking, only scan ahead until the arrow("->"). $.RULE("isLambdaExpression", () => { // TODO: this check of next two tokens is probably redundant as the normal lookahead should take care of this. const firstTokenType = this.LA(1).tokenType; const secondTokenType = this.LA(2).tokenType; // no parent lambda "x -> x * 2" if ( tokenMatcher(firstTokenType, t.Identifier) && tokenMatcher(secondTokenType, t.Arrow) ) { return true; } // Performance optimizations, fail fast if it is not a LBrace. 
else if (tokenMatcher(firstTokenType, t.LBrace)) { $.SUBRULE($.lambdaParametersWithBraces); const followedByArrow = tokenMatcher(this.LA(1).tokenType, t.Arrow); return followedByArrow; } return false; }); $.RULE("isCastExpression", () => { if (this.BACKTRACK_LOOKAHEAD($.isPrimitiveCastExpression)) { return true; } return this.BACKTRACK_LOOKAHEAD($.isReferenceTypeCastExpression); }); $.RULE("isPrimitiveCastExpression", () => { $.CONSUME(t.LBrace); $.SUBRULE($.primitiveType); // No dims so this is not a reference Type $.CONSUME(t.RBrace); return true; }); $.RULE("isReferenceTypeCastExpression", () => { $.CONSUME(t.LBrace); $.SUBRULE($.referenceType); $.MANY(() => { $.SUBRULE($.additionalBound); }); $.CONSUME(t.RBrace); const firstTokTypeAfterRBrace = this.LA(1).tokenType; return ( this.firstForUnaryExpressionNotPlusMinus.find(tokType => tokenMatcher(firstTokTypeAfterRBrace, tokType) ) !== undefined ); }); $.RULE("isRefTypeInMethodRef", () => { let result = undefined; $.SUBRULE($.typeArguments); // arrayType const hasDims = $.OPTION(() => { $.SUBRULE($.dims); }); const firstTokTypeAfterTypeArgs = this.LA(1).tokenType; if (tokenMatcher(firstTokTypeAfterTypeArgs, t.ColonColon)) { result = true; } // we must be at the end of a "referenceType" if "dims" were encountered // So there is not point to check farther else if (hasDims) { result = false; } // in the middle of a "classReferenceType" $.OPTION2(() => { $.CONSUME(t.Dot); $.SUBRULE($.classOrInterfaceType); }); if (result !== undefined) { return result; } const firstTokTypeAfterRefType = this.LA(1).tokenType; return tokenMatcher(firstTokTypeAfterRefType, t.ColonColon); }); } function computeFirstForUnaryExpressionNotPlusMinus() { const firstUnaryExpressionNotPlusMinus = this.computeContentAssist( "unaryExpressionNotPlusMinus", [] ); const nextTokTypes = firstUnaryExpressionNotPlusMinus.map( x => x.nextTokenType ); // uniq return nextTokTypes.filter((v, i, a) => a.indexOf(v) === i); } module.exports = { defineRules, 
computeFirstForUnaryExpressionNotPlusMinus };