UNPKG

chevrotain

Version:

Chevrotain is a high-performance, fault-tolerant JavaScript parsing DSL for building recursive descent parsers

499 lines 23.7 kB
"use strict"; var __extends = (this && this.__extends) || (function () { var extendStatics = function (d, b) { extendStatics = Object.setPrototypeOf || ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; return extendStatics(d, b); }; return function (d, b) { extendStatics(d, b); function __() { this.constructor = d; } d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.areTokenCategoriesNotUsed = exports.isStrictPrefixOfPath = exports.containsPath = exports.getLookaheadPathsForOptionalProd = exports.getLookaheadPathsForOr = exports.lookAheadSequenceFromAlternatives = exports.buildSingleAlternativeLookaheadFunction = exports.buildAlternativesLookAheadFunc = exports.buildLookaheadFuncForOptionalProd = exports.buildLookaheadFuncForOr = exports.getProdType = exports.PROD_TYPE = void 0; var utils_1 = require("../../utils/utils"); var interpreter_1 = require("./interpreter"); var rest_1 = require("./rest"); var tokens_1 = require("../../scan/tokens"); var gast_public_1 = require("./gast/gast_public"); var gast_visitor_public_1 = require("./gast/gast_visitor_public"); var PROD_TYPE; (function (PROD_TYPE) { PROD_TYPE[PROD_TYPE["OPTION"] = 0] = "OPTION"; PROD_TYPE[PROD_TYPE["REPETITION"] = 1] = "REPETITION"; PROD_TYPE[PROD_TYPE["REPETITION_MANDATORY"] = 2] = "REPETITION_MANDATORY"; PROD_TYPE[PROD_TYPE["REPETITION_MANDATORY_WITH_SEPARATOR"] = 3] = "REPETITION_MANDATORY_WITH_SEPARATOR"; PROD_TYPE[PROD_TYPE["REPETITION_WITH_SEPARATOR"] = 4] = "REPETITION_WITH_SEPARATOR"; PROD_TYPE[PROD_TYPE["ALTERNATION"] = 5] = "ALTERNATION"; })(PROD_TYPE = exports.PROD_TYPE || (exports.PROD_TYPE = {})); function getProdType(prod) { /* istanbul ignore else */ if (prod instanceof gast_public_1.Option) { return PROD_TYPE.OPTION; } else if (prod instanceof 
gast_public_1.Repetition) { return PROD_TYPE.REPETITION; } else if (prod instanceof gast_public_1.RepetitionMandatory) { return PROD_TYPE.REPETITION_MANDATORY; } else if (prod instanceof gast_public_1.RepetitionMandatoryWithSeparator) { return PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR; } else if (prod instanceof gast_public_1.RepetitionWithSeparator) { return PROD_TYPE.REPETITION_WITH_SEPARATOR; } else if (prod instanceof gast_public_1.Alternation) { return PROD_TYPE.ALTERNATION; } else { throw Error("non exhaustive match"); } } exports.getProdType = getProdType; function buildLookaheadFuncForOr(occurrence, ruleGrammar, maxLookahead, hasPredicates, dynamicTokensEnabled, laFuncBuilder) { var lookAheadPaths = getLookaheadPathsForOr(occurrence, ruleGrammar, maxLookahead); var tokenMatcher = areTokenCategoriesNotUsed(lookAheadPaths) ? tokens_1.tokenStructuredMatcherNoCategories : tokens_1.tokenStructuredMatcher; return laFuncBuilder(lookAheadPaths, hasPredicates, tokenMatcher, dynamicTokensEnabled); } exports.buildLookaheadFuncForOr = buildLookaheadFuncForOr; /** * When dealing with an Optional production (OPTION/MANY/2nd iteration of AT_LEAST_ONE/...) we need to compare * the lookahead "inside" the production and the lookahead immediately "after" it in the same top level rule (context free). * * Example: given a production: * ABC(DE)?DF * * The optional '(DE)?' should only be entered if we see 'DE'. a single Token 'D' is not sufficient to distinguish between the two * alternatives. * * @returns A Lookahead function which will return true IFF the parser should parse the Optional production. */ function buildLookaheadFuncForOptionalProd(occurrence, ruleGrammar, k, dynamicTokensEnabled, prodType, lookaheadBuilder) { var lookAheadPaths = getLookaheadPathsForOptionalProd(occurrence, ruleGrammar, prodType, k); var tokenMatcher = areTokenCategoriesNotUsed(lookAheadPaths) ? 
tokens_1.tokenStructuredMatcherNoCategories : tokens_1.tokenStructuredMatcher; return lookaheadBuilder(lookAheadPaths[0], tokenMatcher, dynamicTokensEnabled); } exports.buildLookaheadFuncForOptionalProd = buildLookaheadFuncForOptionalProd; function buildAlternativesLookAheadFunc(alts, hasPredicates, tokenMatcher, dynamicTokensEnabled) { var numOfAlts = alts.length; var areAllOneTokenLookahead = utils_1.every(alts, function (currAlt) { return utils_1.every(currAlt, function (currPath) { return currPath.length === 1; }); }); // This version takes into account the predicates as well. if (hasPredicates) { /** * @returns {number} - The chosen alternative index */ return function (orAlts) { // unfortunately the predicates must be extracted every single time // as they cannot be cached due to references to parameters(vars) which are no longer valid. // note that in the common case of no predicates, no cpu time will be wasted on this (see else block) var predicates = utils_1.map(orAlts, function (currAlt) { return currAlt.GATE; }); for (var t = 0; t < numOfAlts; t++) { var currAlt = alts[t]; var currNumOfPaths = currAlt.length; var currPredicate = predicates[t]; if (currPredicate !== undefined && currPredicate.call(this) === false) { // if the predicate does not match there is no point in checking the paths continue; } nextPath: for (var j = 0; j < currNumOfPaths; j++) { var currPath = currAlt[j]; var currPathLength = currPath.length; for (var i = 0; i < currPathLength; i++) { var nextToken = this.LA(i + 1); if (tokenMatcher(nextToken, currPath[i]) === false) { // mismatch in current path // try the next pth continue nextPath; } } // found a full path that matches. 
// this will also work for an empty ALT as the loop will be skipped return t; } // none of the paths for the current alternative matched // try the next alternative } // none of the alternatives could be matched return undefined; }; } else if (areAllOneTokenLookahead && !dynamicTokensEnabled) { // optimized (common) case of all the lookaheads paths requiring only // a single token lookahead. These Optimizations cannot work if dynamically defined Tokens are used. var singleTokenAlts = utils_1.map(alts, function (currAlt) { return utils_1.flatten(currAlt); }); var choiceToAlt_1 = utils_1.reduce(singleTokenAlts, function (result, currAlt, idx) { utils_1.forEach(currAlt, function (currTokType) { if (!utils_1.has(result, currTokType.tokenTypeIdx)) { result[currTokType.tokenTypeIdx] = idx; } utils_1.forEach(currTokType.categoryMatches, function (currExtendingType) { if (!utils_1.has(result, currExtendingType)) { result[currExtendingType] = idx; } }); }); return result; }, []); /** * @returns {number} - The chosen alternative index */ return function () { var nextToken = this.LA(1); return choiceToAlt_1[nextToken.tokenTypeIdx]; }; } else { // optimized lookahead without needing to check the predicates at all. // this causes code duplication which is intentional to improve performance. /** * @returns {number} - The chosen alternative index */ return function () { for (var t = 0; t < numOfAlts; t++) { var currAlt = alts[t]; var currNumOfPaths = currAlt.length; nextPath: for (var j = 0; j < currNumOfPaths; j++) { var currPath = currAlt[j]; var currPathLength = currPath.length; for (var i = 0; i < currPathLength; i++) { var nextToken = this.LA(i + 1); if (tokenMatcher(nextToken, currPath[i]) === false) { // mismatch in current path // try the next pth continue nextPath; } } // found a full path that matches. 
// this will also work for an empty ALT as the loop will be skipped return t; } // none of the paths for the current alternative matched // try the next alternative } // none of the alternatives could be matched return undefined; }; } } exports.buildAlternativesLookAheadFunc = buildAlternativesLookAheadFunc; function buildSingleAlternativeLookaheadFunction(alt, tokenMatcher, dynamicTokensEnabled) { var areAllOneTokenLookahead = utils_1.every(alt, function (currPath) { return currPath.length === 1; }); var numOfPaths = alt.length; // optimized (common) case of all the lookaheads paths requiring only // a single token lookahead. if (areAllOneTokenLookahead && !dynamicTokensEnabled) { var singleTokensTypes = utils_1.flatten(alt); if (singleTokensTypes.length === 1 && utils_1.isEmpty(singleTokensTypes[0].categoryMatches)) { var expectedTokenType = singleTokensTypes[0]; var expectedTokenUniqueKey_1 = expectedTokenType.tokenTypeIdx; return function () { return this.LA(1).tokenTypeIdx === expectedTokenUniqueKey_1; }; } else { var choiceToAlt_2 = utils_1.reduce(singleTokensTypes, function (result, currTokType, idx) { result[currTokType.tokenTypeIdx] = true; utils_1.forEach(currTokType.categoryMatches, function (currExtendingType) { result[currExtendingType] = true; }); return result; }, []); return function () { var nextToken = this.LA(1); return choiceToAlt_2[nextToken.tokenTypeIdx] === true; }; } } else { return function () { nextPath: for (var j = 0; j < numOfPaths; j++) { var currPath = alt[j]; var currPathLength = currPath.length; for (var i = 0; i < currPathLength; i++) { var nextToken = this.LA(i + 1); if (tokenMatcher(nextToken, currPath[i]) === false) { // mismatch in current path // try the next pth continue nextPath; } } // found a full path that matches. 
return true; } // none of the paths matched return false; }; } } exports.buildSingleAlternativeLookaheadFunction = buildSingleAlternativeLookaheadFunction; var RestDefinitionFinderWalker = /** @class */ (function (_super) { __extends(RestDefinitionFinderWalker, _super); function RestDefinitionFinderWalker(topProd, targetOccurrence, targetProdType) { var _this = _super.call(this) || this; _this.topProd = topProd; _this.targetOccurrence = targetOccurrence; _this.targetProdType = targetProdType; return _this; } RestDefinitionFinderWalker.prototype.startWalking = function () { this.walk(this.topProd); return this.restDef; }; RestDefinitionFinderWalker.prototype.checkIsTarget = function (node, expectedProdType, currRest, prevRest) { if (node.idx === this.targetOccurrence && this.targetProdType === expectedProdType) { this.restDef = currRest.concat(prevRest); return true; } // performance optimization, do not iterate over the entire Grammar ast after we have found the target return false; }; RestDefinitionFinderWalker.prototype.walkOption = function (optionProd, currRest, prevRest) { if (!this.checkIsTarget(optionProd, PROD_TYPE.OPTION, currRest, prevRest)) { _super.prototype.walkOption.call(this, optionProd, currRest, prevRest); } }; RestDefinitionFinderWalker.prototype.walkAtLeastOne = function (atLeastOneProd, currRest, prevRest) { if (!this.checkIsTarget(atLeastOneProd, PROD_TYPE.REPETITION_MANDATORY, currRest, prevRest)) { _super.prototype.walkOption.call(this, atLeastOneProd, currRest, prevRest); } }; RestDefinitionFinderWalker.prototype.walkAtLeastOneSep = function (atLeastOneSepProd, currRest, prevRest) { if (!this.checkIsTarget(atLeastOneSepProd, PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR, currRest, prevRest)) { _super.prototype.walkOption.call(this, atLeastOneSepProd, currRest, prevRest); } }; RestDefinitionFinderWalker.prototype.walkMany = function (manyProd, currRest, prevRest) { if (!this.checkIsTarget(manyProd, PROD_TYPE.REPETITION, currRest, prevRest)) 
{ _super.prototype.walkOption.call(this, manyProd, currRest, prevRest); } }; RestDefinitionFinderWalker.prototype.walkManySep = function (manySepProd, currRest, prevRest) { if (!this.checkIsTarget(manySepProd, PROD_TYPE.REPETITION_WITH_SEPARATOR, currRest, prevRest)) { _super.prototype.walkOption.call(this, manySepProd, currRest, prevRest); } }; return RestDefinitionFinderWalker; }(rest_1.RestWalker)); /** * Returns the definition of a target production in a top level level rule. */ var InsideDefinitionFinderVisitor = /** @class */ (function (_super) { __extends(InsideDefinitionFinderVisitor, _super); function InsideDefinitionFinderVisitor(targetOccurrence, targetProdType, targetRef) { var _this = _super.call(this) || this; _this.targetOccurrence = targetOccurrence; _this.targetProdType = targetProdType; _this.targetRef = targetRef; _this.result = []; return _this; } InsideDefinitionFinderVisitor.prototype.checkIsTarget = function (node, expectedProdName) { if (node.idx === this.targetOccurrence && this.targetProdType === expectedProdName && (this.targetRef === undefined || node === this.targetRef)) { this.result = node.definition; } }; InsideDefinitionFinderVisitor.prototype.visitOption = function (node) { this.checkIsTarget(node, PROD_TYPE.OPTION); }; InsideDefinitionFinderVisitor.prototype.visitRepetition = function (node) { this.checkIsTarget(node, PROD_TYPE.REPETITION); }; InsideDefinitionFinderVisitor.prototype.visitRepetitionMandatory = function (node) { this.checkIsTarget(node, PROD_TYPE.REPETITION_MANDATORY); }; InsideDefinitionFinderVisitor.prototype.visitRepetitionMandatoryWithSeparator = function (node) { this.checkIsTarget(node, PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR); }; InsideDefinitionFinderVisitor.prototype.visitRepetitionWithSeparator = function (node) { this.checkIsTarget(node, PROD_TYPE.REPETITION_WITH_SEPARATOR); }; InsideDefinitionFinderVisitor.prototype.visitAlternation = function (node) { this.checkIsTarget(node, 
PROD_TYPE.ALTERNATION); }; return InsideDefinitionFinderVisitor; }(gast_visitor_public_1.GAstVisitor)); function initializeArrayOfArrays(size) { var result = new Array(size); for (var i = 0; i < size; i++) { result[i] = []; } return result; } /** * A sort of hash function between a Path in the grammar and a string. * Note that this returns multiple "hashes" to support the scenario of token categories. * - A single path with categories may match multiple **actual** paths. */ function pathToHashKeys(path) { var keys = [""]; for (var i = 0; i < path.length; i++) { var tokType = path[i]; var longerKeys = []; for (var j = 0; j < keys.length; j++) { var currShorterKey = keys[j]; longerKeys.push(currShorterKey + "_" + tokType.tokenTypeIdx); for (var t = 0; t < tokType.categoryMatches.length; t++) { var categoriesKeySuffix = "_" + tokType.categoryMatches[t]; longerKeys.push(currShorterKey + categoriesKeySuffix); } } keys = longerKeys; } return keys; } /** * Imperative style due to being called from a hot spot */ function isUniquePrefixHash(altKnownPathsKeys, searchPathKeys, idx) { for (var currAltIdx = 0; currAltIdx < altKnownPathsKeys.length; currAltIdx++) { // We only want to test vs the other alternatives if (currAltIdx === idx) { continue; } var otherAltKnownPathsKeys = altKnownPathsKeys[currAltIdx]; for (var searchIdx = 0; searchIdx < searchPathKeys.length; searchIdx++) { var searchKey = searchPathKeys[searchIdx]; if (otherAltKnownPathsKeys[searchKey] === true) { return false; } } } // None of the SearchPathKeys were found in any of the other alternatives return true; } function lookAheadSequenceFromAlternatives(altsDefs, k) { var partialAlts = utils_1.map(altsDefs, function (currAlt) { return interpreter_1.possiblePathsFrom([currAlt], 1); }); var finalResult = initializeArrayOfArrays(partialAlts.length); var altsHashes = utils_1.map(partialAlts, function (currAltPaths) { var dict = {}; utils_1.forEach(currAltPaths, function (item) { var keys = 
pathToHashKeys(item.partialPath); utils_1.forEach(keys, function (currKey) { dict[currKey] = true; }); }); return dict; }); var newData = partialAlts; // maxLookahead loop for (var pathLength = 1; pathLength <= k; pathLength++) { var currDataset = newData; newData = initializeArrayOfArrays(currDataset.length); var _loop_1 = function (altIdx) { var currAltPathsAndSuffixes = currDataset[altIdx]; // paths in current alternative loop for (var currPathIdx = 0; currPathIdx < currAltPathsAndSuffixes.length; currPathIdx++) { var currPathPrefix = currAltPathsAndSuffixes[currPathIdx].partialPath; var suffixDef = currAltPathsAndSuffixes[currPathIdx].suffixDef; var prefixKeys = pathToHashKeys(currPathPrefix); var isUnique = isUniquePrefixHash(altsHashes, prefixKeys, altIdx); // End of the line for this path. if (isUnique || utils_1.isEmpty(suffixDef) || currPathPrefix.length === k) { var currAltResult = finalResult[altIdx]; // TODO: Can we implement a containsPath using Maps/Dictionaries? if (containsPath(currAltResult, currPathPrefix) === false) { currAltResult.push(currPathPrefix); // Update all new keys for the current path. 
for (var j = 0; j < prefixKeys.length; j++) { var currKey = prefixKeys[j]; altsHashes[altIdx][currKey] = true; } } } // Expand longer paths else { var newPartialPathsAndSuffixes = interpreter_1.possiblePathsFrom(suffixDef, pathLength + 1, currPathPrefix); newData[altIdx] = newData[altIdx].concat(newPartialPathsAndSuffixes); // Update keys for new known paths utils_1.forEach(newPartialPathsAndSuffixes, function (item) { var prefixKeys = pathToHashKeys(item.partialPath); utils_1.forEach(prefixKeys, function (key) { altsHashes[altIdx][key] = true; }); }); } } }; // alternatives loop for (var altIdx = 0; altIdx < currDataset.length; altIdx++) { _loop_1(altIdx); } } return finalResult; } exports.lookAheadSequenceFromAlternatives = lookAheadSequenceFromAlternatives; function getLookaheadPathsForOr(occurrence, ruleGrammar, k, orProd) { var visitor = new InsideDefinitionFinderVisitor(occurrence, PROD_TYPE.ALTERNATION, orProd); ruleGrammar.accept(visitor); return lookAheadSequenceFromAlternatives(visitor.result, k); } exports.getLookaheadPathsForOr = getLookaheadPathsForOr; function getLookaheadPathsForOptionalProd(occurrence, ruleGrammar, prodType, k) { var insideDefVisitor = new InsideDefinitionFinderVisitor(occurrence, prodType); ruleGrammar.accept(insideDefVisitor); var insideDef = insideDefVisitor.result; var afterDefWalker = new RestDefinitionFinderWalker(ruleGrammar, occurrence, prodType); var afterDef = afterDefWalker.startWalking(); var insideFlat = new gast_public_1.Alternative({ definition: insideDef }); var afterFlat = new gast_public_1.Alternative({ definition: afterDef }); return lookAheadSequenceFromAlternatives([insideFlat, afterFlat], k); } exports.getLookaheadPathsForOptionalProd = getLookaheadPathsForOptionalProd; function containsPath(alternative, searchPath) { compareOtherPath: for (var i = 0; i < alternative.length; i++) { var otherPath = alternative[i]; if (otherPath.length !== searchPath.length) { continue; } for (var j = 0; j < otherPath.length; 
j++) { var searchTok = searchPath[j]; var otherTok = otherPath[j]; var matchingTokens = searchTok === otherTok || otherTok.categoryMatchesMap[searchTok.tokenTypeIdx] !== undefined; if (matchingTokens === false) { continue compareOtherPath; } } return true; } return false; } exports.containsPath = containsPath; function isStrictPrefixOfPath(prefix, other) { return (prefix.length < other.length && utils_1.every(prefix, function (tokType, idx) { var otherTokType = other[idx]; return (tokType === otherTokType || otherTokType.categoryMatchesMap[tokType.tokenTypeIdx]); })); } exports.isStrictPrefixOfPath = isStrictPrefixOfPath; function areTokenCategoriesNotUsed(lookAheadPaths) { return utils_1.every(lookAheadPaths, function (singleAltPaths) { return utils_1.every(singleAltPaths, function (singlePath) { return utils_1.every(singlePath, function (token) { return utils_1.isEmpty(token.categoryMatches); }); }); }); } exports.areTokenCategoriesNotUsed = areTokenCategoriesNotUsed; //# sourceMappingURL=lookahead.js.map