UNPKG

ohm-js

Version:

An object-oriented language for parsing and pattern matching

181 lines (151 loc) 6.49 kB
import {abstract, copyWithoutDuplicates} from './common.js';
import * as pexprs from './pexprs-main.js';

// --------------------------------------------------------------------
// Private stuff
// --------------------------------------------------------------------

/**
 * Tests whether `str` looks like a plain (ASCII-only) JS identifier: a letter,
 * `_` or `$`, followed by any number of letters, digits, `_` or `$`.
 *
 * @param {string} str - candidate argument name.
 * @returns {boolean} true if `str` matches the restricted identifier pattern.
 */
function isRestrictedJSIdentifier(str) {
  return /^[a-zA-Z_$][0-9a-zA-Z_$]*$/.test(str);
}

/**
 * Makes repeated names in `argumentNameList` distinguishable by appending
 * subscripts ('_1', '_2', ...) to every occurrence of a name that appears more
 * than once. Names that occur exactly once are left untouched.
 *
 * The list is modified in place; nothing is returned.
 *
 * @param {string[]} argumentNameList - the argument names to de-duplicate.
 */
function resolveDuplicatedNames(argumentNameList) {
  // `count` maps each argument name to the number of times it occurs in the list.
  // It has no prototype, so names such as 'constructor' are counted like any other.
  const count = Object.create(null);
  argumentNameList.forEach(argName => {
    count[argName] = (count[argName] || 0) + 1;
  });

  // Append subscripts ('_1', '_2', ...) to duplicate argument names.
  Object.keys(count).forEach(dupArgName => {
    if (count[dupArgName] <= 1) {
      return;
    }

    // This name shows up more than once, so add subscripts.
    let subscript = 1;
    argumentNameList.forEach((argName, idx) => {
      if (argName === dupArgName) {
        argumentNameList[idx] = argName + '_' + subscript++;
      }
    });
  });
}

// --------------------------------------------------------------------
// Operations
// --------------------------------------------------------------------

/*
  Returns a list of strings that will be used as the default argument names for its receiver
  (a pexpr) in a semantic action. This is used exclusively by the Semantics Editor.

  `firstArgIndex` is the 1-based index of the first argument name that will be generated for this
  pexpr. It enables us to name arguments positionally, e.g., if the second argument is a
  non-alphanumeric terminal like "+", it will be named '$2'.

  `noDupCheck` is true if the caller of `toArgumentNameList` is not a top level caller. It enables
  us to avoid nested duplication subscripts appending, e.g., '_1_1', '_1_2', by only checking
  duplicates at the top level.

  Here is a more elaborate example that illustrates how this method works:
  `(a "+" b).toArgumentNameList(1)` evaluates to `['a', '$2', 'b']` with the following recursive
  calls:

    (a).toArgumentNameList(1) -> ['a'],
    ("+").toArgumentNameList(2) -> ['$2'],
    (b).toArgumentNameList(3) -> ['b']

  Notes:
  * This method must only be called on well-formed expressions, e.g., the receiver must
    not have any Alt sub-expressions with inconsistent arities.
  * e.getArity() === e.toArgumentNameList(1).length
*/
// function(firstArgIndex, noDupCheck) { ... }
pexprs.PExpr.prototype.toArgumentNameList = abstract('toArgumentNameList');

pexprs.any.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  return ['any'];
};

pexprs.end.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  return ['end'];
};

pexprs.Terminal.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  if (typeof this.obj === 'string' && /^[_a-zA-Z0-9]+$/.test(this.obj)) {
    // If this terminal is a valid suffix for a JS identifier, just prepend it with '_'
    return ['_' + this.obj];
  }
  // Otherwise, name it positionally.
  return ['$' + firstArgIndex];
};

pexprs.Range.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  let argName = this.from + '_to_' + this.to;
  // If the `argName` is not valid then try to prepend a `_`.
  if (!isRestrictedJSIdentifier(argName)) {
    argName = '_' + argName;
  }
  // If the `argName` is still not valid after prepending a `_`, then name it positionally.
  if (!isRestrictedJSIdentifier(argName)) {
    argName = '$' + firstArgIndex;
  }
  return [argName];
};

pexprs.Alt.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  // An alternation without terms has no first term to read the number of arguments from.
  // Return an empty list rather than throwing a TypeError on `termArgNameLists[0]`.
  if (this.terms.length === 0) {
    return [];
  }

  // `termArgNameLists` is an array of arrays where each row is the
  // argument name list that corresponds to a term in this alternation.
  const termArgNameLists = this.terms.map(term => term.toArgumentNameList(firstArgIndex, true));

  // All terms are expected to produce the same number of names (see the notes above), so the
  // first row determines the number of columns.
  const numArgs = termArgNameLists[0].length;
  const argumentNameList = [];
  for (let colIdx = 0; colIdx < numArgs; colIdx++) {
    // Collect the names that the different terms propose for this argument position and
    // combine the distinct ones, e.g. 'a_or_b'.
    const col = termArgNameLists.map(row => row[colIdx]);
    const uniqueNames = copyWithoutDuplicates(col);
    argumentNameList.push(uniqueNames.join('_or_'));
  }

  if (!noDupCheck) {
    resolveDuplicatedNames(argumentNameList);
  }
  return argumentNameList;
};

pexprs.Seq.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  // Generate the argument name list, without worrying about duplicates.
  const argumentNameList = [];
  // Index of the first argument of the factor currently being processed. A local cursor is
  // used so that the `firstArgIndex` parameter itself is never reassigned.
  let nextArgIndex = firstArgIndex;
  this.factors.forEach(factor => {
    const factorArgumentNameList = factor.toArgumentNameList(nextArgIndex, true);
    argumentNameList.push(...factorArgumentNameList);

    // Shift the index to take this factor's argument names into account.
    nextArgIndex += factorArgumentNameList.length;
  });

  if (!noDupCheck) {
    resolveDuplicatedNames(argumentNameList);
  }
  return argumentNameList;
};

pexprs.Iter.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  // Pluralize the names of the iterated expression: names that already end in 's' get 'es',
  // all other names get 's'.
  const argumentNameList = this.expr
      .toArgumentNameList(firstArgIndex, noDupCheck)
      .map(exprArgumentString =>
        exprArgumentString.endsWith('s') ? exprArgumentString + 'es' : exprArgumentString + 's',
      );

  // Pluralization can make two different names equal (e.g. 'as' and 'ase' both become 'ases'),
  // so duplicates are checked again here.
  if (!noDupCheck) {
    resolveDuplicatedNames(argumentNameList);
  }
  return argumentNameList;
};

pexprs.Opt.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  // Prefix each name with 'opt' and capitalize its first character, e.g. 'foo' -> 'optFoo'.
  return this.expr.toArgumentNameList(firstArgIndex, noDupCheck).map(argName => {
    return 'opt' + argName[0].toUpperCase() + argName.slice(1);
  });
};

pexprs.Not.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  return [];
};

// Lookahead and Lex simply use the names of the expression they wrap.
pexprs.Lookahead.prototype.toArgumentNameList = pexprs.Lex.prototype.toArgumentNameList =
  function(firstArgIndex, noDupCheck) {
    return this.expr.toArgumentNameList(firstArgIndex, noDupCheck);
  };

pexprs.Apply.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  return [this.ruleName];
};

pexprs.UnicodeChar.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  return ['$' + firstArgIndex];
};

pexprs.Param.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  return ['param' + this.index];
};

// "Value pexprs" (Value, Str, Arr, Obj) are going away soon, so we don't worry about them here.