ohm-js
Version:
181 lines (151 loc) • 6.49 kB
JavaScript
import {abstract, copyWithoutDuplicates} from './common.js';
import * as pexprs from './pexprs-main.js';
// --------------------------------------------------------------------
// Private stuff
// --------------------------------------------------------------------
// Returns true iff `str` consists of an ASCII letter, '_' or '$' followed by any number of
// ASCII letters, digits, '_' or '$' -- i.e., a restricted (ASCII-only) form of a JS identifier.
function isRestrictedJSIdentifier(str) {
  const restrictedIdentifierPattern = /^[a-zA-Z_$][0-9a-zA-Z_$]*$/;
  return restrictedIdentifierPattern.test(str);
}
// Makes the names in `argumentNameList` unique by appending subscripts ('_1', '_2', ...) to
// every name that occurs more than once. The list is modified in place; nothing is returned.
// Names that occur exactly once are left untouched.
//
// A subscripted name is never allowed to collide with a name that is already in the list
// (e.g., ['a', 'a', 'a_1'] becomes ['a_2', 'a_3', 'a_1'], not ['a_1', 'a_2', 'a_1']), so the
// resulting list is always free of duplicates.
function resolveDuplicatedNames(argumentNameList) {
  // `count` records the number of times each argument name occurs in the list.
  const count = new Map();
  argumentNameList.forEach(argName => {
    count.set(argName, (count.get(argName) || 0) + 1);
  });

  // `taken` holds every name that may not be generated: all of the original names, plus
  // each subscripted name as soon as it has been handed out.
  const taken = new Set(argumentNameList);
  // `nextSubscript` maps a duplicated name to the next subscript to try for it.
  const nextSubscript = new Map();

  argumentNameList.forEach((argName, idx) => {
    if (count.get(argName) <= 1) {
      return;
    }
    // This name shows up more than once, so add a subscript, skipping any that would
    // reproduce a name that is already in use.
    let subscript = nextSubscript.get(argName) || 1;
    let candidate = argName + '_' + subscript;
    while (taken.has(candidate)) {
      subscript++;
      candidate = argName + '_' + subscript;
    }
    nextSubscript.set(argName, subscript + 1);
    taken.add(candidate);
    argumentNameList[idx] = candidate;
  });
}
// --------------------------------------------------------------------
// Operations
// --------------------------------------------------------------------
/*
Returns a list of strings that will be used as the default argument names for its receiver
(a pexpr) in a semantic action. This is used exclusively by the Semantics Editor.
`firstArgIndex` is the 1-based index of the first argument name that will be generated for this
pexpr. It enables us to name arguments positionally, e.g., if the second argument is a
non-alphanumeric terminal like "+", it will be named '$2'.
`noDupCheck` is true when the caller of `toArgumentNameList` is not the top-level caller, i.e.,
when it is itself another `toArgumentNameList` implementation. Duplicate names are resolved only
at the top level, which avoids appending nested subscripts such as '_1_1' and '_1_2'.
Here is a more elaborate example that illustrates how this method works:
`(a "+" b).toArgumentNameList(1)` evaluates to `['a', '$2', 'b']` with the following recursive
calls:
(a).toArgumentNameList(1) -> ['a'],
("+").toArgumentNameList(2) -> ['$2'],
(b).toArgumentNameList(3) -> ['b']
Notes:
* This method must only be called on well-formed expressions, e.g., the receiver must
not have any Alt sub-expressions with inconsistent arities.
* e.getArity() === e.toArgumentNameList(1).length
*/
// Signature: function(firstArgIndex, noDupCheck) { ... }
// The base PExpr entry is registered through the `abstract` helper imported from
// './common.js'; the concrete pexpr types below each assign their own implementation.
pexprs.PExpr.prototype.toArgumentNameList = abstract('toArgumentNameList');
// `any` yields exactly one argument, which is always named 'any'. Both parameters are ignored.
pexprs.any.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const argumentNameList = ['any'];
  return argumentNameList;
};
// `end` yields exactly one argument, which is always named 'end'. Both parameters are ignored.
pexprs.end.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const argumentNameList = ['end'];
  return argumentNameList;
};
// A terminal yields a single argument name. `noDupCheck` is ignored.
pexprs.Terminal.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  // True when the terminal's text could be the tail of a JS identifier (letters, digits, '_').
  const isIdentifierSuffix = typeof this.obj === 'string' && /^[_a-zA-Z0-9]+$/.test(this.obj);

  // Identifier-like terminals are named after their text with a leading '_' (which keeps the
  // name valid even if the text starts with a digit); all others are named positionally.
  return isIdentifierSuffix ? ['_' + this.obj] : ['$' + firstArgIndex];
};
// A range yields a single argument name, chosen by trying, in order:
//   1. '<from>_to_<to>'
//   2. '_<from>_to_<to>'
//   3. the positional name '$<firstArgIndex>'
// and taking the first one that is a restricted JS identifier. `noDupCheck` is ignored.
pexprs.Range.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const plainName = this.from + '_to_' + this.to;
  if (isRestrictedJSIdentifier(plainName)) {
    return [plainName];
  }

  // Prepending '_' rescues names whose only problem is a leading digit.
  const prefixedName = '_' + plainName;
  if (isRestrictedJSIdentifier(prefixedName)) {
    return [prefixedName];
  }

  // Neither form is usable, so fall back to a positional name.
  return ['$' + firstArgIndex];
};
// An alternation yields one name per argument position. The name for a position is formed by
// collecting the name each term gives to that position, dropping repeats, and joining what is
// left with '_or_'. The arity is taken from the first term.
pexprs.Alt.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  // One row per term; every term starts from the same `firstArgIndex` and never resolves
  // duplicates itself (that is done here, and only at the top level).
  const rows = this.terms.map(term => term.toArgumentNameList(firstArgIndex, true));
  const arity = rows[0].length;

  const argumentNameList = [];
  for (let position = 0; position < arity; position++) {
    const namesAtPosition = rows.map(row => row[position]);
    argumentNameList.push(copyWithoutDuplicates(namesAtPosition).join('_or_'));
  }

  if (!noDupCheck) {
    resolveDuplicatedNames(argumentNameList);
  }
  return argumentNameList;
};
// A sequence yields the concatenation of its factors' argument names. Each factor is asked for
// its names starting at the index that follows the names produced so far, so that positional
// names ('$N') reflect the argument's position within the whole sequence.
pexprs.Seq.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const argumentNameList = [];
  let nextArgIndex = firstArgIndex;

  for (const factor of this.factors) {
    // Factors never resolve duplicates themselves; that happens below, at the top level only.
    const factorNames = factor.toArgumentNameList(nextArgIndex, true);
    argumentNameList.push(...factorNames);
    nextArgIndex += factorNames.length;
  }

  if (!noDupCheck) {
    resolveDuplicatedNames(argumentNameList);
  }
  return argumentNameList;
};
// An iteration yields the names of its sub-expression in a naive plural form: 'es' is appended
// to names that already end in 's', and 's' to all others.
pexprs.Iter.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const singularNames = this.expr.toArgumentNameList(firstArgIndex, noDupCheck);
  const argumentNameList = singularNames.map(name => {
    const pluralSuffix = name.endsWith('s') ? 'es' : 's';
    return name + pluralSuffix;
  });

  // Pluralizing can make two distinct names equal (e.g., 'as' and 'ase' both become 'ases'),
  // so duplicates are resolved again here when this is the top-level call.
  if (noDupCheck) {
    return argumentNameList;
  }
  resolveDuplicatedNames(argumentNameList);
  return argumentNameList;
};
// An optional expression yields the names of its sub-expression, each prefixed with 'opt' and
// with its first character upper-cased (e.g., 'foo' becomes 'optFoo').
pexprs.Opt.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const argumentNameList = this.expr
      .toArgumentNameList(firstArgIndex, noDupCheck)
      .map(argName => 'opt' + argName[0].toUpperCase() + argName.slice(1));

  // Upper-casing the first character can make two distinct names equal (e.g., 'a' and 'A' both
  // become 'optA'), so -- as Iter does -- resolve duplicates when this is the top-level call.
  if (!noDupCheck) {
    resolveDuplicatedNames(argumentNameList);
  }
  return argumentNameList;
};
// A negative lookahead contributes no argument names. Both parameters are ignored.
pexprs.Not.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const noArgumentNames = [];
  return noArgumentNames;
};
// Lookahead and Lex share one implementation: both simply forward to their sub-expression,
// passing `firstArgIndex` and `noDupCheck` through unchanged.
pexprs.Lookahead.prototype.toArgumentNameList = pexprs.Lex.prototype.toArgumentNameList =
  function(firstArgIndex, noDupCheck) {
    const {expr} = this;
    return expr.toArgumentNameList(firstArgIndex, noDupCheck);
  };
// A rule application yields a single argument named after the applied rule. Both parameters
// are ignored.
pexprs.Apply.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const {ruleName} = this;
  return [ruleName];
};
// A Unicode character class is always named positionally ('$' followed by `firstArgIndex`).
// `noDupCheck` is ignored.
pexprs.UnicodeChar.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const positionalName = `$${firstArgIndex}`;
  return [positionalName];
};
// A parameter reference yields a single argument named 'param' followed by the parameter's
// `index`. Both parameters of this method are ignored.
pexprs.Param.prototype.toArgumentNameList = function(firstArgIndex, noDupCheck) {
  const paramName = `param${this.index}`;
  return [paramName];
};
// "Value pexprs" (Value, Str, Arr, Obj) are going away soon, so we don't worry about them here.