UNPKG

ohm-js

Version:

An object-oriented language for parsing and pattern matching

680 lines (591 loc) 24.4 kB
import {InputStream} from './InputStream.js'; import {IterationNode} from './nodes.js'; import {MatchResult} from './MatchResult.js'; import * as common from './common.js'; import * as errors from './errors.js'; import * as util from './util.js'; // -------------------------------------------------------------------- // Private stuff // -------------------------------------------------------------------- const globalActionStack = []; const hasOwnProperty = (x, prop) => Object.prototype.hasOwnProperty.call(x, prop); // ----------------- Wrappers ----------------- // Wrappers decorate CST nodes with all of the functionality (i.e., operations and attributes) // provided by a Semantics (see below). `Wrapper` is the abstract superclass of all wrappers. A // `Wrapper` must have `_node` and `_semantics` instance variables, which refer to the CST node and // Semantics (resp.) for which it was created, and a `_childWrappers` instance variable which is // used to cache the wrapper instances that are created for its child nodes. Setting these instance // variables is the responsibility of the constructor of each Semantics-specific subclass of // `Wrapper`. class Wrapper { constructor(node, sourceInterval, baseInterval) { this._node = node; this.source = sourceInterval; // The interval that the childOffsets of `node` are relative to. It should be the source // of the closest Nonterminal node. this._baseInterval = baseInterval; if (node.isNonterminal()) { common.assert(sourceInterval === baseInterval); } this._childWrappers = []; } _forgetMemoizedResultFor(attributeName) { // Remove the memoized attribute from the cstNode and all its children. delete this._node[this._semantics.attributeKeys[attributeName]]; this.children.forEach(child => { child._forgetMemoizedResultFor(attributeName); }); } // Returns the wrapper of the specified child node. Child wrappers are created lazily and // cached in the parent wrapper's `_childWrappers` instance variable. child(idx) { if (!(0 <= idx && idx < this._node.numChildren())) { // TODO: Consider throwing an exception here. return undefined; } let childWrapper = this._childWrappers[idx]; if (!childWrapper) { const childNode = this._node.childAt(idx); const offset = this._node.childOffsets[idx]; const source = this._baseInterval.subInterval(offset, childNode.matchLength); const base = childNode.isNonterminal() ? source : this._baseInterval; childWrapper = this._childWrappers[idx] = this._semantics.wrap(childNode, source, base); } return childWrapper; } // Returns an array containing the wrappers of all of the children of the node associated // with this wrapper. _children() { // Force the creation of all child wrappers for (let idx = 0; idx < this._node.numChildren(); idx++) { this.child(idx); } return this._childWrappers; } // Returns `true` if the CST node associated with this wrapper corresponds to an iteration // expression, i.e., a Kleene-*, Kleene-+, or an optional. Returns `false` otherwise. isIteration() { return this._node.isIteration(); } // Returns `true` if the CST node associated with this wrapper is a terminal node, `false` // otherwise. isTerminal() { return this._node.isTerminal(); } // Returns `true` if the CST node associated with this wrapper is a nonterminal node, `false` // otherwise. isNonterminal() { return this._node.isNonterminal(); } // Returns `true` if the CST node associated with this wrapper is a nonterminal node // corresponding to a syntactic rule, `false` otherwise. isSyntactic() { return this.isNonterminal() && this._node.isSyntactic(); } // Returns `true` if the CST node associated with this wrapper is a nonterminal node // corresponding to a lexical rule, `false` otherwise. isLexical() { return this.isNonterminal() && this._node.isLexical(); } // Returns `true` if the CST node associated with this wrapper is an iterator node // having either one or no child (? operator), `false` otherwise. // Otherwise, throws an exception. isOptional() { return this._node.isOptional(); } // Create a new _iter wrapper in the same semantics as this wrapper. iteration(optChildWrappers) { const childWrappers = optChildWrappers || []; const childNodes = childWrappers.map(c => c._node); const iter = new IterationNode(childNodes, [], -1, false); const wrapper = this._semantics.wrap(iter, null, null); wrapper._childWrappers = childWrappers; return wrapper; } // Returns an array containing the children of this CST node. get children() { return this._children(); } // Returns the name of grammar rule that created this CST node. get ctorName() { return this._node.ctorName; } // Returns the number of children of this CST node. get numChildren() { return this._node.numChildren(); } // Returns the contents of the input stream consumed by this CST node. get sourceString() { return this.source.contents; } } // ----------------- Semantics ----------------- // A Semantics is a container for a family of Operations and Attributes for a given grammar. // Semantics enable modularity (different clients of a grammar can create their set of operations // and attributes in isolation) and extensibility even when operations and attributes are mutually- // recursive. This constructor should not be called directly except from // `Semantics.createSemantics`. The normal ways to create a Semantics, given a grammar 'g', are // `g.createSemantics()` and `g.extendSemantics(parentSemantics)`. export class Semantics { constructor(grammar, superSemantics) { const self = this; this.grammar = grammar; this.checkedActionDicts = false; // Constructor for wrapper instances, which are passed as the arguments to the semantic actions // of an operation or attribute. Operations and attributes require double dispatch: the semantic // action is chosen based on both the node's type and the semantics. Wrappers ensure that // the `execute` method is called with the correct (most specific) semantics object as an // argument. this.Wrapper = class extends (superSemantics ? superSemantics.Wrapper : Wrapper) { constructor(node, sourceInterval, baseInterval) { super(node, sourceInterval, baseInterval); self.checkActionDictsIfHaventAlready(); this._semantics = self; } toString() { return '[semantics wrapper for ' + self.grammar.name + ']'; } }; this.super = superSemantics; if (superSemantics) { if (!(grammar.equals(this.super.grammar) || grammar._inheritsFrom(this.super.grammar))) { throw new Error( "Cannot extend a semantics for grammar '" + this.super.grammar.name + "' for use with grammar '" + grammar.name + "' (not a sub-grammar)", ); } this.operations = Object.create(this.super.operations); this.attributes = Object.create(this.super.attributes); this.attributeKeys = Object.create(null); // Assign unique symbols for each of the attributes inherited from the super-semantics so that // they are memoized independently. // eslint-disable-next-line guard-for-in for (const attributeName in this.attributes) { Object.defineProperty(this.attributeKeys, attributeName, { value: util.uniqueId(attributeName), }); } } else { this.operations = Object.create(null); this.attributes = Object.create(null); this.attributeKeys = Object.create(null); } } toString() { return '[semantics for ' + this.grammar.name + ']'; } checkActionDictsIfHaventAlready() { if (!this.checkedActionDicts) { this.checkActionDicts(); this.checkedActionDicts = true; } } // Checks that the action dictionaries for all operations and attributes in this semantics, // including the ones that were inherited from the super-semantics, agree with the grammar. // Throws an exception if one or more of them doesn't. checkActionDicts() { let name; // eslint-disable-next-line guard-for-in for (name in this.operations) { this.operations[name].checkActionDict(this.grammar); } // eslint-disable-next-line guard-for-in for (name in this.attributes) { this.attributes[name].checkActionDict(this.grammar); } } toRecipe(semanticsOnly) { function hasSuperSemantics(s) { return s.super !== Semantics.BuiltInSemantics._getSemantics(); } let str = '(function(g) {\n'; if (hasSuperSemantics(this)) { str += ' var semantics = ' + this.super.toRecipe(true) + '(g'; const superSemanticsGrammar = this.super.grammar; let relatedGrammar = this.grammar; while (relatedGrammar !== superSemanticsGrammar) { str += '.superGrammar'; relatedGrammar = relatedGrammar.superGrammar; } str += ');\n'; str += ' return g.extendSemantics(semantics)'; } else { str += ' return g.createSemantics()'; } ['Operation', 'Attribute'].forEach(type => { const semanticOperations = this[type.toLowerCase() + 's']; Object.keys(semanticOperations).forEach(name => { const {actionDict, formals, builtInDefault} = semanticOperations[name]; let signature = name; if (formals.length > 0) { signature += '(' + formals.join(', ') + ')'; } let method; if (hasSuperSemantics(this) && this.super[type.toLowerCase() + 's'][name]) { method = 'extend' + type; } else { method = 'add' + type; } str += '\n .' + method + '(' + JSON.stringify(signature) + ', {'; const srcArray = []; Object.keys(actionDict).forEach(actionName => { if (actionDict[actionName] !== builtInDefault) { let source = actionDict[actionName].toString().trim(); // Convert method shorthand to plain old function syntax. // https://github.com/ohmjs/ohm/issues/263 source = source.replace(/^.*\(/, 'function('); srcArray.push('\n ' + JSON.stringify(actionName) + ': ' + source); } }); str += srcArray.join(',') + '\n })'; }); }); str += ';\n })'; if (!semanticsOnly) { str = '(function() {\n' + ' var grammar = this.fromRecipe(' + this.grammar.toRecipe() + ');\n' + ' var semantics = ' + str + '(grammar);\n' + ' return semantics;\n' + '});\n'; } return str; } addOperationOrAttribute(type, signature, actionDict) { const typePlural = type + 's'; const parsedNameAndFormalArgs = parseSignature(signature, type); const {name} = parsedNameAndFormalArgs; const {formals} = parsedNameAndFormalArgs; // TODO: check that there are no duplicate formal arguments this.assertNewName(name, type); // Create the action dictionary for this operation / attribute that contains a `_default` action // which defines the default behavior of iteration, terminal, and non-terminal nodes... const builtInDefault = newDefaultAction(type, name, doIt); const realActionDict = {_default: builtInDefault}; // ... and add in the actions supplied by the programmer, which may override some or all of the // default ones. Object.keys(actionDict).forEach(name => { realActionDict[name] = actionDict[name]; }); const entry = type === 'operation' ? new Operation(name, formals, realActionDict, builtInDefault) : new Attribute(name, realActionDict, builtInDefault); // The following check is not strictly necessary (it will happen later anyway) but it's better // to catch errors early. entry.checkActionDict(this.grammar); this[typePlural][name] = entry; function doIt(...args) { // Dispatch to most specific version of this operation / attribute -- it may have been // overridden by a sub-semantics. const thisThing = this._semantics[typePlural][name]; // Check that the caller passed the correct number of arguments. if (arguments.length !== thisThing.formals.length) { throw new Error( 'Invalid number of arguments passed to ' + name + ' ' + type + ' (expected ' + thisThing.formals.length + ', got ' + arguments.length + ')', ); } // Create an "arguments object" from the arguments that were passed to this // operation / attribute. const argsObj = Object.create(null); for (const [idx, val] of Object.entries(args)) { const formal = thisThing.formals[idx]; argsObj[formal] = val; } const oldArgs = this.args; this.args = argsObj; const ans = thisThing.execute(this._semantics, this); this.args = oldArgs; return ans; } if (type === 'operation') { this.Wrapper.prototype[name] = doIt; this.Wrapper.prototype[name].toString = function() { return '[' + name + ' operation]'; }; } else { Object.defineProperty(this.Wrapper.prototype, name, { get: doIt, configurable: true, // So the property can be deleted. }); Object.defineProperty(this.attributeKeys, name, { value: util.uniqueId(name), }); } } extendOperationOrAttribute(type, name, actionDict) { const typePlural = type + 's'; // Make sure that `name` really is just a name, i.e., that it doesn't also contain formals. parseSignature(name, 'attribute'); if (!(this.super && name in this.super[typePlural])) { throw new Error( 'Cannot extend ' + type + " '" + name + "': did not inherit an " + type + ' with that name', ); } if (hasOwnProperty(this[typePlural], name)) { throw new Error('Cannot extend ' + type + " '" + name + "' again"); } // Create a new operation / attribute whose actionDict delegates to the super operation / // attribute's actionDict, and which has all the keys from `inheritedActionDict`. const inheritedFormals = this[typePlural][name].formals; const inheritedActionDict = this[typePlural][name].actionDict; const newActionDict = Object.create(inheritedActionDict); Object.keys(actionDict).forEach(name => { newActionDict[name] = actionDict[name]; }); this[typePlural][name] = type === 'operation' ? new Operation(name, inheritedFormals, newActionDict) : new Attribute(name, newActionDict); // The following check is not strictly necessary (it will happen later anyway) but it's better // to catch errors early. this[typePlural][name].checkActionDict(this.grammar); } assertNewName(name, type) { if (hasOwnProperty(Wrapper.prototype, name)) { throw new Error('Cannot add ' + type + " '" + name + "': that's a reserved name"); } if (name in this.operations) { throw new Error( 'Cannot add ' + type + " '" + name + "': an operation with that name already exists", ); } if (name in this.attributes) { throw new Error( 'Cannot add ' + type + " '" + name + "': an attribute with that name already exists", ); } } // Returns a wrapper for the given CST `node` in this semantics. // If `node` is already a wrapper, returns `node` itself. // TODO: why is this needed? wrap(node, source, optBaseInterval) { const baseInterval = optBaseInterval || source; return node instanceof this.Wrapper ? node : new this.Wrapper(node, source, baseInterval); } } function parseSignature(signature, type) { if (!Semantics.prototypeGrammar) { // The Operations and Attributes grammar won't be available while Ohm is loading, // but we can get away the following simplification b/c none of the operations // that are used while loading take arguments. common.assert(signature.indexOf('(') === -1); return { name: signature, formals: [], }; } const r = Semantics.prototypeGrammar.match( signature, type === 'operation' ? 'OperationSignature' : 'AttributeSignature', ); if (r.failed()) { throw new Error(r.message); } return Semantics.prototypeGrammarSemantics(r).parse(); } function newDefaultAction(type, name, doIt) { return function(...children) { const thisThing = this._semantics.operations[name] || this._semantics.attributes[name]; const args = thisThing.formals.map(formal => this.args[formal]); if (!this.isIteration() && children.length === 1) { // This CST node corresponds to a non-terminal in the grammar (e.g., AddExpr). The fact that // we got here means that this action dictionary doesn't have an action for this particular // non-terminal or a generic `_nonterminal` action. // As a convenience, if this node only has one child, we just return the result of applying // this operation / attribute to the child node. return doIt.apply(children[0], args); } else { // Otherwise, we throw an exception to let the programmer know that we don't know what // to do with this node. throw errors.missingSemanticAction(this.ctorName, name, type, globalActionStack); } }; } // Creates a new Semantics instance for `grammar`, inheriting operations and attributes from // `optSuperSemantics`, if it is specified. Returns a function that acts as a proxy for the new // Semantics instance. When that function is invoked with a CST node as an argument, it returns // a wrapper for that node which gives access to the operations and attributes provided by this // semantics. Semantics.createSemantics = function(grammar, optSuperSemantics) { const s = new Semantics( grammar, optSuperSemantics !== undefined ? optSuperSemantics : Semantics.BuiltInSemantics._getSemantics(), ); // To enable clients to invoke a semantics like a function, return a function that acts as a proxy // for `s`, which is the real `Semantics` instance. const proxy = function ASemantics(matchResult) { if (!(matchResult instanceof MatchResult)) { throw new TypeError( 'Semantics expected a MatchResult, but got ' + common.unexpectedObjToString(matchResult), ); } if (matchResult.failed()) { throw new TypeError('cannot apply Semantics to ' + matchResult.toString()); } const cst = matchResult._cst; if (cst.grammar !== grammar) { throw new Error( "Cannot use a MatchResult from grammar '" + cst.grammar.name + "' with a semantics for '" + grammar.name + "'", ); } const inputStream = new InputStream(matchResult.input); return s.wrap(cst, inputStream.interval(matchResult._cstOffset, matchResult.input.length)); }; // Forward public methods from the proxy to the semantics instance. proxy.addOperation = function(signature, actionDict) { s.addOperationOrAttribute('operation', signature, actionDict); return proxy; }; proxy.extendOperation = function(name, actionDict) { s.extendOperationOrAttribute('operation', name, actionDict); return proxy; }; proxy.addAttribute = function(name, actionDict) { s.addOperationOrAttribute('attribute', name, actionDict); return proxy; }; proxy.extendAttribute = function(name, actionDict) { s.extendOperationOrAttribute('attribute', name, actionDict); return proxy; }; proxy._getActionDict = function(operationOrAttributeName) { const action = s.operations[operationOrAttributeName] || s.attributes[operationOrAttributeName]; if (!action) { throw new Error( '"' + operationOrAttributeName + '" is not a valid operation or attribute ' + 'name in this semantics for "' + grammar.name + '"', ); } return action.actionDict; }; proxy._remove = function(operationOrAttributeName) { let semantic; if (operationOrAttributeName in s.operations) { semantic = s.operations[operationOrAttributeName]; delete s.operations[operationOrAttributeName]; } else if (operationOrAttributeName in s.attributes) { semantic = s.attributes[operationOrAttributeName]; delete s.attributes[operationOrAttributeName]; } delete s.Wrapper.prototype[operationOrAttributeName]; return semantic; }; proxy.getOperationNames = function() { return Object.keys(s.operations); }; proxy.getAttributeNames = function() { return Object.keys(s.attributes); }; proxy.getGrammar = function() { return s.grammar; }; proxy.toRecipe = function(semanticsOnly) { return s.toRecipe(semanticsOnly); }; // Make the proxy's toString() work. proxy.toString = s.toString.bind(s); // Returns the semantics for the proxy. proxy._getSemantics = function() { return s; }; return proxy; }; // ----------------- Operation ----------------- // An Operation represents a function to be applied to a concrete syntax tree (CST) -- it's very // similar to a Visitor (http://en.wikipedia.org/wiki/Visitor_pattern). An operation is executed by // recursively walking the CST, and at each node, invoking the matching semantic action from // `actionDict`. See `Operation.prototype.execute` for details of how a CST node's matching semantic // action is found. class Operation { constructor(name, formals, actionDict, builtInDefault) { this.name = name; this.formals = formals; this.actionDict = actionDict; this.builtInDefault = builtInDefault; } checkActionDict(grammar) { grammar._checkTopDownActionDict(this.typeName, this.name, this.actionDict); } // Execute this operation on the CST node associated with `nodeWrapper` in the context of the // given Semantics instance. execute(semantics, nodeWrapper) { try { // Look for a semantic action whose name matches the node's constructor name, which is either // the name of a rule in the grammar, or '_terminal' (for a terminal node), or '_iter' (for an // iteration node). const {ctorName} = nodeWrapper._node; let actionFn = this.actionDict[ctorName]; if (actionFn) { globalActionStack.push([this, ctorName]); return actionFn.apply(nodeWrapper, nodeWrapper._children()); } // The action dictionary does not contain a semantic action for this specific type of node. // If this is a nonterminal node and the programmer has provided a `_nonterminal` semantic // action, we invoke it: if (nodeWrapper.isNonterminal()) { actionFn = this.actionDict._nonterminal; if (actionFn) { globalActionStack.push([this, '_nonterminal', ctorName]); return actionFn.apply(nodeWrapper, nodeWrapper._children()); } } // Otherwise, we invoke the '_default' semantic action. globalActionStack.push([this, 'default action', ctorName]); return this.actionDict._default.apply(nodeWrapper, nodeWrapper._children()); } finally { globalActionStack.pop(); } } } Operation.prototype.typeName = 'operation'; // ----------------- Attribute ----------------- // Attributes are Operations whose results are memoized. This means that, for any given semantics, // the semantic action for a CST node will be invoked no more than once. class Attribute extends Operation { constructor(name, actionDict, builtInDefault) { super(name, [], actionDict, builtInDefault); } execute(semantics, nodeWrapper) { const node = nodeWrapper._node; const key = semantics.attributeKeys[this.name]; if (!hasOwnProperty(node, key)) { // The following is a super-send -- isn't JS beautiful? :/ node[key] = Operation.prototype.execute.call(this, semantics, nodeWrapper); } return node[key]; } } Attribute.prototype.typeName = 'attribute';