UNPKG

ohm-js

Version:

An object-oriented language for parsing and pattern matching

1,690 lines (1,436 loc) 214 kB
(function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : typeof define === 'function' && define.amd ? define(['exports'], factory) : (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.ohmExtras = {})); })(this, (function (exports) { 'use strict'; // -------------------------------------------------------------------- // -------------------------------------------------------------------- // Exports // -------------------------------------------------------------------- function abstract(optMethodName) { const methodName = optMethodName || ''; return function() { throw new Error( 'this method ' + methodName + ' is abstract! ' + '(it has no implementation in class ' + this.constructor.name + ')', ); }; } function assert(cond, message) { if (!cond) { throw new Error(message || 'Assertion failed'); } } // Define a lazily-computed, non-enumerable property named `propName` // on the object `obj`. `getterFn` will be called to compute the value the // first time the property is accessed. function defineLazyProperty(obj, propName, getterFn) { let memo; Object.defineProperty(obj, propName, { get() { if (!memo) { memo = getterFn.call(this); } return memo; }, }); } function clone(obj) { if (obj) { return Object.assign({}, obj); } return obj; } function repeatFn(fn, n) { const arr = []; while (n-- > 0) { arr.push(fn()); } return arr; } function repeatStr(str, n) { return new Array(n + 1).join(str); } function repeat(x, n) { return repeatFn(() => x, n); } function getDuplicates(array) { const duplicates = []; for (let idx = 0; idx < array.length; idx++) { const x = array[idx]; if (array.lastIndexOf(x) !== idx && duplicates.indexOf(x) < 0) { duplicates.push(x); } } return duplicates; } function copyWithoutDuplicates(array) { const noDuplicates = []; array.forEach(entry => { if (noDuplicates.indexOf(entry) < 0) { noDuplicates.push(entry); } }); return noDuplicates; } function isSyntactic(ruleName) { const firstChar = ruleName[0]; return firstChar === firstChar.toUpperCase(); } function isLexical(ruleName) { return !isSyntactic(ruleName); } function padLeft(str, len, optChar) { const ch = optChar || ' '; if (str.length < len) { return repeatStr(ch, len - str.length) + str; } return str; } // StringBuffer function StringBuffer() { this.strings = []; } StringBuffer.prototype.append = function(str) { this.strings.push(str); }; StringBuffer.prototype.contents = function() { return this.strings.join(''); }; const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16)); function unescapeCodePoint(s) { if (s.charAt(0) === '\\') { switch (s.charAt(1)) { case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; case 'v': return '\v'; case 'x': return escapeUnicode(s.slice(2, 4)); case 'u': return s.charAt(2) === '{' ? escapeUnicode(s.slice(3, -1)) : escapeUnicode(s.slice(2, 6)); default: return s.charAt(1); } } else { return s; } } // Helper for producing a description of an unknown object in a safe way. // Especially useful for error messages where an unexpected type of object was encountered. function unexpectedObjToString(obj) { if (obj == null) { return String(obj); } const baseToString = Object.prototype.toString.call(obj); try { let typeName; if (obj.constructor && obj.constructor.name) { typeName = obj.constructor.name; } else if (baseToString.indexOf('[object ') === 0) { typeName = baseToString.slice(8, -1); // Extract e.g. "Array" from "[object Array]". } else { typeName = typeof obj; } return typeName + ': ' + JSON.stringify(String(obj)); } catch (e) { return baseToString; } } function checkNotNull(obj, message = 'unexpected null value') { if (obj == null) { throw new Error(message); } return obj; } var common = /*#__PURE__*/Object.freeze({ __proto__: null, abstract: abstract, assert: assert, defineLazyProperty: defineLazyProperty, clone: clone, repeatFn: repeatFn, repeatStr: repeatStr, repeat: repeat, getDuplicates: getDuplicates, copyWithoutDuplicates: copyWithoutDuplicates, isSyntactic: isSyntactic, isLexical: isLexical, padLeft: padLeft, StringBuffer: StringBuffer, unescapeCodePoint: unescapeCodePoint, unexpectedObjToString: unexpectedObjToString, checkNotNull: checkNotNull }); // -------------------------------------------------------------------- // Private stuff // -------------------------------------------------------------------- // Given an array of numbers `arr`, return an array of the numbers as strings, // right-justified and padded to the same length. function padNumbersToEqualLength(arr) { let maxLen = 0; const strings = arr.map(n => { const str = n.toString(); maxLen = Math.max(maxLen, str.length); return str; }); return strings.map(s => padLeft(s, maxLen)); } // Produce a new string that would be the result of copying the contents // of the string `src` onto `dest` at offset `offest`. function strcpy(dest, src, offset) { const origDestLen = dest.length; const start = dest.slice(0, offset); const end = dest.slice(offset + src.length); return (start + src + end).substr(0, origDestLen); } // Casts the underlying lineAndCol object to a formatted message string, // highlighting `ranges`. function lineAndColumnToMessage(...ranges) { const lineAndCol = this; const {offset} = lineAndCol; const {repeatStr} = common; const sb = new StringBuffer(); sb.append('Line ' + lineAndCol.lineNum + ', col ' + lineAndCol.colNum + ':\n'); // An array of the previous, current, and next line numbers as strings of equal length. const lineNumbers = padNumbersToEqualLength([ lineAndCol.prevLine == null ? 0 : lineAndCol.lineNum - 1, lineAndCol.lineNum, lineAndCol.nextLine == null ? 0 : lineAndCol.lineNum + 1, ]); // Helper for appending formatting input lines to the buffer. const appendLine = (num, content, prefix) => { sb.append(prefix + lineNumbers[num] + ' | ' + content + '\n'); }; // Include the previous line for context if possible. if (lineAndCol.prevLine != null) { appendLine(0, lineAndCol.prevLine, ' '); } // Line that the error occurred on. appendLine(1, lineAndCol.line, '> '); // Build up the line that points to the offset and possible indicates one or more ranges. // Start with a blank line, and indicate each range by overlaying a string of `~` chars. const lineLen = lineAndCol.line.length; let indicationLine = repeatStr(' ', lineLen + 1); for (let i = 0; i < ranges.length; ++i) { let startIdx = ranges[i][0]; let endIdx = ranges[i][1]; assert(startIdx >= 0 && startIdx <= endIdx, 'range start must be >= 0 and <= end'); const lineStartOffset = offset - lineAndCol.colNum + 1; startIdx = Math.max(0, startIdx - lineStartOffset); endIdx = Math.min(endIdx - lineStartOffset, lineLen); indicationLine = strcpy(indicationLine, repeatStr('~', endIdx - startIdx), startIdx); } const gutterWidth = 2 + lineNumbers[1].length + 3; sb.append(repeatStr(' ', gutterWidth)); indicationLine = strcpy(indicationLine, '^', lineAndCol.colNum - 1); sb.append(indicationLine.replace(/ +$/, '') + '\n'); // Include the next line for context if possible. if (lineAndCol.nextLine != null) { appendLine(2, lineAndCol.nextLine, ' '); } return sb.contents(); } // -------------------------------------------------------------------- // Exports // -------------------------------------------------------------------- let builtInRulesCallbacks = []; // Since Grammar.BuiltInRules is bootstrapped, most of Ohm can't directly depend it. // This function allows modules that do depend on the built-in rules to register a callback // that will be called later in the initialization process. function awaitBuiltInRules(cb) { builtInRulesCallbacks.push(cb); } function announceBuiltInRules(grammar) { builtInRulesCallbacks.forEach(cb => { cb(grammar); }); builtInRulesCallbacks = null; } // Return an object with the line and column information for the given // offset in `str`. function getLineAndColumn(str, offset) { let lineNum = 1; let colNum = 1; let currOffset = 0; let lineStartOffset = 0; let nextLine = null; let prevLine = null; let prevLineStartOffset = -1; while (currOffset < offset) { const c = str.charAt(currOffset++); if (c === '\n') { lineNum++; colNum = 1; prevLineStartOffset = lineStartOffset; lineStartOffset = currOffset; } else if (c !== '\r') { colNum++; } } // Find the end of the target line. let lineEndOffset = str.indexOf('\n', lineStartOffset); if (lineEndOffset === -1) { lineEndOffset = str.length; } else { // Get the next line. const nextLineEndOffset = str.indexOf('\n', lineEndOffset + 1); nextLine = nextLineEndOffset === -1 ? str.slice(lineEndOffset) : str.slice(lineEndOffset, nextLineEndOffset); // Strip leading and trailing EOL char(s). nextLine = nextLine.replace(/^\r?\n/, '').replace(/\r$/, ''); } // Get the previous line. if (prevLineStartOffset >= 0) { // Strip trailing EOL char(s). prevLine = str.slice(prevLineStartOffset, lineStartOffset).replace(/\r?\n$/, ''); } // Get the target line, stripping a trailing carriage return if necessary. const line = str.slice(lineStartOffset, lineEndOffset).replace(/\r$/, ''); return { offset, lineNum, colNum, line, prevLine, nextLine, toString: lineAndColumnToMessage, }; } // Return a nicely-formatted string describing the line and column for the // given offset in `str` highlighting `ranges`. function getLineAndColumnMessage(str, offset, ...ranges) { return getLineAndColumn(str, offset).toString(...ranges); } const uniqueId = (() => { let idCounter = 0; return prefix => '' + prefix + idCounter++; })(); // Helpers function getProp(name, thing, fn) { return fn(thing[name]); } function mapProp(name, thing, fn) { return thing[name].map(fn); } // Returns a function that will walk a single property of a node. // `descriptor` is a string indicating the property name, optionally ending // with '[]' (e.g., 'children[]'). function getPropWalkFn(descriptor) { const parts = descriptor.split(/ ?\[\]/); if (parts.length === 2) { return mapProp.bind(null, parts[0]); } return getProp.bind(null, descriptor); } function getProps(walkFns, thing, fn) { return walkFns.map(walkFn => walkFn(thing, fn)); } function getWalkFn(shape) { if (typeof shape === 'string') { return getProps.bind(null, [getPropWalkFn(shape)]); } else if (Array.isArray(shape)) { return getProps.bind(null, shape.map(getPropWalkFn)); } else { assert(typeof shape === 'function', 'Expected a string, Array, or function'); assert(shape.length === 2, 'Expected a function of arity 2, got ' + shape.length); return shape; } } function isRestrictedIdentifier(str) { return /^[a-zA-Z_][0-9a-zA-Z_]*$/.test(str); } function trim(s) { return s.trim(); } function parseSignature$1(sig) { const parts = sig.split(/[()]/).map(trim); if (parts.length === 3 && parts[2] === '') { const name = parts[0]; let params = []; if (parts[1].length > 0) { params = parts[1].split(',').map(trim); } if (isRestrictedIdentifier(name) && params.every(isRestrictedIdentifier)) { return {name, formals: params}; } } throw new Error('Invalid operation signature: ' + sig); } /* A VisitorFamily contains a set of recursive operations that are defined over some kind of tree structure. The `config` parameter specifies how to walk the tree: - 'getTag' is function which, given a node in the tree, returns the node's 'tag' (type) - 'shapes' an object that maps from a tag to a value that describes how to recursively evaluate the operation for nodes of that type. The value can be: * a string indicating the property name that holds that node's only child * an Array of property names (or an empty array indicating a leaf type), or * a function taking two arguments (node, fn), and returning an Array which is the result of apply `fn` to each of the node's children. */ class VisitorFamily { constructor(config) { this._shapes = config.shapes; this._getTag = config.getTag; this.Adapter = function(thing, family) { this._adaptee = thing; this._family = family; }; this.Adapter.prototype.valueOf = function() { throw new Error('heeey!'); }; this.operations = {}; this._arities = Object.create(null); this._getChildren = Object.create(null); Object.keys(this._shapes).forEach(k => { const shape = this._shapes[k]; this._getChildren[k] = getWalkFn(shape); // A function means the arity isn't fixed, so don't put an entry in the arity map. if (typeof shape !== 'function') { this._arities[k] = Array.isArray(shape) ? shape.length : 1; } }); this._wrap = thing => new this.Adapter(thing, this); } wrap(thing) { return this._wrap(thing); } _checkActionDict(dict) { Object.keys(dict).forEach(k => { assert(k in this._getChildren, "Unrecognized action name '" + k + "'"); const action = dict[k]; assert( typeof action === 'function', "Key '" + k + "': expected function, got " + action, ); if (k in this._arities) { const expected = this._arities[k]; const actual = dict[k].length; assert( actual === expected, "Action '" + k + "' has the wrong arity: expected " + expected + ', got ' + actual, ); } }); } addOperation(signature, actions) { const sig = parseSignature$1(signature); const {name} = sig; this._checkActionDict(actions); this.operations[name] = { name, formals: sig.formals, actions, }; const family = this; this.Adapter.prototype[name] = function(...args) { const tag = family._getTag(this._adaptee); assert(tag in family._getChildren, "getTag returned unrecognized tag '" + tag + "'"); assert(tag in actions, "No action for '" + tag + "' in operation '" + name + "'"); // Create an "arguments object" from the arguments that were passed to this // operation / attribute. const argsObj = Object.create(null); for (const [i, val] of Object.entries(args)) { argsObj[sig.formals[i]] = val; } const oldArgs = this.args; this.args = argsObj; const ans = actions[tag].apply( this, family._getChildren[tag](this._adaptee, family._wrap), ); this.args = oldArgs; return ans; }; return this; } } function handleListOf(child) { return child.toAST(this.args.mapping); } function handleEmptyListOf() { return []; } function handleNonemptyListOf(first, sep, rest) { return [first.toAST(this.args.mapping)].concat(rest.toAST(this.args.mapping)); } const defaultMapping = { listOf: handleListOf, ListOf: handleListOf, emptyListOf: handleEmptyListOf, EmptyListOf: handleEmptyListOf, nonemptyListOf: handleNonemptyListOf, NonemptyListOf: handleNonemptyListOf, }; const defaultOperation = { _terminal() { return this.sourceString; }, _nonterminal(...children) { const {ctorName} = this._node; const {mapping} = this.args; // without customization if (!Object.prototype.hasOwnProperty.call(mapping, ctorName)) { // lexical rule if (this.isLexical()) { return this.sourceString; } // singular node (e.g. only surrounded by literals or lookaheads) const realChildren = children.filter(child => !child.isTerminal()); if (realChildren.length === 1) { return realChildren[0].toAST(mapping); } // rest: terms with multiple children } // direct forward if (typeof mapping[ctorName] === 'number') { return children[mapping[ctorName]].toAST(mapping); } // named/mapped children or unnamed children ('0', '1', '2', ...) const propMap = mapping[ctorName] || children; const node = { type: ctorName, }; // eslint-disable-next-line guard-for-in for (const prop in propMap) { const mappedProp = mapping[ctorName] && mapping[ctorName][prop]; if (typeof mappedProp === 'number') { // direct forward node[prop] = children[mappedProp].toAST(mapping); } else if ( typeof mappedProp === 'string' || typeof mappedProp === 'boolean' || mappedProp === null ) { // primitive value node[prop] = mappedProp; } else if (typeof mappedProp === 'object' && mappedProp instanceof Number) { // primitive number (must be unboxed) node[prop] = Number(mappedProp); } else if (typeof mappedProp === 'function') { // computed value node[prop] = mappedProp.call(this, children); } else if (mappedProp === undefined) { if (children[prop] && !children[prop].isTerminal()) { node[prop] = children[prop].toAST(mapping); } else { // delete predefined 'type' properties, like 'type', if explicitely removed delete node[prop]; } } } return node; }, _iter(...children) { if (this._node.isOptional()) { if (this.numChildren === 0) { return null; } else { return children[0].toAST(this.args.mapping); } } return children.map(c => c.toAST(this.args.mapping)); }, }; // Returns a plain JavaScript object that includes an abstract syntax tree (AST) // for the given match result `res` containg a concrete syntax tree (CST) and grammar. // The optional `mapping` parameter can be used to customize how the nodes of the CST // are mapped to the AST (see /doc/extras.md#toastmatchresult-mapping). function toAST(res, mapping) { if (typeof res.failed !== 'function' || res.failed()) { throw new Error('toAST() expects a succesful MatchResult as first parameter'); } mapping = Object.assign({}, defaultMapping, mapping); const operation = Object.assign({}, defaultOperation); for (const termName in mapping) { if (typeof mapping[termName] === 'function') { operation[termName] = mapping[termName]; delete mapping[termName]; } } const g = res._cst.grammar; const s = g.createSemantics().addOperation('toAST(mapping)', operation); return s(res).toAST(mapping); } // Returns a semantics containg the toAST(mapping) operation for the given grammar g. function semanticsForToAST(g) { if (typeof g.createSemantics !== 'function') { throw new Error('semanticsToAST() expects a Grammar as parameter'); } return g.createSemantics().addOperation('toAST(mapping)', defaultOperation); } // These are just categories that are used in ES5/ES2015. // The full list of Unicode categories is here: http://www.fileformat.info/info/unicode/category/index.htm. const UnicodeCategories = { // Letters Lu: /\p{Lu}/u, Ll: /\p{Ll}/u, Lt: /\p{Lt}/u, Lm: /\p{Lm}/u, Lo: /\p{Lo}/u, // Numbers Nl: /\p{Nl}/u, Nd: /\p{Nd}/u, // Marks Mn: /\p{Mn}/u, Mc: /\p{Mc}/u, // Punctuation, Connector Pc: /\p{Pc}/u, // Separator, Space Zs: /\p{Zs}/u, // These two are not real Unicode categories, but our useful for Ohm. // L is a combination of all the letter categories. // Ltmo is a combination of Lt, Lm, and Lo. L: /\p{Letter}/u, Ltmo: /\p{Lt}|\p{Lm}|\p{Lo}/u, }; // -------------------------------------------------------------------- // Private stuff // -------------------------------------------------------------------- // General stuff class PExpr { constructor() { if (this.constructor === PExpr) { throw new Error("PExpr cannot be instantiated -- it's abstract"); } } // Set the `source` property to the interval containing the source for this expression. withSource(interval) { if (interval) { this.source = interval.trimmed(); } return this; } } // Any const any = Object.create(PExpr.prototype); // End const end = Object.create(PExpr.prototype); // Terminals class Terminal extends PExpr { constructor(obj) { super(); this.obj = obj; } } // Ranges class Range extends PExpr { constructor(from, to) { super(); this.from = from; this.to = to; // If either `from` or `to` is made up of multiple code units, then // the range should consume a full code point, not a single code unit. this.matchCodePoint = from.length > 1 || to.length > 1; } } // Parameters class Param extends PExpr { constructor(index) { super(); this.index = index; } } // Alternation class Alt extends PExpr { constructor(terms) { super(); this.terms = terms; } } // Extend is an implementation detail of rule extension class Extend extends Alt { constructor(superGrammar, name, body) { const origBody = superGrammar.rules[name].body; super([body, origBody]); this.superGrammar = superGrammar; this.name = name; this.body = body; } } // Splice is an implementation detail of rule overriding with the `...` operator. class Splice extends Alt { constructor(superGrammar, ruleName, beforeTerms, afterTerms) { const origBody = superGrammar.rules[ruleName].body; super([...beforeTerms, origBody, ...afterTerms]); this.superGrammar = superGrammar; this.ruleName = ruleName; this.expansionPos = beforeTerms.length; } } // Sequences class Seq extends PExpr { constructor(factors) { super(); this.factors = factors; } } // Iterators and optionals class Iter extends PExpr { constructor(expr) { super(); this.expr = expr; } } class Star extends Iter {} class Plus extends Iter {} class Opt extends Iter {} Star.prototype.operator = '*'; Plus.prototype.operator = '+'; Opt.prototype.operator = '?'; Star.prototype.minNumMatches = 0; Plus.prototype.minNumMatches = 1; Opt.prototype.minNumMatches = 0; Star.prototype.maxNumMatches = Number.POSITIVE_INFINITY; Plus.prototype.maxNumMatches = Number.POSITIVE_INFINITY; Opt.prototype.maxNumMatches = 1; // Predicates class Not extends PExpr { constructor(expr) { super(); this.expr = expr; } } class Lookahead extends PExpr { constructor(expr) { super(); this.expr = expr; } } // "Lexification" class Lex extends PExpr { constructor(expr) { super(); this.expr = expr; } } // Rule application class Apply extends PExpr { constructor(ruleName, args = []) { super(); this.ruleName = ruleName; this.args = args; } isSyntactic() { return isSyntactic(this.ruleName); } // This method just caches the result of `this.toString()` in a non-enumerable property. toMemoKey() { if (!this._memoKey) { Object.defineProperty(this, '_memoKey', {value: this.toString()}); } return this._memoKey; } } // Unicode character class UnicodeChar extends PExpr { constructor(category) { super(); this.category = category; this.pattern = UnicodeCategories[category]; } } // -------------------------------------------------------------------- // Private stuff // -------------------------------------------------------------------- function createError(message, optInterval) { let e; if (optInterval) { e = new Error(optInterval.getLineAndColumnMessage() + message); e.shortMessage = message; e.interval = optInterval; } else { e = new Error(message); } return e; } // ----------------- errors about intervals ----------------- function intervalSourcesDontMatch() { return createError("Interval sources don't match"); } // ----------------- errors about grammars ----------------- // Grammar syntax error function grammarSyntaxError(matchFailure) { const e = new Error(); Object.defineProperty(e, 'message', { enumerable: true, get() { return matchFailure.message; }, }); Object.defineProperty(e, 'shortMessage', { enumerable: true, get() { return 'Expected ' + matchFailure.getExpectedText(); }, }); e.interval = matchFailure.getInterval(); return e; } // Undeclared grammar function undeclaredGrammar(grammarName, namespace, interval) { const message = namespace ? `Grammar ${grammarName} is not declared in namespace '${namespace}'` : 'Undeclared grammar ' + grammarName; return createError(message, interval); } // Duplicate grammar declaration function duplicateGrammarDeclaration(grammar, namespace) { return createError('Grammar ' + grammar.name + ' is already declared in this namespace'); } function grammarDoesNotSupportIncrementalParsing(grammar) { return createError(`Grammar '${grammar.name}' does not support incremental parsing`); } // ----------------- rules ----------------- // Undeclared rule function undeclaredRule(ruleName, grammarName, optInterval) { return createError( 'Rule ' + ruleName + ' is not declared in grammar ' + grammarName, optInterval, ); } // Cannot override undeclared rule function cannotOverrideUndeclaredRule(ruleName, grammarName, optSource) { return createError( 'Cannot override rule ' + ruleName + ' because it is not declared in ' + grammarName, optSource, ); } // Cannot extend undeclared rule function cannotExtendUndeclaredRule(ruleName, grammarName, optSource) { return createError( 'Cannot extend rule ' + ruleName + ' because it is not declared in ' + grammarName, optSource, ); } // Duplicate rule declaration function duplicateRuleDeclaration(ruleName, grammarName, declGrammarName, optSource) { let message = "Duplicate declaration for rule '" + ruleName + "' in grammar '" + grammarName + "'"; if (grammarName !== declGrammarName) { message += " (originally declared in '" + declGrammarName + "')"; } return createError(message, optSource); } // Wrong number of parameters function wrongNumberOfParameters(ruleName, expected, actual, source) { return createError( 'Wrong number of parameters for rule ' + ruleName + ' (expected ' + expected + ', got ' + actual + ')', source, ); } // Wrong number of arguments function wrongNumberOfArguments(ruleName, expected, actual, expr) { return createError( 'Wrong number of arguments for rule ' + ruleName + ' (expected ' + expected + ', got ' + actual + ')', expr, ); } // Duplicate parameter names function duplicateParameterNames(ruleName, duplicates, source) { return createError( 'Duplicate parameter names in rule ' + ruleName + ': ' + duplicates.join(', '), source, ); } // Invalid parameter expression function invalidParameter(ruleName, expr) { return createError( 'Invalid parameter to rule ' + ruleName + ': ' + expr + ' has arity ' + expr.getArity() + ', but parameter expressions must have arity 1', expr.source, ); } // Application of syntactic rule from lexical rule const syntacticVsLexicalNote = 'NOTE: A _syntactic rule_ is a rule whose name begins with a capital letter. ' + 'See https://ohmjs.org/d/svl for more details.'; function applicationOfSyntacticRuleFromLexicalContext(ruleName, applyExpr) { return createError( 'Cannot apply syntactic rule ' + ruleName + ' from here (inside a lexical context)', applyExpr.source, ); } // Lexical rule application used with applySyntactic function applySyntacticWithLexicalRuleApplication(applyExpr) { const {ruleName} = applyExpr; return createError( `applySyntactic is for syntactic rules, but '${ruleName}' is a lexical rule. ` + syntacticVsLexicalNote, applyExpr.source, ); } // Application of applySyntactic in a syntactic context function unnecessaryExperimentalApplySyntactic(applyExpr) { return createError( 'applySyntactic is not required here (in a syntactic context)', applyExpr.source, ); } // Incorrect argument type function incorrectArgumentType(expectedType, expr) { return createError('Incorrect argument type: expected ' + expectedType, expr.source); } // Multiple instances of the super-splice operator (`...`) in the rule body. function multipleSuperSplices(expr) { return createError("'...' can appear at most once in a rule body", expr.source); } // Unicode code point escapes function invalidCodePoint(applyWrapper) { const node = applyWrapper._node; assert(node && node.isNonterminal() && node.ctorName === 'escapeChar_unicodeCodePoint'); // Get an interval that covers all of the hex digits. const digitIntervals = applyWrapper.children.slice(1, -1).map(d => d.source); const fullInterval = digitIntervals[0].coverageWith(...digitIntervals.slice(1)); return createError( `U+${fullInterval.contents} is not a valid Unicode code point`, fullInterval, ); } // ----------------- Kleene operators ----------------- function kleeneExprHasNullableOperand(kleeneExpr, applicationStack) { const actuals = applicationStack.length > 0 ? applicationStack[applicationStack.length - 1].args : []; const expr = kleeneExpr.expr.substituteParams(actuals); let message = 'Nullable expression ' + expr + " is not allowed inside '" + kleeneExpr.operator + "' (possible infinite loop)"; if (applicationStack.length > 0) { const stackTrace = applicationStack .map(app => new Apply(app.ruleName, app.args)) .join('\n'); message += '\nApplication stack (most recent application last):\n' + stackTrace; } return createError(message, kleeneExpr.expr.source); } // ----------------- arity ----------------- function inconsistentArity(ruleName, expected, actual, expr) { return createError( 'Rule ' + ruleName + ' involves an alternation which has inconsistent arity ' + '(expected ' + expected + ', got ' + actual + ')', expr.source, ); } // ----------------- convenience ----------------- function multipleErrors(errors) { const messages = errors.map(e => e.message); return createError(['Errors:'].concat(messages).join('\n- '), errors[0].interval); } // ----------------- semantic ----------------- function missingSemanticAction(ctorName, name, type, stack) { let stackTrace = stack .slice(0, -1) .map(info => { const ans = ' ' + info[0].name + ' > ' + info[1]; return info.length === 3 ? ans + " for '" + info[2] + "'" : ans; }) .join('\n'); stackTrace += '\n ' + name + ' > ' + ctorName; let moreInfo = ''; if (ctorName === '_iter') { moreInfo = [ '\nNOTE: as of Ohm v16, there is no default action for iteration nodes — see ', ' https://ohmjs.org/d/dsa for details.', ].join('\n'); } const message = [ `Missing semantic action for '${ctorName}' in ${type} '${name}'.${moreInfo}`, 'Action stack (most recent call last):', stackTrace, ].join('\n'); const e = createError(message); e.name = 'missingSemanticAction'; return e; } function throwErrors(errors) { if (errors.length === 1) { throw errors[0]; } if (errors.length > 1) { throw multipleErrors(errors); } } // -------------------------------------------------------------------- // Private stuff // -------------------------------------------------------------------- class Interval { constructor(sourceString, startIdx, endIdx) { this.sourceString = sourceString; this.startIdx = startIdx; this.endIdx = endIdx; } get contents() { if (this._contents === undefined) { this._contents = this.sourceString.slice(this.startIdx, this.endIdx); } return this._contents; } get length() { return this.endIdx - this.startIdx; } coverageWith(...intervals) { return Interval.coverage(...intervals, this); } collapsedLeft() { return new Interval(this.sourceString, this.startIdx, this.startIdx); } collapsedRight() { return new Interval(this.sourceString, this.endIdx, this.endIdx); } getLineAndColumn() { return getLineAndColumn(this.sourceString, this.startIdx); } getLineAndColumnMessage() { const range = [this.startIdx, this.endIdx]; return getLineAndColumnMessage(this.sourceString, this.startIdx, range); } // Returns an array of 0, 1, or 2 intervals that represents the result of the // interval difference operation. minus(that) { if (this.sourceString !== that.sourceString) { throw intervalSourcesDontMatch(); } else if (this.startIdx === that.startIdx && this.endIdx === that.endIdx) { // `this` and `that` are the same interval! return []; } else if (this.startIdx < that.startIdx && that.endIdx < this.endIdx) { // `that` splits `this` into two intervals return [ new Interval(this.sourceString, this.startIdx, that.startIdx), new Interval(this.sourceString, that.endIdx, this.endIdx), ]; } else if (this.startIdx < that.endIdx && that.endIdx < this.endIdx) { // `that` contains a prefix of `this` return [new Interval(this.sourceString, that.endIdx, this.endIdx)]; } else if (this.startIdx < that.startIdx && that.startIdx < this.endIdx) { // `that` contains a suffix of `this` return [new Interval(this.sourceString, this.startIdx, that.startIdx)]; } else { // `that` and `this` do not overlap return [this]; } } // Returns a new Interval that has the same extent as this one, but which is relative // to `that`, an Interval that fully covers this one. relativeTo(that) { if (this.sourceString !== that.sourceString) { throw intervalSourcesDontMatch(); } assert( this.startIdx >= that.startIdx && this.endIdx <= that.endIdx, 'other interval does not cover this one', ); return new Interval( this.sourceString, this.startIdx - that.startIdx, this.endIdx - that.startIdx, ); } // Returns a new Interval which contains the same contents as this one, // but with whitespace trimmed from both ends. trimmed() { const {contents} = this; const startIdx = this.startIdx + contents.match(/^\s*/)[0].length; const endIdx = this.endIdx - contents.match(/\s*$/)[0].length; return new Interval(this.sourceString, startIdx, endIdx); } subInterval(offset, len) { const newStartIdx = this.startIdx + offset; return new Interval(this.sourceString, newStartIdx, newStartIdx + len); } } Interval.coverage = function(firstInterval, ...intervals) { let {startIdx, endIdx} = firstInterval; for (const interval of intervals) { if (interval.sourceString !== firstInterval.sourceString) { throw intervalSourcesDontMatch(); } else { startIdx = Math.min(startIdx, interval.startIdx); endIdx = Math.max(endIdx, interval.endIdx); } } return new Interval(firstInterval.sourceString, startIdx, endIdx); }; const MAX_CHAR_CODE = 0xffff; class InputStream { constructor(source) { this.source = source; this.pos = 0; this.examinedLength = 0; } atEnd() { const ans = this.pos >= this.source.length; this.examinedLength = Math.max(this.examinedLength, this.pos + 1); return ans; } next() { const ans = this.source[this.pos++]; this.examinedLength = Math.max(this.examinedLength, this.pos); return ans; } nextCharCode() { const nextChar = this.next(); return nextChar && nextChar.charCodeAt(0); } nextCodePoint() { const cp = this.source.slice(this.pos++).codePointAt(0); // If the code point is beyond plane 0, it takes up two characters. if (cp > MAX_CHAR_CODE) { this.pos += 1; } this.examinedLength = Math.max(this.examinedLength, this.pos); return cp; } matchString(s, optIgnoreCase) { let idx; if (optIgnoreCase) { /* Case-insensitive comparison is a tricky business. Some notable gotchas include the "Turkish I" problem (http://www.i18nguy.com/unicode/turkish-i18n.html) and the fact that the German Esszet (ß) turns into "SS" in upper case. This is intended to be a locale-invariant comparison, which means it may not obey locale-specific expectations (e.g. "i" => "İ"). */ for (idx = 0; idx < s.length; idx++) { const actual = this.next(); const expected = s[idx]; if (actual == null || actual.toUpperCase() !== expected.toUpperCase()) { return false; } } return true; } // Default is case-sensitive comparison. for (idx = 0; idx < s.length; idx++) { if (this.next() !== s[idx]) { return false; } } return true; } sourceSlice(startIdx, endIdx) { return this.source.slice(startIdx, endIdx); } interval(startIdx, optEndIdx) { return new Interval(this.source, startIdx, optEndIdx ? optEndIdx : this.pos); } } // -------------------------------------------------------------------- // Private stuff // -------------------------------------------------------------------- class MatchResult { constructor( matcher, input, startExpr, cst, cstOffset, rightmostFailurePosition, optRecordedFailures, ) { this.matcher = matcher; this.input = input; this.startExpr = startExpr; this._cst = cst; this._cstOffset = cstOffset; this._rightmostFailurePosition = rightmostFailurePosition; this._rightmostFailures = optRecordedFailures; if (this.failed()) { /* eslint-disable no-invalid-this */ defineLazyProperty(this, 'message', function() { const detail = 'Expected ' + this.getExpectedText(); return ( getLineAndColumnMessage(this.input, this.getRightmostFailurePosition()) + detail ); }); defineLazyProperty(this, 'shortMessage', function() { const detail = 'expected ' + this.getExpectedText(); const errorInfo = getLineAndColumn( this.input, this.getRightmostFailurePosition(), ); return 'Line ' + errorInfo.lineNum + ', col ' + errorInfo.colNum + ': ' + detail; }); /* eslint-enable no-invalid-this */ } } succeeded() { return !!this._cst; } failed() { return !this.succeeded(); } getRightmostFailurePosition() { return this._rightmostFailurePosition; } getRightmostFailures() { if (!this._rightmostFailures) { this.matcher.setInput(this.input); const matchResultWithFailures = this.matcher._match(this.startExpr, { tracing: false, positionToRecordFailures: this.getRightmostFailurePosition(), }); this._rightmostFailures = matchResultWithFailures.getRightmostFailures(); } return this._rightmostFailures; } toString() { return this.succeeded() ? '[match succeeded]' : '[match failed at position ' + this.getRightmostFailurePosition() + ']'; } // Return a string summarizing the expected contents of the input stream when // the match failure occurred. getExpectedText() { if (this.succeeded()) { throw new Error('cannot get expected text of a successful MatchResult'); } const sb = new StringBuffer(); let failures = this.getRightmostFailures(); // Filter out the fluffy failures to make the default error messages more useful failures = failures.filter(failure => !failure.isFluffy()); for (let idx = 0; idx < failures.length; idx++) { if (idx > 0) { if (idx === failures.length - 1) { sb.append(failures.length > 2 ? ', or ' : ' or '); } else { sb.append(', '); } } sb.append(failures[idx].toString()); } return sb.contents(); } getInterval() { const pos = this.getRightmostFailurePosition(); return new Interval(this.input, pos, pos); } } class PosInfo { constructor() { this.applicationMemoKeyStack = []; // active applications at this position this.memo = {}; this.maxExaminedLength = 0; this.maxRightmostFailureOffset = -1; this.currentLeftRecursion = undefined; } isActive(application) { return this.applicationMemoKeyStack.indexOf(application.toMemoKey()) >= 0; } enter(application) { this.applicationMemoKeyStack.push(application.toMemoKey()); } exit() { this.applicationMemoKeyStack.pop(); } startLeftRecursion(headApplication, memoRec) { memoRec.isLeftRecursion = true; memoRec.headApplication = headApplication; memoRec.nextLeftRecursion = this.currentLeftRecursion; this.currentLeftRecursion = memoRec; const {applicationMemoKeyStack} = this; const indexOfFirstInvolvedRule = applicationMemoKeyStack.indexOf(headApplication.toMemoKey()) + 1; const involvedApplicationMemoKeys = applicationMemoKeyStack.slice( indexOfFirstInvolvedRule, ); memoRec.isInvolved = function(applicationMemoKey) { return involvedApplicationMemoKeys.indexOf(applicationMemoKey) >= 0; }; memoRec.updateInvolvedApplicationMemoKeys = function() { for (let idx = indexOfFirstInvolvedRule; idx < applicationMemoKeyStack.length; idx++) { const applicationMemoKey = applicationMemoKeyStack[idx]; if (!this.isInvolved(applicationMemoKey)) { involvedApplicationMemoKeys.push(applicationMemoKey); } } }; } endLeftRecursion() { this.currentLeftRecursion = this.currentLeftRecursion.nextLeftRecursion; } // Note: this method doesn't get called for the "head" of a left recursion -- for LR heads, // the memoized result (which starts out being a failure) is always used. shouldUseMemoizedResult(memoRec) { if (!memoRec.isLeftRecursion) { return true; } const {applicationMemoKeyStack} = this; for (let idx = 0; idx < applicationMemoKeyStack.length; idx++) { const applicationMemoKey = applicationMemoKeyStack[idx]; if (memoRec.isInvolved(applicationMemoKey)) { return false; } } return true; } memoize(memoKey, memoRec) { this.memo[memoKey] = memoRec; this.maxExaminedLength = Math.max(this.maxExaminedLength, memoRec.examinedLength); this.maxRightmostFailureOffset = Math.max( this.maxRightmostFailureOffset, memoRec.rightmostFailureOffset, ); return memoRec; } clearObsoleteEntries(pos, invalidatedIdx) { if (pos + this.maxExaminedLength <= invalidatedIdx) { // Optimization: none of the rule applications that were memoized here examined the // interval of the input that changed, so nothing has to be invalidated. return; } const {memo} = this; this.maxExaminedLength = 0; this.maxRightmostFailureOffset = -1; Object.keys(memo).forEach(k => { const memoRec = memo[k]; if (pos + memoRec.examinedLength > invalidatedIdx) { delete memo[k]; } else { this.maxExaminedLength = Math.max(this.maxExaminedLength, memoRec.examinedLength); this.maxRightmostFailureOffset = Math.max( this.maxRightmostFailureOffset, memoRec.rightmostFailureOffset, ); } }); } } // -------------------------------------------------------------------- // Private stuff // -------------------------------------------------------------------- // Unicode characters that are used in the `toString` output. const BALLOT_X = '\u2717'; const CHECK_MARK = '\u2713'; const DOT_OPERATOR = '\u22C5'; const RIGHTWARDS_DOUBLE_ARROW = '\u21D2'; const SYMBOL_FOR_HORIZONTAL_TABULATION = '\u2409'; const SYMBOL_FOR_LINE_FEED = '\u240A'; const SYMBOL_FOR_CARRIAGE_RETURN = '\u240D'; const Flags = { succeeded: 1 << 0, isRootNode: 1 << 1, isImplicitSpaces: 1 << 2, isMemoized: 1 << 3, isHeadOfLeftRecursion: 1 << 4, terminatesLR: 1 << 5, }; function spaces(n) { return repeat(' ', n).join(''); } // Return a string representation of a portion of `input` at offset `pos`. // The result will contain exactly `len` characters. function getInputExcerpt(input, pos, len) { const excerpt = asEscapedString(input.slice(pos, pos + len)); // Pad the output if necessary. if (excerpt.length < len) { return excerpt + repeat(' ', len - excerpt.length).join(''); } return excerpt; } function asEscapedString(obj) { if (typeof obj === 'string') { // Replace non-printable characters with visible symbols. return obj .replace(/ /g, DOT_OPERATOR) .replace(/\t/g, SYMBOL_FOR_HORIZONTAL_TABULATION) .replace(/\n/g, SYMBOL_FOR_LINE_FEED) .replace(/\r/g, SYMBOL_FOR_CARRIAGE_RETURN); } return String(obj); } // ----------------- Trace ----------------- class Trace { constructor(input, pos1, pos2, expr, succeeded, bindings, optChildren) { this.input = input; this.pos = this.pos1 = pos1; this.pos2 = pos2; this.source = new Interval(input, pos1, pos2); this.expr = expr; this.bindings = bindings; this.children = optChildren || []; this.terminatingLREntry = null; this._flags = succeeded ? Flags.succeeded : 0; } get displayString() { return this.expr.toDisplayString(); } clone() { return this.cloneWithExpr(this.expr); } cloneWithExpr(expr) { const ans = new Trace( this.input, this.pos, this.pos2, expr, this.succeeded, this.bindings, this.children, ); ans.isHeadOfLeftRecursion = this.isHeadOfLeftRecursion; ans.isImplicitSpaces = this.isImplicitSpaces; ans.isMemoized = this.isMemoized; ans.isRootNode = this.isRootNode; ans.terminatesLR = this.terminatesLR; ans.terminatingLREntry = this.terminatingLREntry; return ans; } // Record the trace information for the terminating condition of the LR loop. recordLRTermination(ruleBodyTrace, value) { this.terminatingLREntry = new Trace( this.input, this.pos, this.pos2, this.expr, false, [value], [ruleBodyTrace], ); this.terminatingLREntry.terminatesLR = true; } // Recursively traverse this trace node and all its descendents, calling a visitor function // for each node that is visited. If `vistorObjOrFn` is an object, then its 'enter' property // is a function to call before visiting the children of a node, and its 'exit' property is // a function to call afterwards. If `visitorObjOrFn` is a function, it represe