UNPKG

ohm-js

Version:

An object-oriented language for parsing and pattern matching

1,764 lines (1,501 loc) 191 kB
'use strict';

Object.defineProperty(exports, '__esModule', { value: true });

// --------------------------------------------------------------------
// Exports
// --------------------------------------------------------------------

/**
 * Build a placeholder method for an abstract class.
 * @param {string} [optMethodName] - name used in the error message.
 * @returns {Function} a function that always throws when invoked.
 */
function abstract(optMethodName) {
  const methodName = optMethodName || '';
  return function() {
    throw new Error(
      'this method ' +
        methodName +
        ' is abstract! ' +
        '(it has no implementation in class ' +
        this.constructor.name +
        ')',
    );
  };
}

/**
 * Throw an Error unless `cond` is truthy.
 * @param {*} cond - condition to check.
 * @param {string} [message] - error text; defaults to 'Assertion failed'.
 */
function assert(cond, message) {
  if (!cond) {
    throw new Error(message || 'Assertion failed');
  }
}

// Define a lazily-computed, non-enumerable property named `propName`
// on the object `obj`. `getterFn` will be called to compute the value the
// first time the property is accessed.
function defineLazyProperty(obj, propName, getterFn) {
  let memo;
  // Track computation explicitly: testing `memo` for truthiness would re-run
  // `getterFn` on every access whenever it legitimately yields '', 0, false or null.
  let isComputed = false;
  Object.defineProperty(obj, propName, {
    get() {
      if (!isComputed) {
        memo = getterFn.call(this);
        // Only set after a successful call, so a throwing getter is retried.
        isComputed = true;
      }
      return memo;
    },
  });
}

/**
 * Shallow-copy `obj` into a fresh plain object. Falsy inputs are returned as-is.
 */
function clone(obj) {
  if (obj) {
    return Object.assign({}, obj);
  }
  return obj;
}

/**
 * Call `fn` `n` times and collect the results in an array.
 */
function repeatFn(fn, n) {
  const arr = [];
  while (n-- > 0) {
    arr.push(fn());
  }
  return arr;
}

/**
 * Return `str` concatenated with itself `n` times.
 */
function repeatStr(str, n) {
  return new Array(n + 1).join(str);
}

/**
 * Return an array holding `n` references to `x`.
 */
function repeat(x, n) {
  return repeatFn(() => x, n);
}

/**
 * Return the values that occur more than once in `array`, each listed once,
 * in order of first appearance.
 */
function getDuplicates(array) {
  const duplicates = [];
  for (let idx = 0; idx < array.length; idx++) {
    const x = array[idx];
    if (array.lastIndexOf(x) !== idx && duplicates.indexOf(x) < 0) {
      duplicates.push(x);
    }
  }
  return duplicates;
}

/**
 * Return a copy of `array` keeping only the first occurrence of each value.
 */
function copyWithoutDuplicates(array) {
  const noDuplicates = [];
  array.forEach(entry => {
    if (noDuplicates.indexOf(entry) < 0) {
      noDuplicates.push(entry);
    }
  });
  return noDuplicates;
}

/**
 * A rule name is "syntactic" when its first character equals its own upper-case form.
 */
function isSyntactic(ruleName) {
  const firstChar = ruleName[0];
  return firstChar === firstChar.toUpperCase();
}

/**
 * A rule name is "lexical" when it is not syntactic.
 */
function isLexical(ruleName) {
  return !isSyntactic(ruleName);
}

/**
 * Left-pad `str` with `optChar` (default: a space) until it is `len` characters long.
 * Strings already at least `len` long are returned unchanged.
 */
function padLeft(str, len, optChar) {
  const ch = optChar || ' ';
  if (str.length < len) {
    return repeatStr(ch, len - str.length) + str;
  }
  return str;
}

// StringBuffer

/**
 * Accumulates string fragments and joins them on demand.
 */
function StringBuffer() {
  this.strings = [];
}

StringBuffer.prototype.append = function(str) {
  this.strings.push(str);
};

StringBuffer.prototype.contents = function() {
  return this.strings.join('');
};

// Turn a string of hex digits into the character with that code point.
const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));

/**
 * Resolve a single, possibly backslash-escaped, character spelling to the
 * character it denotes. Handles \b \f \n \r \t \v, \xHH, \uHHHH and \u{H...};
 * any other escaped character stands for itself. Unescaped input is returned as-is.
 */
function unescapeCodePoint(s) {
  if (s.charAt(0) === '\\') {
    switch (s.charAt(1)) {
      case 'b':
        return '\b';
      case 'f':
        return '\f';
      case 'n':
        return '\n';
      case 'r':
        return '\r';
      case 't':
        return '\t';
      case 'v':
        return '\v';
      case 'x':
        return escapeUnicode(s.slice(2, 4));
      case 'u':
        // `\u{...}` holds a variable number of digits; `\uHHHH` holds exactly four.
        return s.charAt(2) === '{' ?
          escapeUnicode(s.slice(3, -1)) :
          escapeUnicode(s.slice(2, 6));
      default:
        return s.charAt(1);
    }
  } else {
    return s;
  }
}

// Helper for producing a description of an unknown object in a safe way.
// Especially useful for error messages where an unexpected type of object was encountered.
function unexpectedObjToString(obj) {
  if (obj == null) {
    return String(obj);
  }
  const baseToString = Object.prototype.toString.call(obj);
  try {
    let typeName;
    if (obj.constructor && obj.constructor.name) {
      typeName = obj.constructor.name;
    } else if (baseToString.indexOf('[object ') === 0) {
      typeName = baseToString.slice(8, -1); // Extract e.g. "Array" from "[object Array]".
    } else {
      typeName = typeof obj;
    }
    return typeName + ': ' + JSON.stringify(String(obj));
  } catch (e) {
    // Stringifying the object itself failed; fall back to the safe base description.
    return baseToString;
  }
}

/**
 * Return `obj` unless it is null or undefined, in which case throw an Error.
 */
function checkNotNull(obj, message = 'unexpected null value') {
  if (obj == null) {
    throw new Error(message);
  }
  return obj;
}

var common = /*#__PURE__*/Object.freeze({
  __proto__: null,
  abstract: abstract,
  assert: assert,
  defineLazyProperty: defineLazyProperty,
  clone: clone,
  repeatFn: repeatFn,
  repeatStr: repeatStr,
  repeat: repeat,
  getDuplicates: getDuplicates,
  copyWithoutDuplicates: copyWithoutDuplicates,
  isSyntactic: isSyntactic,
  isLexical: isLexical,
  padLeft: padLeft,
  StringBuffer: StringBuffer,
  unescapeCodePoint: unescapeCodePoint,
  unexpectedObjToString: unexpectedObjToString,
  checkNotNull: checkNotNull
});

// These are just categories that are used in ES5/ES2015.
// The full list of Unicode categories is here: http://www.fileformat.info/info/unicode/category/index.htm.
const UnicodeCategories = {
  // Letters
  Lu: /\p{Lu}/u,
  Ll: /\p{Ll}/u,
  Lt: /\p{Lt}/u,
  Lm: /\p{Lm}/u,
  Lo: /\p{Lo}/u,

  // Numbers
  Nl: /\p{Nl}/u,
  Nd: /\p{Nd}/u,

  // Marks
  Mn: /\p{Mn}/u,
  Mc: /\p{Mc}/u,

  // Punctuation, Connector
  Pc: /\p{Pc}/u,

  // Separator, Space
  Zs: /\p{Zs}/u,

  // These two are not real Unicode categories, but are useful for Ohm.
  // L is a combination of all the letter categories.
  // Ltmo is a combination of Lt, Lm, and Lo.
  L: /\p{Letter}/u,
  Ltmo: /\p{Lt}|\p{Lm}|\p{Lo}/u,
};

// --------------------------------------------------------------------
// Private stuff
// --------------------------------------------------------------------

// General stuff

/**
 * Abstract base class of all parsing expressions; direct instantiation throws.
 */
class PExpr {
  constructor() {
    if (this.constructor === PExpr) {
      throw new Error("PExpr cannot be instantiated -- it's abstract");
    }
  }

  // Set the `source` property to the interval containing the source for this expression.
  withSource(interval) {
    if (interval) {
      this.source = interval.trimmed();
    }
    return this;
  }
}

// Any

// Created via Object.create, so the abstract-class check in the constructor is bypassed.
const any = Object.create(PExpr.prototype);

// End

const end = Object.create(PExpr.prototype);

// Terminals

class Terminal extends PExpr {
  constructor(obj) {
    super();
    this.obj = obj;
  }
}

// Ranges

class Range extends PExpr {
  constructor(from, to) {
    super();
    this.from = from;
    this.to = to;
    // If either `from` or `to` is made up of multiple code units, then
    // the range should consume a full code point, not a single code unit.
    this.matchCodePoint = from.length > 1 || to.length > 1;
  }
}

// Parameters

class Param extends PExpr {
  constructor(index) {
    super();
    this.index = index;
  }
}

// Alternation

class Alt extends PExpr {
  constructor(terms) {
    super();
    this.terms = terms;
  }
}

// Extend is an implementation detail of rule extension

class Extend extends Alt {
  constructor(superGrammar, name, body) {
    const origBody = superGrammar.rules[name].body;
    // The extending body comes first, followed by the inherited body.
    super([body, origBody]);

    this.superGrammar = superGrammar;
    this.name = name;
    this.body = body;
  }
}

// Splice is an implementation detail of rule overriding with the `...` operator.
class Splice extends Alt {
  constructor(superGrammar, ruleName, beforeTerms, afterTerms) {
    const origBody = superGrammar.rules[ruleName].body;
    super([...beforeTerms, origBody, ...afterTerms]);

    this.superGrammar = superGrammar;
    this.ruleName = ruleName;
    // Index within `terms` at which the inherited body was spliced in.
    this.expansionPos = beforeTerms.length;
  }
}

// Sequences

class Seq extends PExpr {
  constructor(factors) {
    super();
    this.factors = factors;
  }
}

// Iterators and optionals

class Iter extends PExpr {
  constructor(expr) {
    super();
    this.expr = expr;
  }
}

class Star extends Iter {}
class Plus extends Iter {}
class Opt extends Iter {}

Star.prototype.operator = '*';
Plus.prototype.operator = '+';
Opt.prototype.operator = '?';

Star.prototype.minNumMatches = 0;
Plus.prototype.minNumMatches = 1;
Opt.prototype.minNumMatches = 0;

Star.prototype.maxNumMatches = Number.POSITIVE_INFINITY;
Plus.prototype.maxNumMatches = Number.POSITIVE_INFINITY;
Opt.prototype.maxNumMatches = 1;

// Predicates

class Not extends PExpr {
  constructor(expr) {
    super();
    this.expr = expr;
  }
}

class Lookahead extends PExpr {
  constructor(expr) {
    super();
    this.expr = expr;
  }
}

// "Lexification"

class Lex extends PExpr {
  constructor(expr) {
    super();
    this.expr = expr;
  }
}

// Rule application

class Apply extends PExpr {
  constructor(ruleName, args = []) {
    super();
    this.ruleName = ruleName;
    this.args = args;
  }

  isSyntactic() {
    return isSyntactic(this.ruleName);
  }

  // This method just caches the result of `this.toString()` in a non-enumerable property.
  toMemoKey() {
    if (!this._memoKey) {
      Object.defineProperty(this, '_memoKey', {value: this.toString()});
    }
    return this._memoKey;
  }
}

// Unicode character

class UnicodeChar extends PExpr {
  constructor(category) {
    super();
    this.category = category;
    this.pattern = UnicodeCategories[category];
  }
}

// --------------------------------------------------------------------
// Private stuff
// --------------------------------------------------------------------

/**
 * Build an Error. When `optInterval` is given, the message is prefixed with the
 * interval's line-and-column message, and the bare message and the interval are
 * attached as `shortMessage` and `interval`.
 */
function createError(message, optInterval) {
  let e;
  if (optInterval) {
    e = new Error(optInterval.getLineAndColumnMessage() + message);
    e.shortMessage = message;
    e.interval = optInterval;
  } else {
    e = new Error(message);
  }
  return e;
}

// ----------------- errors about intervals -----------------

function intervalSourcesDontMatch() {
  return createError("Interval sources don't match");
}

// ----------------- errors about grammars -----------------

// Grammar syntax error

function grammarSyntaxError(matchFailure) {
  const e = new Error();
  // Both texts are exposed as getters so they are only read from `matchFailure` on demand.
  Object.defineProperty(e, 'message', {
    enumerable: true,
    get() {
      return matchFailure.message;
    },
  });
  Object.defineProperty(e, 'shortMessage', {
    enumerable: true,
    get() {
      return 'Expected ' + matchFailure.getExpectedText();
    },
  });
  e.interval = matchFailure.getInterval();
  return e;
}

// Undeclared grammar

function undeclaredGrammar(grammarName, namespace, interval) {
  const message = namespace ?
    `Grammar ${grammarName} is not declared in namespace '${namespace}'` :
    'Undeclared grammar ' + grammarName;
  return createError(message, interval);
}

// Duplicate grammar declaration

function duplicateGrammarDeclaration(grammar, namespace) {
  return createError('Grammar ' + grammar.name + ' is already declared in this namespace');
}

function grammarDoesNotSupportIncrementalParsing(grammar) {
  return createError(`Grammar '${grammar.name}' does not support incremental parsing`);
}

// ----------------- rules -----------------

// Undeclared rule

function undeclaredRule(ruleName, grammarName, optInterval) {
  return createError(
    'Rule ' + ruleName + ' is not declared in grammar ' + grammarName,
    optInterval,
  );
}

// Cannot override undeclared rule

function cannotOverrideUndeclaredRule(ruleName, grammarName, optSource) {
  return createError(
    'Cannot override rule ' + ruleName + ' because it is not declared in ' + grammarName,
    optSource,
  );
}

// Cannot extend undeclared rule

function cannotExtendUndeclaredRule(ruleName, grammarName, optSource) {
  return createError(
    'Cannot extend rule ' + ruleName + ' because it is not declared in ' + grammarName,
    optSource,
  );
}

// Duplicate rule declaration

function duplicateRuleDeclaration(ruleName, grammarName, declGrammarName, optSource) {
  let message =
    "Duplicate declaration for rule '" + ruleName + "' in grammar '" + grammarName + "'";
  if (grammarName !== declGrammarName) {
    message += " (originally declared in '" + declGrammarName + "')";
  }
  return createError(message, optSource);
}

// Wrong number of parameters

function wrongNumberOfParameters(ruleName, expected, actual, source) {
  return createError(
    'Wrong number of parameters for rule ' +
      ruleName +
      ' (expected ' +
      expected +
      ', got ' +
      actual +
      ')',
    source,
  );
}

// Wrong number of arguments

function wrongNumberOfArguments(ruleName, expected, actual, expr) {
  return createError(
    'Wrong number of arguments for rule ' +
      ruleName +
      ' (expected ' +
      expected +
      ', got ' +
      actual +
      ')',
    expr,
  );
}

// Duplicate parameter names

function duplicateParameterNames(ruleName, duplicates, source) {
  return createError(
    'Duplicate parameter names in rule ' + ruleName + ': ' + duplicates.join(', '),
    source,
  );
}

// Invalid parameter expression

function invalidParameter(ruleName, expr) {
  return createError(
    'Invalid parameter to rule ' +
      ruleName +
      ': ' +
      expr +
      ' has arity ' +
      expr.getArity() +
      ', but parameter expressions must have arity 1',
    expr.source,
  );
}

// Application of syntactic rule from lexical rule

const syntacticVsLexicalNote =
  'NOTE: A _syntactic rule_ is a rule whose name begins with a capital letter. ' +
  'See https://ohmjs.org/d/svl for more details.';

function applicationOfSyntacticRuleFromLexicalContext(ruleName, applyExpr) {
  return createError(
    'Cannot apply syntactic rule ' + ruleName + ' from here (inside a lexical context)',
    applyExpr.source,
  );
}

// Lexical rule application used with applySyntactic

function applySyntacticWithLexicalRuleApplication(applyExpr) {
  const {ruleName} = applyExpr;
  return createError(
    `applySyntactic is for syntactic rules, but '${ruleName}' is a lexical rule. ` +
      syntacticVsLexicalNote,
    applyExpr.source,
  );
}

// Application of applySyntactic in a syntactic context

function unnecessaryExperimentalApplySyntactic(applyExpr) {
  return createError(
    'applySyntactic is not required here (in a syntactic context)',
    applyExpr.source,
  );
}

// Incorrect argument type

function incorrectArgumentType(expectedType, expr) {
  return createError('Incorrect argument type: expected ' + expectedType, expr.source);
}

// Multiple instances of the super-splice operator (`...`) in the rule body.

function multipleSuperSplices(expr) {
  return createError("'...' can appear at most once in a rule body", expr.source);
}

// Unicode code point escapes

function invalidCodePoint(applyWrapper) {
  const node = applyWrapper._node;
  assert(node && node.isNonterminal() && node.ctorName === 'escapeChar_unicodeCodePoint');

  // Get an interval that covers all of the hex digits.
  const digitIntervals = applyWrapper.children.slice(1, -1).map(d => d.source);
  const fullInterval = digitIntervals[0].coverageWith(...digitIntervals.slice(1));
  return createError(
    `U+${fullInterval.contents} is not a valid Unicode code point`,
    fullInterval,
  );
}

// ----------------- Kleene operators -----------------

function kleeneExprHasNullableOperand(kleeneExpr, applicationStack) {
  // Substitute the arguments of the innermost application so that the message
  // shows the concrete expression rather than a parameter reference.
  const actuals =
    applicationStack.length > 0 ? applicationStack[applicationStack.length - 1].args : [];
  const expr = kleeneExpr.expr.substituteParams(actuals);
  let message =
    'Nullable expression ' +
    expr +
    " is not allowed inside '" +
    kleeneExpr.operator +
    "' (possible infinite loop)";
  if (applicationStack.length > 0) {
    const stackTrace = applicationStack
      .map(app => new Apply(app.ruleName, app.args))
      .join('\n');
    message += '\nApplication stack (most recent application last):\n' + stackTrace;
  }
  return createError(message, kleeneExpr.expr.source);
}

// ----------------- arity -----------------

function inconsistentArity(ruleName, expected, actual, expr) {
  return createError(
    'Rule ' +
      ruleName +
      ' involves an alternation which has inconsistent arity ' +
      '(expected ' +
      expected +
      ', got ' +
      actual +
      ')',
    expr.source,
  );
}

// ----------------- convenience -----------------

// Combine several errors into one; the location is taken from the first error.
function multipleErrors(errors) {
  const messages = errors.map(e => e.message);
  return createError(['Errors:'].concat(messages).join('\n- '), errors[0].interval);
}

// ----------------- semantic -----------------

function missingSemanticAction(ctorName, name, type, stack) {
  // The last stack entry is replaced by the explicit `name > ctorName` line below.
  let stackTrace = stack
    .slice(0, -1)
    .map(info => {
      const ans = ' ' + info[0].name + ' > ' + info[1];
      return info.length === 3 ? ans + " for '" + info[2] + "'" : ans;
    })
    .join('\n');
  stackTrace += '\n ' + name + ' > ' + ctorName;

  let moreInfo = '';
  if (ctorName === '_iter') {
    moreInfo = [
      '\nNOTE: as of Ohm v16, there is no default action for iteration nodes — see ',
      ' https://ohmjs.org/d/dsa for details.',
    ].join('\n');
  }

  const message = [
    `Missing semantic action for '${ctorName}' in ${type} '${name}'.${moreInfo}`,
    'Action stack (most recent call last):',
    stackTrace,
  ].join('\n');

  const e = createError(message);
  e.name = 'missingSemanticAction';
  return e;
}

// Throw the single error, or a combined error when there are several.
// Does nothing for an empty array.
function throwErrors(errors) {
  if (errors.length === 1) {
    throw errors[0];
  }
  if (errors.length > 1) {
    throw multipleErrors(errors);
  }
}

// --------------------------------------------------------------------
// Private stuff
// --------------------------------------------------------------------

// Given an array of numbers `arr`, return an array of the numbers as strings,
// right-justified and padded to the same length.
function padNumbersToEqualLength(arr) {
  let maxLen = 0;
  const strings = arr.map(n => {
    const str = n.toString();
    maxLen = Math.max(maxLen, str.length);
    return str;
  });
  return strings.map(s => padLeft(s, maxLen));
}

// Produce a new string that would be the result of copying the contents
// of the string `src` onto `dest` at offset `offset`.
// The result is always truncated to the original length of `dest`.
function strcpy(dest, src, offset) {
  const origDestLen = dest.length;
  const start = dest.slice(0, offset);
  const end = dest.slice(offset + src.length);
  // `slice` instead of the deprecated `substr`; identical for a non-negative length.
  return (start + src + end).slice(0, origDestLen);
}

// Casts the underlying lineAndCol object to a formatted message string,
// highlighting `ranges`. Must be invoked with `this` bound to a line-and-column
// object; each range is a `[startIdx, endIdx]` pair of offsets into the source.
function lineAndColumnToMessage(...ranges) {
  const lineAndCol = this;
  const {offset} = lineAndCol;
  const {repeatStr} = common;

  const sb = new StringBuffer();
  sb.append('Line ' + lineAndCol.lineNum + ', col ' + lineAndCol.colNum + ':\n');

  // An array of the previous, current, and next line numbers as strings of equal length.
  const lineNumbers = padNumbersToEqualLength([
    lineAndCol.prevLine == null ? 0 : lineAndCol.lineNum - 1,
    lineAndCol.lineNum,
    lineAndCol.nextLine == null ? 0 : lineAndCol.lineNum + 1,
  ]);

  // Helper for appending formatting input lines to the buffer.
  const appendLine = (num, content, prefix) => {
    sb.append(prefix + lineNumbers[num] + ' | ' + content + '\n');
  };

  // Include the previous line for context if possible.
  // The prefix is two characters wide so that context lines line up with the
  // '> ' marker of the error line and with `gutterWidth` below.
  if (lineAndCol.prevLine != null) {
    appendLine(0, lineAndCol.prevLine, '  ');
  }
  // Line that the error occurred on.
  appendLine(1, lineAndCol.line, '> ');

  // Build up the line that points to the offset and possibly indicates one or more ranges.
  // Start with a blank line, and indicate each range by overlaying a string of `~` chars.
  const lineLen = lineAndCol.line.length;
  let indicationLine = repeatStr(' ', lineLen + 1);
  for (let i = 0; i < ranges.length; ++i) {
    let startIdx = ranges[i][0];
    let endIdx = ranges[i][1];
    assert(startIdx >= 0 && startIdx <= endIdx, 'range start must be >= 0 and <= end');

    // Translate source offsets into columns of the current line, clamped to the line.
    const lineStartOffset = offset - lineAndCol.colNum + 1;
    startIdx = Math.max(0, startIdx - lineStartOffset);
    endIdx = Math.min(endIdx - lineStartOffset, lineLen);

    indicationLine = strcpy(indicationLine, repeatStr('~', endIdx - startIdx), startIdx);
  }
  // Width of the prefix (2) + the line number + the ' | ' separator (3).
  const gutterWidth = 2 + lineNumbers[1].length + 3;
  sb.append(repeatStr(' ', gutterWidth));
  indicationLine = strcpy(indicationLine, '^', lineAndCol.colNum - 1);
  sb.append(indicationLine.replace(/ +$/, '') + '\n');

  // Include the next line for context if possible.
  if (lineAndCol.nextLine != null) {
    appendLine(2, lineAndCol.nextLine, '  ');
  }
  return sb.contents();
}

// --------------------------------------------------------------------
// Exports
// --------------------------------------------------------------------

let builtInRulesCallbacks = [];

// Since Grammar.BuiltInRules is bootstrapped, most of Ohm can't directly depend on it.
// This function allows modules that do depend on the built-in rules to register a callback
// that will be called later in the initialization process.
function awaitBuiltInRules(cb) {
  builtInRulesCallbacks.push(cb);
}

// Hand the bootstrapped grammar to every registered callback, in registration
// order, then drop the callback list.
function announceBuiltInRules(grammar) {
  const pending = builtInRulesCallbacks;
  const count = pending.length;
  for (let i = 0; i < count; i++) {
    const notify = pending[i];
    notify(grammar);
  }
  builtInRulesCallbacks = null;
}

// Return an object with the line and column information for the given
// offset in `str`.
function getLineAndColumn(str, offset) {
  let lineNum = 1;
  let colNum = 1;
  let lineStart = 0;
  let prevLineStart = -1;

  // Scan everything before `offset`, tracking where the current and the
  // previous line begin. Carriage returns do not advance the column.
  for (let pos = 0; pos < offset; pos++) {
    const ch = str.charAt(pos);
    if (ch === '\n') {
      lineNum += 1;
      colNum = 1;
      prevLineStart = lineStart;
      lineStart = pos + 1;
    } else if (ch !== '\r') {
      colNum += 1;
    }
  }

  // Find the end of the target line; a next line exists only if it ends in '\n'.
  const eolIdx = str.indexOf('\n', lineStart);
  const hasNextLine = eolIdx !== -1;
  const lineEnd = hasNextLine ? eolIdx : str.length;

  let nextLine = null;
  if (hasNextLine) {
    const nextEolIdx = str.indexOf('\n', lineEnd + 1);
    const rawNextLine =
      nextEolIdx === -1 ? str.slice(lineEnd) : str.slice(lineEnd, nextEolIdx);
    // Strip leading and trailing EOL char(s).
    nextLine = rawNextLine.replace(/^\r?\n/, '').replace(/\r$/, '');
  }

  // The previous line, minus its trailing EOL char(s).
  const prevLine =
    prevLineStart >= 0 ?
      str.slice(prevLineStart, lineStart).replace(/\r?\n$/, '') :
      null;

  // The target line, minus a trailing carriage return if there is one.
  const line = str.slice(lineStart, lineEnd).replace(/\r$/, '');

  return {
    offset,
    lineNum,
    colNum,
    line,
    prevLine,
    nextLine,
    toString: lineAndColumnToMessage,
  };
}

// Return a nicely-formatted string describing the line and column for the
// given offset in `str` highlighting `ranges`.
function getLineAndColumnMessage(str, offset, ...ranges) { return getLineAndColumn(str, offset).toString(...ranges); } const uniqueId = (() => { let idCounter = 0; return prefix => '' + prefix + idCounter++; })(); // -------------------------------------------------------------------- // Private stuff // -------------------------------------------------------------------- class Interval { constructor(sourceString, startIdx, endIdx) { this.sourceString = sourceString; this.startIdx = startIdx; this.endIdx = endIdx; } get contents() { if (this._contents === undefined) { this._contents = this.sourceString.slice(this.startIdx, this.endIdx); } return this._contents; } get length() { return this.endIdx - this.startIdx; } coverageWith(...intervals) { return Interval.coverage(...intervals, this); } collapsedLeft() { return new Interval(this.sourceString, this.startIdx, this.startIdx); } collapsedRight() { return new Interval(this.sourceString, this.endIdx, this.endIdx); } getLineAndColumn() { return getLineAndColumn(this.sourceString, this.startIdx); } getLineAndColumnMessage() { const range = [this.startIdx, this.endIdx]; return getLineAndColumnMessage(this.sourceString, this.startIdx, range); } // Returns an array of 0, 1, or 2 intervals that represents the result of the // interval difference operation. minus(that) { if (this.sourceString !== that.sourceString) { throw intervalSourcesDontMatch(); } else if (this.startIdx === that.startIdx && this.endIdx === that.endIdx) { // `this` and `that` are the same interval! 
return []; } else if (this.startIdx < that.startIdx && that.endIdx < this.endIdx) { // `that` splits `this` into two intervals return [ new Interval(this.sourceString, this.startIdx, that.startIdx), new Interval(this.sourceString, that.endIdx, this.endIdx), ]; } else if (this.startIdx < that.endIdx && that.endIdx < this.endIdx) { // `that` contains a prefix of `this` return [new Interval(this.sourceString, that.endIdx, this.endIdx)]; } else if (this.startIdx < that.startIdx && that.startIdx < this.endIdx) { // `that` contains a suffix of `this` return [new Interval(this.sourceString, this.startIdx, that.startIdx)]; } else { // `that` and `this` do not overlap return [this]; } } // Returns a new Interval that has the same extent as this one, but which is relative // to `that`, an Interval that fully covers this one. relativeTo(that) { if (this.sourceString !== that.sourceString) { throw intervalSourcesDontMatch(); } assert( this.startIdx >= that.startIdx && this.endIdx <= that.endIdx, 'other interval does not cover this one', ); return new Interval( this.sourceString, this.startIdx - that.startIdx, this.endIdx - that.startIdx, ); } // Returns a new Interval which contains the same contents as this one, // but with whitespace trimmed from both ends. 
trimmed() { const {contents} = this; const startIdx = this.startIdx + contents.match(/^\s*/)[0].length; const endIdx = this.endIdx - contents.match(/\s*$/)[0].length; return new Interval(this.sourceString, startIdx, endIdx); } subInterval(offset, len) { const newStartIdx = this.startIdx + offset; return new Interval(this.sourceString, newStartIdx, newStartIdx + len); } } Interval.coverage = function(firstInterval, ...intervals) { let {startIdx, endIdx} = firstInterval; for (const interval of intervals) { if (interval.sourceString !== firstInterval.sourceString) { throw intervalSourcesDontMatch(); } else { startIdx = Math.min(startIdx, interval.startIdx); endIdx = Math.max(endIdx, interval.endIdx); } } return new Interval(firstInterval.sourceString, startIdx, endIdx); }; const MAX_CHAR_CODE = 0xffff; class InputStream { constructor(source) { this.source = source; this.pos = 0; this.examinedLength = 0; } atEnd() { const ans = this.pos >= this.source.length; this.examinedLength = Math.max(this.examinedLength, this.pos + 1); return ans; } next() { const ans = this.source[this.pos++]; this.examinedLength = Math.max(this.examinedLength, this.pos); return ans; } nextCharCode() { const nextChar = this.next(); return nextChar && nextChar.charCodeAt(0); } nextCodePoint() { const cp = this.source.slice(this.pos++).codePointAt(0); // If the code point is beyond plane 0, it takes up two characters. if (cp > MAX_CHAR_CODE) { this.pos += 1; } this.examinedLength = Math.max(this.examinedLength, this.pos); return cp; } matchString(s, optIgnoreCase) { let idx; if (optIgnoreCase) { /* Case-insensitive comparison is a tricky business. Some notable gotchas include the "Turkish I" problem (http://www.i18nguy.com/unicode/turkish-i18n.html) and the fact that the German Esszet (ß) turns into "SS" in upper case. This is intended to be a locale-invariant comparison, which means it may not obey locale-specific expectations (e.g. "i" => "İ"). 
*/ for (idx = 0; idx < s.length; idx++) { const actual = this.next(); const expected = s[idx]; if (actual == null || actual.toUpperCase() !== expected.toUpperCase()) { return false; } } return true; } // Default is case-sensitive comparison. for (idx = 0; idx < s.length; idx++) { if (this.next() !== s[idx]) { return false; } } return true; } sourceSlice(startIdx, endIdx) { return this.source.slice(startIdx, endIdx); } interval(startIdx, optEndIdx) { return new Interval(this.source, startIdx, optEndIdx ? optEndIdx : this.pos); } } // -------------------------------------------------------------------- // Private stuff // -------------------------------------------------------------------- class MatchResult { constructor( matcher, input, startExpr, cst, cstOffset, rightmostFailurePosition, optRecordedFailures, ) { this.matcher = matcher; this.input = input; this.startExpr = startExpr; this._cst = cst; this._cstOffset = cstOffset; this._rightmostFailurePosition = rightmostFailurePosition; this._rightmostFailures = optRecordedFailures; if (this.failed()) { /* eslint-disable no-invalid-this */ defineLazyProperty(this, 'message', function() { const detail = 'Expected ' + this.getExpectedText(); return ( getLineAndColumnMessage(this.input, this.getRightmostFailurePosition()) + detail ); }); defineLazyProperty(this, 'shortMessage', function() { const detail = 'expected ' + this.getExpectedText(); const errorInfo = getLineAndColumn( this.input, this.getRightmostFailurePosition(), ); return 'Line ' + errorInfo.lineNum + ', col ' + errorInfo.colNum + ': ' + detail; }); /* eslint-enable no-invalid-this */ } } succeeded() { return !!this._cst; } failed() { return !this.succeeded(); } getRightmostFailurePosition() { return this._rightmostFailurePosition; } getRightmostFailures() { if (!this._rightmostFailures) { this.matcher.setInput(this.input); const matchResultWithFailures = this.matcher._match(this.startExpr, { tracing: false, positionToRecordFailures: 
this.getRightmostFailurePosition(), }); this._rightmostFailures = matchResultWithFailures.getRightmostFailures(); } return this._rightmostFailures; } toString() { return this.succeeded() ? '[match succeeded]' : '[match failed at position ' + this.getRightmostFailurePosition() + ']'; } // Return a string summarizing the expected contents of the input stream when // the match failure occurred. getExpectedText() { if (this.succeeded()) { throw new Error('cannot get expected text of a successful MatchResult'); } const sb = new StringBuffer(); let failures = this.getRightmostFailures(); // Filter out the fluffy failures to make the default error messages more useful failures = failures.filter(failure => !failure.isFluffy()); for (let idx = 0; idx < failures.length; idx++) { if (idx > 0) { if (idx === failures.length - 1) { sb.append(failures.length > 2 ? ', or ' : ' or '); } else { sb.append(', '); } } sb.append(failures[idx].toString()); } return sb.contents(); } getInterval() { const pos = this.getRightmostFailurePosition(); return new Interval(this.input, pos, pos); } } class PosInfo { constructor() { this.applicationMemoKeyStack = []; // active applications at this position this.memo = {}; this.maxExaminedLength = 0; this.maxRightmostFailureOffset = -1; this.currentLeftRecursion = undefined; } isActive(application) { return this.applicationMemoKeyStack.indexOf(application.toMemoKey()) >= 0; } enter(application) { this.applicationMemoKeyStack.push(application.toMemoKey()); } exit() { this.applicationMemoKeyStack.pop(); } startLeftRecursion(headApplication, memoRec) { memoRec.isLeftRecursion = true; memoRec.headApplication = headApplication; memoRec.nextLeftRecursion = this.currentLeftRecursion; this.currentLeftRecursion = memoRec; const {applicationMemoKeyStack} = this; const indexOfFirstInvolvedRule = applicationMemoKeyStack.indexOf(headApplication.toMemoKey()) + 1; const involvedApplicationMemoKeys = applicationMemoKeyStack.slice( indexOfFirstInvolvedRule, ); 
memoRec.isInvolved = function(applicationMemoKey) { return involvedApplicationMemoKeys.indexOf(applicationMemoKey) >= 0; }; memoRec.updateInvolvedApplicationMemoKeys = function() { for (let idx = indexOfFirstInvolvedRule; idx < applicationMemoKeyStack.length; idx++) { const applicationMemoKey = applicationMemoKeyStack[idx]; if (!this.isInvolved(applicationMemoKey)) { involvedApplicationMemoKeys.push(applicationMemoKey); } } }; } endLeftRecursion() { this.currentLeftRecursion = this.currentLeftRecursion.nextLeftRecursion; } // Note: this method doesn't get called for the "head" of a left recursion -- for LR heads, // the memoized result (which starts out being a failure) is always used. shouldUseMemoizedResult(memoRec) { if (!memoRec.isLeftRecursion) { return true; } const {applicationMemoKeyStack} = this; for (let idx = 0; idx < applicationMemoKeyStack.length; idx++) { const applicationMemoKey = applicationMemoKeyStack[idx]; if (memoRec.isInvolved(applicationMemoKey)) { return false; } } return true; } memoize(memoKey, memoRec) { this.memo[memoKey] = memoRec; this.maxExaminedLength = Math.max(this.maxExaminedLength, memoRec.examinedLength); this.maxRightmostFailureOffset = Math.max( this.maxRightmostFailureOffset, memoRec.rightmostFailureOffset, ); return memoRec; } clearObsoleteEntries(pos, invalidatedIdx) { if (pos + this.maxExaminedLength <= invalidatedIdx) { // Optimization: none of the rule applications that were memoized here examined the // interval of the input that changed, so nothing has to be invalidated. 
return;
    }

    const {memo} = this;
    // Reset the aggregate maxima; they are recomputed below from the entries that are kept.
    this.maxExaminedLength = 0;
    this.maxRightmostFailureOffset = -1;
    Object.keys(memo).forEach(k => {
      const memoRec = memo[k];
      if (pos + memoRec.examinedLength > invalidatedIdx) {
        // This entry examined input past `invalidatedIdx`, so it must be discarded.
        delete memo[k];
      } else {
        this.maxExaminedLength = Math.max(this.maxExaminedLength, memoRec.examinedLength);
        this.maxRightmostFailureOffset = Math.max(
            this.maxRightmostFailureOffset,
            memoRec.rightmostFailureOffset,
        );
      }
    });
  }
}

// --------------------------------------------------------------------
// Private stuff
// --------------------------------------------------------------------

// Unicode characters that are used in the `toString` output.
const BALLOT_X = '\u2717';
const CHECK_MARK = '\u2713';
const DOT_OPERATOR = '\u22C5';
const RIGHTWARDS_DOUBLE_ARROW = '\u21D2';
const SYMBOL_FOR_HORIZONTAL_TABULATION = '\u2409';
const SYMBOL_FOR_LINE_FEED = '\u240A';
const SYMBOL_FOR_CARRIAGE_RETURN = '\u240D';

// Bit masks for the boolean properties of a Trace. All of them are packed into the
// single integer `_flags` of each Trace instance.
const Flags = {
  succeeded: 1 << 0,
  isRootNode: 1 << 1,
  isImplicitSpaces: 1 << 2,
  isMemoized: 1 << 3,
  isHeadOfLeftRecursion: 1 << 4,
  terminatesLR: 1 << 5,
};

// Return a string consisting of `n` space characters.
function spaces(n) {
  return repeat(' ', n).join('');
}

// Return a string representation of a portion of `input` at offset `pos`.
// The result will contain exactly `len` characters.
function getInputExcerpt(input, pos, len) {
  const excerpt = asEscapedString(input.slice(pos, pos + len));

  // Pad the output if necessary.
  if (excerpt.length < len) {
    return excerpt + repeat(' ', len - excerpt.length).join('');
  }
  return excerpt;
}

// Return a printable form of `obj`. In a string, each space, tab, line feed and carriage
// return is replaced by a visible symbol; any other value is converted with `String()`.
function asEscapedString(obj) {
  if (typeof obj === 'string') {
    // Replace non-printable characters with visible symbols.
    return obj
        .replace(/ /g, DOT_OPERATOR)
        .replace(/\t/g, SYMBOL_FOR_HORIZONTAL_TABULATION)
        .replace(/\n/g, SYMBOL_FOR_LINE_FEED)
        .replace(/\r/g, SYMBOL_FOR_CARRIAGE_RETURN);
  }
  return String(obj);
}

// ----------------- Trace -----------------

// A node in a tree of trace entries. Each node records the expression `expr`, the input and
// the positions `pos1`/`pos2` it was created with (also exposed as the Interval `source`),
// the bindings, the child entries, and a set of boolean flags (see `Flags`).
class Trace {
  // `succeeded` initializes the `succeeded` flag; all other flags start out cleared.
  // `optChildren` defaults to an empty array.
  constructor(input, pos1, pos2, expr, succeeded, bindings, optChildren) {
    this.input = input;
    // `pos` is an alias for `pos1`.
    this.pos = this.pos1 = pos1;
    this.pos2 = pos2;
    this.source = new Interval(input, pos1, pos2);
    this.expr = expr;
    this.bindings = bindings;
    this.children = optChildren || [];
    // Set by `recordLRTermination`.
    this.terminatingLREntry = null;

    this._flags = succeeded ? Flags.succeeded : 0;
  }

  // The display string of this node's expression.
  get displayString() {
    return this.expr.toDisplayString();
  }

  // Return a copy of this node that refers to the same expression.
  clone() {
    return this.cloneWithExpr(this.expr);
  }

  // Return a copy of this node with `expr` in place of its expression. The copy is shallow:
  // it refers to the same `bindings` and `children` arrays as this node.
  cloneWithExpr(expr) {
    const ans = new Trace(
        this.input,
        this.pos,
        this.pos2,
        expr,
        this.succeeded,
        this.bindings,
        this.children,
    );

    // The constructor only sets the `succeeded` flag, so copy the remaining state by hand.
    ans.isHeadOfLeftRecursion = this.isHeadOfLeftRecursion;
    ans.isImplicitSpaces = this.isImplicitSpaces;
    ans.isMemoized = this.isMemoized;
    ans.isRootNode = this.isRootNode;
    ans.terminatesLR = this.terminatesLR;
    ans.terminatingLREntry = this.terminatingLREntry;
    return ans;
  }

  // Record the trace information for the terminating condition of the LR loop.
  // The recorded entry is a failed Trace for the same expression and positions, with
  // `value` as its only binding and `ruleBodyTrace` as its only child.
  recordLRTermination(ruleBodyTrace, value) {
    this.terminatingLREntry = new Trace(
        this.input,
        this.pos,
        this.pos2,
        this.expr,
        false,
        [value],
        [ruleBodyTrace],
    );
    this.terminatingLREntry.terminatesLR = true;
  }

  // Recursively traverse this trace node and all its descendants, calling a visitor function
  // for each node that is visited. If `visitorObjOrFn` is an object, then its 'enter' property
  // is a function to call before visiting the children of a node, and its 'exit' property is
  // a function to call afterwards. If `visitorObjOrFn` is a function, it represents the 'enter'
  // function.
  //
  // The functions are called with three arguments: the Trace node, its parent Trace, and a number
  // representing the depth of the node in the tree. (The root node has depth 0.) `optThisArg`, if
  // specified, is the value to use for `this` when executing the visitor functions.
  //
  // If the 'enter' function returns `Trace.prototype.SKIP`, the children of that node are not
  // visited and the 'exit' function is not called for it.
  walk(visitorObjOrFn, optThisArg) {
    let visitor = visitorObjOrFn;
    if (typeof visitor === 'function') {
      visitor = {enter: visitor};
    }

    function _walk(node, parent, depth) {
      let recurse = true;
      if (visitor.enter) {
        if (visitor.enter.call(optThisArg, node, parent, depth) === Trace.prototype.SKIP) {
          recurse = false;
        }
      }
      if (recurse) {
        node.children.forEach(child => {
          _walk(child, node, depth + 1);
        });
        if (visitor.exit) {
          visitor.exit.call(optThisArg, node, parent, depth);
        }
      }
    }
    if (this.isRootNode) {
      // Don't visit the root node itself, only its children.
      this.children.forEach(c => {
        _walk(c, null, 0);
      });
    } else {
      _walk(this, null, 0);
    }
  }

  // Return a string representation of the trace.
  // Sample:
  //     12⋅+⋅2⋅*⋅3 ✓ exp ⇒ "12"
  //     12⋅+⋅2⋅*⋅3 ✓ addExp (LR) ⇒ "12"
  //     12⋅+⋅2⋅*⋅3 ✗ addExp_plus
  toString() {
    const sb = new StringBuffer();
    this.walk((node, parent, depth) => {
      if (!node) {
        return this.SKIP;
      }
      const ctorName = node.expr.constructor.name;
      // Don't print anything for Alt nodes.
      if (ctorName === 'Alt') {
        return; // eslint-disable-line consistent-return
      }
      // One output line per node: input excerpt, indentation by depth, result mark, expression.
      sb.append(getInputExcerpt(node.input, node.pos, 10) + spaces(depth * 2 + 1));
      sb.append((node.succeeded ? CHECK_MARK : BALLOT_X) + ' ' + node.displayString);
      if (node.isHeadOfLeftRecursion) {
        sb.append(' (LR)');
      }
      if (node.succeeded) {
        const contents = asEscapedString(node.source.contents);
        sb.append(' ' + RIGHTWARDS_DOUBLE_ARROW + ' ');
        // NOTE(review): `asEscapedString` returns a string in both of its branches, so the
        // quoted form appears to be the only one that is ever used here.
        sb.append(typeof contents === 'string' ? '"' + contents + '"' : contents);
      }
      sb.append('\n');
    });
    return sb.contents();
  }
}

// A value that can be returned from visitor functions to indicate that a
// node should not be recursed into.
Trace.prototype.SKIP = {};

// For convenience, create a getter and setter for the boolean flags in `Flags`.
for (const flagName of Object.keys(Flags)) {
  // Each flag is exposed on Trace.prototype as a boolean accessor backed by one bit of `_flags`.
  const bit = Flags[flagName];
  Object.defineProperty(Trace.prototype, flagName, {
    get() {
      return (this._flags & bit) !== 0;
    },
    set(enabled) {
      this._flags = enabled ? this._flags | bit : this._flags & ~bit;
    },
  });
}

// --------------------------------------------------------------------
// Operations
// --------------------------------------------------------------------

/*
  Tells whether spaces in front of this expression should be skipped when it is used in a
  syntactic context. Every concrete expression type supplies its own answer below.
*/
PExpr.prototype.allowsSkippingPrecedingSpace = abstract('allowsSkippingPrecedingSpace');

/*
  First-order expressions: apart from Apply, these read directly from the input stream.
  All of them share a single implementation that answers `true`.
*/
any.allowsSkippingPrecedingSpace =
  end.allowsSkippingPrecedingSpace =
  Apply.prototype.allowsSkippingPrecedingSpace =
  Terminal.prototype.allowsSkippingPrecedingSpace =
  Range.prototype.allowsSkippingPrecedingSpace =
  UnicodeChar.prototype.allowsSkippingPrecedingSpace =
    function() {
      return true;
    };

/*
  Higher-order expressions, which don't consume input themselves.
  All of them share a single implementation that answers `false`.
*/
Alt.prototype.allowsSkippingPrecedingSpace =
  Iter.prototype.allowsSkippingPrecedingSpace =
  Lex.prototype.allowsSkippingPrecedingSpace =
  Lookahead.prototype.allowsSkippingPrecedingSpace =
  Not.prototype.allowsSkippingPrecedingSpace =
  Param.prototype.allowsSkippingPrecedingSpace =
  Seq.prototype.allowsSkippingPrecedingSpace =
    function() {
      return false;
    };

// Holds the grammar handed to the `awaitBuiltInRules` callback; undefined until then.
let BuiltInRules$1;

awaitBuiltInRules(g => {
  BuiltInRules$1 = g;
});

// --------------------------------------------------------------------
// Operations
// --------------------------------------------------------------------

// Number of Lex expressions currently being traversed by `_assertAllApplicationsAreValid`.
let lexifyCount;

/*
  Public entry point: resets the Lex nesting counter, then runs the recursive check
  for the body of the rule named `ruleName` in `grammar`.
*/
PExpr.prototype.assertAllApplicationsAreValid = function(ruleName, grammar) {
  lexifyCount = 0;
  this._assertAllApplicationsAreValid(ruleName, grammar);
};

PExpr.prototype._assertAllApplicationsAreValid = abstract(
    '_assertAllApplicationsAreValid',
);

/* Expressions without sub-expressions have nothing to check. */
any._assertAllApplicationsAreValid =
  end._assertAllApplicationsAreValid =
  Terminal.prototype._assertAllApplicationsAreValid =
  Range.prototype._assertAllApplicationsAreValid =
  Param.prototype._assertAllApplicationsAreValid =
  UnicodeChar.prototype._assertAllApplicationsAreValid =
    function(ruleName, grammar) {
      // no-op
    };

/* While the operand of a Lex is being checked, the counter marks the context as lexical. */
Lex.prototype._assertAllApplicationsAreValid = function(ruleName, grammar) {
  lexifyCount += 1;
  this.expr._assertAllApplicationsAreValid(ruleName, grammar);
  lexifyCount -= 1;
};

/* Check every alternative. */
Alt.prototype._assertAllApplicationsAreValid = function(ruleName, grammar) {
  for (const term of this.terms) {
    term._assertAllApplicationsAreValid(ruleName, grammar);
  }
};

/* Check every factor of the sequence. */
Seq.prototype._assertAllApplicationsAreValid = function(ruleName, grammar) {
  for (const factor of this.factors) {
    factor._assertAllApplicationsAreValid(ruleName, grammar);
  }
};

/* Single-operand expressions delegate to their operand. */
Iter.prototype._assertAllApplicationsAreValid =
  Not.prototype._assertAllApplicationsAreValid =
  Lookahead.prototype._assertAllApplicationsAreValid =
    function(ruleName, grammar) {
      this.expr._assertAllApplicationsAreValid(ruleName, grammar);
    };
Apply.prototype._assertAllApplicationsAreValid = function( ruleName, grammar, skipSyntacticCheck = false, ) { const ruleInfo = grammar.rules[this.ruleName]; const isContextSyntactic = isSyntactic(ruleName) && lexifyCount === 0; // Make sure that the rule exists... if (!ruleInfo) { throw undeclaredRule(this.ruleName, grammar.name, this.source); } // ...and that this application is allowed if (!skipSyntacticCheck && isSyntactic(this.ruleName) && !isContextSyntactic) { throw applicationOfSyntacticRuleFromLexicalContext(this.ruleName, this); } // ...and that this application has the correct number of arguments. const actual = this.args.length; const expected = ruleInfo.formals.length; if (actual !== expected) { throw wrongNumberOfArguments(this.ruleName, expected, actual, this.source); } const isBuiltInApplySyntactic = BuiltInRules$1 && ruleInfo === BuiltInRules$1.rules.applySyntactic; const isBuiltInCaseInsensitive = BuiltInRules$1 && ruleInfo === BuiltInRules$1.rules.caseInsensitive; // If it's an application of 'caseInsensitive', ensure that the argument is a Terminal. if (isBuiltInCaseInsensitive) { if (!(this.args[0] instanceof Terminal)) { throw incorrectArgumentType('a Terminal (e.g. "abc")', this.args[0]); } } if (isBuiltInApplySyntactic) { const arg = this.args[0]; if (!(arg instanceof Apply)) { throw incorrectArgumentType('a syntactic rule application', arg); } if (!isSyntactic(arg.ruleName)) { throw applySyntacticWithLexicalRuleApplication(arg); } if (isContextSyntactic) { throw unnecessaryExperimentalApplySyntactic(this); } } // ...and that all of the argument expressions only have valid applications and have arity 1. // If `this` is an application of the built-in applySyntactic rule, then its arg is // allowed (and expected) to be a syntactic rule, even if we're in a lexical context. 
this.args.forEach(arg => { arg._assertAllApplicationsAreValid(ruleName, grammar, isBuiltInApplySyntactic); if (arg.getArity() !== 1) { throw invalidParameter(this.ruleName, arg); } }); }; // -------------------------------------------------------------------- // Operations // -------------------------------------------------------------------- PExpr.prototype.assertChoicesHaveUniformArity = abstract( 'assertChoicesHaveUniformArity', ); any.assertChoicesHaveUniformArity = end.assertChoicesHaveUniformArity = Terminal.prototype.assertChoicesHaveUniformArity = Range.prototype.assertChoicesHaveUniformArity = Param.prototype.assertChoicesHaveUniformArity = Lex.prototype.assertChoicesHaveUniformArity = UnicodeChar.prototype.assertChoicesHaveUniformArity = function(ruleName) { // no-op }; Alt.prototype.assertChoicesHaveUniformArity = function(ruleName) { if (this.terms.length === 0) { return; } const arity = this.terms[0].getArity(); for (let idx = 0; idx < this.terms.length; idx++) { const term = this.terms[idx]; term.assertChoicesHaveUniformArity(); const otherArity = term.getArity(); if (arity !== otherArity) { throw inconsistentArity(ruleName, arity, otherArity, term); } } }; Extend.prototype.assertChoicesHaveUniformArity = function(ruleName) { // Extend is a special case of Alt that's guaranteed to have exactly two // cases: [extensions, origBody]. 
const actualArity = this.terms[0].getArity(); const expectedArity = this.terms[1].getArity(); if (actualArity !== expectedArity) { throw inconsistentArity(ruleName, expectedArity, actualArity, this.terms[0]); } }; Seq.prototype.assertChoicesHaveUniformArity = function(ruleName) { for (let idx = 0; idx < this.factors.length; idx++) { this.factors[idx].assertChoicesHaveUniformArity(ruleName); } }; Iter.prototype.assertChoicesHaveUniformArity = function(ruleName) { this.expr.assertChoicesHaveUniformArity(ruleName); }; Not.prototype.assertChoicesHaveUniformArity = function(ruleName) { // no-op (not required b/c the nested expr doesn't show up in the CST) }; Lookahead.prototype.assertChoicesHaveUniformArity = function(ruleName) { this.expr.assertChoicesHaveUniformArity(ruleName); }; Apply.prototype.assertChoicesHaveUniformArity = function(ruleName) { // The arities of the parameter expressions is required to be 1 by // `assertAllApplicationsAreValid()`. }; // -------------------------------------------------------------------- // Operations // -------------------------------------------------------------------- PExpr.prototype.assertIteratedExprsAreNotNullable = abstract( 'assertIteratedExprsAreNotNullable', ); any.assertIteratedExprsAreNotNullable = end.assertIteratedExprsAreNotNullable = Terminal.prototype.assertIteratedExprsAreNotNullable = Range.prototype.assertIteratedExprsAreNotNullable = Param.prototype.assertIteratedExprsAreNotNullable = UnicodeChar.prototype.assertIteratedExprsAreNotNullable = function(grammar) { // no-op }; Alt.prototype.assertIteratedExprsAreNotNullable = function(grammar) { for (let idx = 0; idx < this.terms.length; idx++) { this.terms[idx].assertIteratedExprsAreNotNullable(grammar); } }; Seq.prototype.assertIteratedExprsAreNotNullable = function(grammar) { for (let idx = 0; idx < this.factors.length; idx++) { this.factors[idx].assertIteratedExprsAreNotNullable(grammar); } }; Iter.prototype.assertIteratedExprsAreNotNullable = 
function(grammar) { // Note: this is the implementation of this method for `Star` and `Plus` expressions. // It is overridden for `Opt` below. this.expr.assertIteratedExprsAreNotNullable(grammar); if (this.expr.isNullable(grammar)) { throw kleeneExprHasNullableOperand(this, []); } }; Opt.prototype.assertIteratedExprsAreNotNullable = Not.prototype.assertIteratedExprsAreNotNullable = Lookahead.prototype.assertIteratedExprsAreNotNullable = Lex.prototype.assertIteratedExprsAreNotNullable = function(grammar) { this.expr.assertIteratedExprsAreNotNullable(grammar); }; Apply.prototype.assertIteratedExprsAreNotNullable = function(grammar) { this.args.forEach(arg => { arg.assertIteratedExprsAreNotNullable(grammar); }); }; // -------------------------------------------------------------------- // Private stuff // ------------------------------