ohm-js
Version:
402 lines (354 loc) • 13.1 kB
JavaScript
import {Trace} from './Trace.js';
import * as common from './common.js';
import * as errors from './errors.js';
import {IterationNode, NonterminalNode, TerminalNode} from './nodes.js';
import * as pexprs from './pexprs-main.js';
// --------------------------------------------------------------------
// Operations
// --------------------------------------------------------------------
/*
Evaluate the expression and return `true` if it succeeds, `false` otherwise. This method should
only be called directly by `State.prototype.eval(expr)`, which also updates the data structures
that are used for tracing. (Making those updates in a method of `State` enables the trace-specific
data structures to be "secrets" of that class, which is good for modularity.)
The contract of this method is as follows:
* When the return value is `true`,
- the state object will have `expr.getArity()` more bindings than it did before the call.
* When the return value is `false`,
- the state object may have more bindings than it did before the call, and
- its input stream's position may be anywhere.
Note that `State.prototype.eval(expr)`, unlike this method, guarantees that neither the state
object's bindings nor its input stream's position will change if the expression fails to match.
*/
pexprs.PExpr.prototype.eval = common.abstract('eval'); // function(state) { ... }
pexprs.any.eval = function(state) {
const {inputStream} = state;
const origPos = inputStream.pos;
const cp = inputStream.nextCodePoint();
if (cp !== undefined) {
state.pushBinding(new TerminalNode(String.fromCodePoint(cp).length), origPos);
return true;
} else {
state.processFailure(origPos, this);
return false;
}
};
pexprs.end.eval = function(state) {
const {inputStream} = state;
const origPos = inputStream.pos;
if (inputStream.atEnd()) {
state.pushBinding(new TerminalNode(0), origPos);
return true;
} else {
state.processFailure(origPos, this);
return false;
}
};
pexprs.Terminal.prototype.eval = function(state) {
const {inputStream} = state;
const origPos = inputStream.pos;
if (!inputStream.matchString(this.obj)) {
state.processFailure(origPos, this);
return false;
} else {
state.pushBinding(new TerminalNode(this.obj.length), origPos);
return true;
}
};
pexprs.Range.prototype.eval = function(state) {
const {inputStream} = state;
const origPos = inputStream.pos;
// A range can operate in one of two modes: matching a single, 16-bit _code unit_,
// or matching a _code point_. (Code points over 0xFFFF take up two 16-bit code units.)
const cp = this.matchCodePoint ? inputStream.nextCodePoint() : inputStream.nextCharCode();
// Always compare by code point value to get the correct result in all scenarios.
// Note that for strings of length 1, codePointAt(0) and charPointAt(0) are equivalent.
if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
state.pushBinding(new TerminalNode(String.fromCodePoint(cp).length), origPos);
return true;
} else {
state.processFailure(origPos, this);
return false;
}
};
pexprs.Param.prototype.eval = function(state) {
return state.eval(state.currentApplication().args[this.index]);
};
pexprs.Lex.prototype.eval = function(state) {
state.enterLexifiedContext();
const ans = state.eval(this.expr);
state.exitLexifiedContext();
return ans;
};
pexprs.Alt.prototype.eval = function(state) {
for (let idx = 0; idx < this.terms.length; idx++) {
if (state.eval(this.terms[idx])) {
return true;
}
}
return false;
};
pexprs.Seq.prototype.eval = function(state) {
for (let idx = 0; idx < this.factors.length; idx++) {
const factor = this.factors[idx];
if (!state.eval(factor)) {
return false;
}
}
return true;
};
pexprs.Iter.prototype.eval = function(state) {
const {inputStream} = state;
const origPos = inputStream.pos;
const arity = this.getArity();
const cols = [];
const colOffsets = [];
while (cols.length < arity) {
cols.push([]);
colOffsets.push([]);
}
let numMatches = 0;
let prevPos = origPos;
let idx;
while (numMatches < this.maxNumMatches && state.eval(this.expr)) {
if (inputStream.pos === prevPos) {
throw errors.kleeneExprHasNullableOperand(this, state._applicationStack);
}
prevPos = inputStream.pos;
numMatches++;
const row = state._bindings.splice(state._bindings.length - arity, arity);
const rowOffsets = state._bindingOffsets.splice(
state._bindingOffsets.length - arity,
arity,
);
for (idx = 0; idx < row.length; idx++) {
cols[idx].push(row[idx]);
colOffsets[idx].push(rowOffsets[idx]);
}
}
if (numMatches < this.minNumMatches) {
return false;
}
let offset = state.posToOffset(origPos);
let matchLength = 0;
if (numMatches > 0) {
const lastCol = cols[arity - 1];
const lastColOffsets = colOffsets[arity - 1];
const endOffset =
lastColOffsets[lastColOffsets.length - 1] + lastCol[lastCol.length - 1].matchLength;
offset = colOffsets[0][0];
matchLength = endOffset - offset;
}
const isOptional = this instanceof pexprs.Opt;
for (idx = 0; idx < cols.length; idx++) {
state._bindings.push(
new IterationNode(cols[idx], colOffsets[idx], matchLength, isOptional),
);
state._bindingOffsets.push(offset);
}
return true;
};
pexprs.Not.prototype.eval = function(state) {
/*
TODO:
- Right now we're just throwing away all of the failures that happen inside a `not`, and
recording `this` as a failed expression.
- Double negation should be equivalent to lookahead, but that's not the case right now wrt
failures. E.g., ~~'foo' produces a failure for ~~'foo', but maybe it should produce
a failure for 'foo' instead.
*/
const {inputStream} = state;
const origPos = inputStream.pos;
state.pushFailuresInfo();
const ans = state.eval(this.expr);
state.popFailuresInfo();
if (ans) {
state.processFailure(origPos, this);
return false;
}
inputStream.pos = origPos;
return true;
};
pexprs.Lookahead.prototype.eval = function(state) {
const {inputStream} = state;
const origPos = inputStream.pos;
if (state.eval(this.expr)) {
inputStream.pos = origPos;
return true;
} else {
return false;
}
};
pexprs.Apply.prototype.eval = function(state) {
const caller = state.currentApplication();
const actuals = caller ? caller.args : [];
const app = this.substituteParams(actuals);
const posInfo = state.getCurrentPosInfo();
if (posInfo.isActive(app)) {
// This rule is already active at this position, i.e., it is left-recursive.
return app.handleCycle(state);
}
const memoKey = app.toMemoKey();
const memoRec = posInfo.memo[memoKey];
if (memoRec && posInfo.shouldUseMemoizedResult(memoRec)) {
if (state.hasNecessaryInfo(memoRec)) {
return state.useMemoizedResult(state.inputStream.pos, memoRec);
}
delete posInfo.memo[memoKey];
}
return app.reallyEval(state);
};
pexprs.Apply.prototype.handleCycle = function(state) {
const posInfo = state.getCurrentPosInfo();
const {currentLeftRecursion} = posInfo;
const memoKey = this.toMemoKey();
let memoRec = posInfo.memo[memoKey];
if (currentLeftRecursion && currentLeftRecursion.headApplication.toMemoKey() === memoKey) {
// We already know about this left recursion, but it's possible there are "involved
// applications" that we don't already know about, so...
memoRec.updateInvolvedApplicationMemoKeys();
} else if (!memoRec) {
// New left recursion detected! Memoize a failure to try to get a seed parse.
memoRec = posInfo.memoize(memoKey, {
matchLength: 0,
examinedLength: 0,
value: false,
rightmostFailureOffset: -1,
});
posInfo.startLeftRecursion(this, memoRec);
}
return state.useMemoizedResult(state.inputStream.pos, memoRec);
};
pexprs.Apply.prototype.reallyEval = function(state) {
const {inputStream} = state;
const origPos = inputStream.pos;
const origPosInfo = state.getCurrentPosInfo();
const ruleInfo = state.grammar.rules[this.ruleName];
const {body} = ruleInfo;
const {description} = ruleInfo;
state.enterApplication(origPosInfo, this);
if (description) {
state.pushFailuresInfo();
}
// Reset the input stream's examinedLength property so that we can track
// the examined length of this particular application.
const origInputStreamExaminedLength = inputStream.examinedLength;
inputStream.examinedLength = 0;
let value = this.evalOnce(body, state);
const currentLR = origPosInfo.currentLeftRecursion;
const memoKey = this.toMemoKey();
const isHeadOfLeftRecursion = currentLR && currentLR.headApplication.toMemoKey() === memoKey;
let memoRec;
if (state.doNotMemoize) {
state.doNotMemoize = false;
} else if (isHeadOfLeftRecursion) {
value = this.growSeedResult(body, state, origPos, currentLR, value);
origPosInfo.endLeftRecursion();
memoRec = currentLR;
memoRec.examinedLength = inputStream.examinedLength - origPos;
memoRec.rightmostFailureOffset = state._getRightmostFailureOffset();
origPosInfo.memoize(memoKey, memoRec); // updates origPosInfo's maxExaminedLength
} else if (!currentLR || !currentLR.isInvolved(memoKey)) {
// This application is not involved in left recursion, so it's ok to memoize it.
memoRec = origPosInfo.memoize(memoKey, {
matchLength: inputStream.pos - origPos,
examinedLength: inputStream.examinedLength - origPos,
value,
failuresAtRightmostPosition: state.cloneRecordedFailures(),
rightmostFailureOffset: state._getRightmostFailureOffset(),
});
}
const succeeded = !!value;
if (description) {
state.popFailuresInfo();
if (!succeeded) {
state.processFailure(origPos, this);
}
if (memoRec) {
memoRec.failuresAtRightmostPosition = state.cloneRecordedFailures();
}
}
// Record trace information in the memo table, so that it is available if the memoized result
// is used later.
if (state.isTracing() && memoRec) {
const entry = state.getTraceEntry(origPos, this, succeeded, succeeded ? [value] : []);
if (isHeadOfLeftRecursion) {
common.assert(entry.terminatingLREntry != null || !succeeded);
entry.isHeadOfLeftRecursion = true;
}
memoRec.traceEntry = entry;
}
// Fix the input stream's examinedLength -- it should be the maximum examined length
// across all applications, not just this one.
inputStream.examinedLength = Math.max(
inputStream.examinedLength,
origInputStreamExaminedLength,
);
state.exitApplication(origPosInfo, value);
return succeeded;
};
pexprs.Apply.prototype.evalOnce = function(expr, state) {
const {inputStream} = state;
const origPos = inputStream.pos;
if (state.eval(expr)) {
const arity = expr.getArity();
const bindings = state._bindings.splice(state._bindings.length - arity, arity);
const offsets = state._bindingOffsets.splice(state._bindingOffsets.length - arity, arity);
const matchLength = inputStream.pos - origPos;
return new NonterminalNode(this.ruleName, bindings, offsets, matchLength);
} else {
return false;
}
};
pexprs.Apply.prototype.growSeedResult = function(body, state, origPos, lrMemoRec, newValue) {
if (!newValue) {
return false;
}
const {inputStream} = state;
while (true) {
lrMemoRec.matchLength = inputStream.pos - origPos;
lrMemoRec.value = newValue;
lrMemoRec.failuresAtRightmostPosition = state.cloneRecordedFailures();
if (state.isTracing()) {
// Before evaluating the body again, add a trace node for this application to the memo entry.
// Its only child is a copy of the trace node from `newValue`, which will always be the last
// element in `state.trace`.
const seedTrace = state.trace[state.trace.length - 1];
lrMemoRec.traceEntry = new Trace(
state.input,
origPos,
inputStream.pos,
this,
true,
[newValue],
[seedTrace.clone()],
);
}
inputStream.pos = origPos;
newValue = this.evalOnce(body, state);
if (inputStream.pos - origPos <= lrMemoRec.matchLength) {
break;
}
if (state.isTracing()) {
state.trace.splice(-2, 1); // Drop the trace for the old seed.
}
}
if (state.isTracing()) {
// The last entry is for an unused result -- pop it and save it in the "real" entry.
lrMemoRec.traceEntry.recordLRTermination(state.trace.pop(), newValue);
}
inputStream.pos = origPos + lrMemoRec.matchLength;
return lrMemoRec.value;
};
pexprs.UnicodeChar.prototype.eval = function(state) {
const {inputStream} = state;
const origPos = inputStream.pos;
const ch = inputStream.next();
if (ch && this.pattern.test(ch)) {
state.pushBinding(new TerminalNode(ch.length), origPos);
return true;
} else {
state.processFailure(origPos, this);
return false;
}
};