peggy
Parser generator for JavaScript
// @ts-check
"use strict";
const asts = require("../asts");
const op = require("../opcodes");
const Stack = require("../stack");
const { version } = require("../../version");
const { stringEscape, regexpClassEscape } = require("../utils");
const { SourceNode } = require("source-map-generator");
const GrammarLocation = require("../../grammar-location");
const { parse } = require("../../parser.js");
/**
* @typedef {import("../../peg")} PEG
*/
/**
* Converts source text from the grammar into the `source-map` object
*
* @param {string} code Multiline string with source code
* @param {PEG.LocationRange} location
* Location that represents code block in the grammar
* @param {string} [name] Name of the code chunk
*
* @returns {SourceNode} New node that represents code chunk.
* Code will be split into lines if necessary
*/
function toSourceNode(code, location, name) {
const start = GrammarLocation.offsetStart(location);
const line = start.line;
// `source-map` columns are 0-based, peggy columns are 1-based
const column = start.column - 1;
const lines = code.split("\n");
if (lines.length === 1) {
return new SourceNode(
line, column, String(location.source), code, name
);
}
return new SourceNode(
null, null, String(location.source), lines.map((l, i) => new SourceNode(
line + i,
i === 0 ? column : 0,
String(location.source),
i === lines.length - 1 ? l : [l, "\n"],
name
))
);
}
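// Illustrative sketch (not executed by this module): mapping a two-line
// action body that starts at line 3, column 12 of a grammar. The location
// object mirrors the shape the parser produces; the values are hypothetical.
//
//   const node = toSourceNode("a +\nb", {
//     source: "arith.peggy",
//     start: { line: 3, column: 12, offset: 40 },
//     end: { line: 4, column: 2, offset: 45 },
//   }, "$action");
//
// Each output line becomes its own child SourceNode, so the first line maps
// to (3, 11) and the second to (4, 0), since `source-map` columns are
// 0-based while peggy columns are 1-based.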
/**
* Wraps a code line that consists of three parts into a `SourceNode`.
*
* @param {string} prefix String that will be prepended before mapped chunk
* @param {string} chunk Chunk for mapping (possible multiline)
* @param {PEG.LocationRange} location
* Location that represents chunk in the grammar
* @param {string} suffix String that will be appended after mapped chunk
* @param {string} [name] Name of the code chunk
*
* @returns {SourceNode} New node that represents code chunk.
* Code will be split into lines if necessary
*/
function wrapInSourceNode(prefix, chunk, location, suffix, name) {
// If location is not defined (for example, the AST node was replaced
// by a plugin and does not provide location information, see
// plugin-api.spec.js/"can replace parser"), return the original chunk
if (location) {
const end = GrammarLocation.offsetEnd(location);
return new SourceNode(null, null, String(location.source), [
prefix,
toSourceNode(chunk, location, name),
// Mark the end location with column information, otherwise the
// mapping would always continue to the end of the line
new SourceNode(
end.line,
// `source-map` columns are 0-based, peggy columns are 1-based
end.column - 1,
String(location.source),
suffix
),
]);
}
return new SourceNode(null, null, null, [prefix, chunk, suffix]);
}
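// Illustrative sketch (hypothetical names and values): wrapping an action
// body so the prefix and suffix stay unmapped while the body keeps its
// grammar location. This is roughly how buildFunc below emits the peg$f*
// wrappers.
//
//   wrapInSourceNode(
//     "\n  function peg$f0(n) {",
//     "return n > 0;",
//     location,           // the action's PEG.LocationRange
//     "  }"
//   );
//
// The middle child maps "return n > 0;" back to the grammar, and the suffix
// is anchored at the chunk's end column so the mapping does not run on to
// the end of the line.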
/**
* @typedef {(string|SourceNode)[]} SourceArray
*
* @typedef {PEG.SourceBuildOptions<PEG.SourceOutputs>} SourceBuildOptions
* @typedef {object} ExtraOptions
* @property {PEG.Dependencies} [dependencies]
* @property {string} [exportVar]
* @typedef {SourceBuildOptions & ExtraOptions} Options
*/
/**
* Generates parser JavaScript code.
*
* @param {PEG.ast.Grammar} ast
* @param {Options} options
*/
function generateJS(ast, options) {
if (!ast.literals || !ast.locations || !ast.classes
|| !ast.expectations || !ast.functions || !ast.importedNames) {
throw new Error(
"generateJS: generate bytecode was not called."
);
}
const {
literals, locations, classes, expectations, functions, importedNames,
} = ast;
if (!options.allowedStartRules) {
throw new Error(
"generateJS: options.allowedStartRules was not set."
);
}
const { allowedStartRules } = options;
/** @type {PEG.Dependencies} */
const dependencies = options.dependencies || {};
/**
* Re-indent the beginning of each line so the output is indented
* consistently, no matter what indentation the grammar uses.
*
* @param {string} str
* @param {string} [indent = ""]
* @returns {string}
*/
function reIndent(str, indent = "") {
const leadingSpace = str.match(/^\n*([ \t]+)/);
const body = leadingSpace
? str.replace(new RegExp(`^${leadingSpace[1]}`, "gm"), indent)
: str;
return body.replace(/[ \t]+$/, "");
}
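// Illustrative sketch: the indentation of the first non-empty line is taken
// as the grammar's base indent and replaced with `indent` on every line,
// preserving deeper relative indentation (input below is hypothetical).
//
//   reIndent("\n    return [\n      head,\n    ];\n  ", "  ")
//   // => "\n  return [\n    head,\n  ];\n"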
/**
* @overload
* @param {string} code
* @returns {string}
*/
/**
* @overload
* @param {SourceArray} code
* @returns {SourceArray}
*/
/**
* These only indent non-empty lines to avoid trailing whitespace.
* @param {SourceArray} code
* @returns {SourceArray}
*/
function indent2(code) {
/*
* - raw lines (outside of SourceNodes) have implicit newlines
* that get inserted at the end of processing, so indent
* should always be applied to the next string.
*
* - chunks inside SourceNodes are assumed to have explicit
* newlines, and often have several chunks on one line.
* We therefore shouldn't indent them, unless we've seen
* an explicit newline, or the previous line was raw.
*
* So, e.g.:
* [
* SourceNode(["a ", "b", "\nfoo "]),
* "x",
* "y",
* ]
*
* Should end up as
* [
* SourceNode([" a ", "b", "\n foo "]),
* "x",
* " y",
* ]
*
* sawEol and inSourceNode are used to keep track of when
* we should apply the indent.
*/
let sawEol = true;
let inSourceNode = 0;
/**
* @overload
* @param {string | SourceNode} code
* @returns {string | SourceNode}
*/
/**
* @overload
* @param {SourceNode} code
* @returns {SourceNode}
*/
/**
* @overload
* @param {SourceNode[]} code
* @returns {SourceNode[]}
*/
/**
* @overload
* @param {SourceArray} code
* @returns {SourceArray}
*/
/**
* @param {SourceArray | string | SourceNode} code
* @returns {SourceArray | string | SourceNode}
*/
function helper(code) {
if (Array.isArray(code)) {
return code.map(s => helper(s));
}
if (code instanceof SourceNode) {
inSourceNode++;
code.children = helper(code.children);
inSourceNode--;
return code;
}
if (sawEol) {
// There was an immediately prior newline, so
// indent at the start of every line
code = code.replace(/^(.+)$/gm, " $1");
} else {
// This line will be appended directly to
// the end of the previous one, so only indent
// after each contained newline (and only if
// there's non-whitespace following the newline)
code = code.replace(/\n(\s*\S)/g, "\n $1");
}
sawEol = !inSourceNode || code.endsWith("\n");
return code;
}
return helper(code);
}
/** @param {number} i */
function l(i) { return "peg$c" + i; } // |literals[i]| of the abstract machine
/** @param {number} i */
function r(i) { return "peg$r" + i; } // |classes[i]| of the abstract machine
/** @param {number} i */
function e(i) { return "peg$e" + i; } // |expectations[i]| of the abstract machine
/** @param {number} i */
function f(i) { return "peg$f" + i; } // |actions[i]| of the abstract machine
/** @param {number} i */
function gi(i) { return "peg$import" + i; } // |grammar_import[i]|
/**
* Generates name of the function that parses specified rule.
* @param {string} name
*/
function name(name) { return "peg$parse" + name; }
function generateTables() {
/** @param {string} literal */
function buildLiteral(literal) {
return "\"" + stringEscape(literal) + "\"";
}
/** @param {PEG.ast.GrammarCharacterClass} cls */
function buildRegexp(cls) {
return "/^["
+ (cls.inverted ? "^" : "")
+ cls.value.map(part => (Array.isArray(part)
? regexpClassEscape(part[0])
+ "-"
+ regexpClassEscape(part[1])
: regexpClassEscape(part))).join("")
+ "]/" + (cls.ignoreCase ? "i" : "") + (cls.unicode ? "u" : "");
}
/** @param {PEG.ast.GrammarExpectation} e */
function buildExpectation(e) {
switch (e.type) {
case "rule": {
return "peg$otherExpectation(\"" + stringEscape(e.value) + "\")";
}
case "literal": {
return "peg$literalExpectation(\""
+ stringEscape(e.value)
+ "\", "
+ e.ignoreCase
+ ")";
}
case "class": {
const parts = e.value.map(part => (Array.isArray(part)
? "[\"" + stringEscape(part[0]) + "\", \"" + stringEscape(part[1]) + "\"]"
: "\"" + stringEscape(part) + "\"")).join(", ");
return "peg$classExpectation(["
+ parts + "], "
+ e.inverted + ", "
+ e.ignoreCase + ", "
+ e.unicode
+ ")";
}
case "any": return "peg$anyExpectation()";
// istanbul ignore next Because we never generate an unknown expectation type, we cannot reach this branch
default: throw new Error("Unknown expectation type (" + JSON.stringify(e) + ")");
}
}
/**
* @param {PEG.ast.FunctionConst} a
* @param {number} i
*/
function buildFunc(a, i) {
return wrapInSourceNode(
`\n function ${f(i)}(${a.params.join(", ")}) {`,
reIndent(a.body, " "),
a.location,
" }"
);
}
return new SourceNode(
null, null, options.grammarSource, [
literals.map(
(c, i) => " const " + l(i) + " = " + buildLiteral(c) + ";"
).concat("", classes.map(
(c, i) => " const " + r(i) + " = " + buildRegexp(c) + ";"
)).concat("", expectations.map(
(c, i) => " const " + e(i) + " = " + buildExpectation(c) + ";"
)).concat("").join("\n"),
...functions.map(buildFunc),
]
);
}
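// Illustrative sketch of the emitted tables for a hypothetical grammar with
// one literal "a" and one class [0-9] (the exact numbering and flags depend
// on the generate-bytecode pass):
//
//   const peg$c0 = "a";
//   const peg$r0 = /^[0-9]/;
//   const peg$e0 = peg$literalExpectation("a", false);
//   const peg$e1 = peg$classExpectation([["0", "9"]], false, false, false);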
/**
* @param {string} ruleNameCode
* @param {number} ruleIndexCode
*/
function generateRuleHeader(ruleNameCode, ruleIndexCode) {
/** @type {string[]} */
const parts = [];
parts.push("");
if (options.trace) {
parts.push(
"peg$tracer.trace({",
" type: \"rule.enter\",",
" rule: " + ruleNameCode + ",",
" location: peg$computeLocation(startPos, startPos, true)",
"});",
""
);
}
if (options.cache) {
parts.push(
"const key = peg$currPos * " + ast.rules.length + " + " + ruleIndexCode + ";",
"const cached = peg$resultsCache[key];",
"",
"if (cached) {",
" peg$currPos = cached.nextPos;",
""
);
if (options.trace) {
parts.push(
"if (cached.result !== peg$FAILED) {",
" peg$tracer.trace({",
" type: \"rule.match\",",
" rule: " + ruleNameCode + ",",
" result: cached.result,",
" location: peg$computeLocation(startPos, peg$currPos, true)",
" });",
"} else {",
" peg$tracer.trace({",
" type: \"rule.fail\",",
" rule: " + ruleNameCode + ",",
" location: peg$computeLocation(startPos, startPos, true)",
" });",
"}",
""
);
}
parts.push(
" return cached.result;",
"}",
""
);
}
return parts;
}
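// Note on the cached header above: with `cache` enabled, the generated code
// computes
//
//   key = peg$currPos * ast.rules.length + ruleIndex
//
// which is unique per (position, rule) pair because ruleIndex is always
// smaller than ast.rules.length, so one flat peg$resultsCache object can
// memoize every rule at every input position.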
/**
* @param {string} ruleNameCode
* @param {string} resultCode
*/
function generateRuleFooter(ruleNameCode, resultCode) {
/** @type {string[]} */
const parts = [];
if (options.cache) {
parts.push(
"",
"peg$resultsCache[key] = { nextPos: peg$currPos, result: " + resultCode + " };"
);
}
if (options.trace) {
parts.push(
"",
"if (" + resultCode + " !== peg$FAILED) {",
" peg$tracer.trace({",
" type: \"rule.match\",",
" rule: " + ruleNameCode + ",",
" result: " + resultCode + ",",
" location: peg$computeLocation(startPos, peg$currPos, true)",
" });",
"} else {",
" peg$tracer.trace({",
" type: \"rule.fail\",",
" rule: " + ruleNameCode + ",",
" location: peg$computeLocation(startPos, startPos, true)",
" });",
"}"
);
}
parts.push(
"",
"return " + resultCode + ";"
);
return parts;
}
/** @param {PEG.ast.Rule} rule */
function generateRuleFunction(rule) {
/** @type {SourceArray} */
const parts = [];
const bytecode = /** @type {number[]} */(rule.bytecode);
const stack = new Stack(rule.name, "s", "let", bytecode);
/** @param {number[]} bc */
function compile(bc) {
let ip = 0;
const end = bc.length;
const parts = [];
// eslint-disable-next-line no-useless-assignment
let value = undefined;
/**
* @param {string} cond
* @param {number} argCount
* @param {((bc: number[])=>SourceArray) | null} [thenFn]
*/
function compileCondition(cond, argCount, thenFn) {
const baseLength = argCount + 3;
const thenLength = bc[ip + baseLength - 2];
const elseLength = bc[ip + baseLength - 1];
const [thenCode, elseCode] = stack.checkedIf(
ip,
() => {
ip += baseLength + thenLength;
return (thenFn || compile)(bc.slice(ip - thenLength, ip));
},
(elseLength > 0)
? () => {
ip += elseLength;
return compile(bc.slice(ip - elseLength, ip));
}
: null
);
parts.push("if (" + cond + ") {");
parts.push(...indent2(thenCode));
if (elseLength > 0) {
parts.push("} else {");
parts.push(...indent2(elseCode));
}
parts.push("}");
}
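// Bytecode layout consumed by compileCondition, derived from the offsets
// above. For a conditional opcode with `argCount` arguments:
//
//   bc[ip]                          opcode
//   bc[ip + 1 .. ip + argCount]     arguments
//   bc[ip + argCount + 1]           thenLength
//   bc[ip + argCount + 2]           elseLength
//   bc[ip + baseLength ...]         then-branch bytecode, followed by the
//                                   else-branch bytecode (if elseLength > 0)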
/**
* Get the code to retrieve an input chunk. -1 means "one full Unicode
* character", which might be one or two UTF-16 code units (JS chars).
* @param {number} inputChunkLength
* @returns {string}
*/
function getChunkCode(inputChunkLength) {
switch (inputChunkLength) {
case -1:
return "peg$getUnicode()";
case 1:
return "input.charAt(peg$currPos)";
default:
return `input.substr(peg$currPos, ${inputChunkLength})`;
}
}
/**
* Get the code that increments peg$currPos correctly. -1 means "one
* full Unicode character", which might be one or two UTF-16 code units
* (JS chars), in which case the varname is required.
*
* @param {number} inputChunkLength
* @param {string} varName
* @returns {string}
*/
function getIncrCode(inputChunkLength, varName) {
switch (inputChunkLength) {
case -1:
return `peg$currPos += ${varName}.length;`;
case 1:
return "peg$currPos++;";
default:
return "peg$currPos += (" + inputChunkLength + ");";
}
}
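// Illustrative pairing of the two helpers above for the -1 ("one full
// Unicode character") case, where the chunk may be one or two UTF-16 code
// units and the increment must use the matched variable's length
// (variable names are hypothetical):
//
//   s0 = peg$getUnicode();
//   if (peg$r0.test(s0)) {
//     peg$currPos += s0.length;
//     // ...
//   }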
/**
MATCH_* opcodes typically do something like
if (<test>(input.substr(peg$currPos, length))) {
sN = input.substr(peg$currPos, length);
...
} else {
sN = peg$FAILED;
...
}
compileInputChunkCondition will convert that to
sN = input.substr(peg$currPos, length);
if (<test>(sN)) {
...
} else {
sN = peg$FAILED;
...
}
and avoid extracting the substring twice.
@param {(chunk:string, optimized:boolean)=>string} condFn
@param {number} argCount
@param {number} inputChunkLength
*/
function compileInputChunkCondition(
condFn, argCount, inputChunkLength
) {
const baseLength = argCount + 3;
let inputChunk = getChunkCode(inputChunkLength);
let thenFn = null;
if (bc[ip + baseLength] === op.ACCEPT_N
&& bc[ip + baseLength + 1] === inputChunkLength) {
// Push the assignment to the next available variable.
parts.push(stack.push(inputChunk));
inputChunk = stack.pop();
/** @param {number[]} bc */
thenFn = bc => {
// The bc[0] is an ACCEPT_N, and bc[1] is the N. We've already done
// the assignment (before the if), so we just need to bump the
// stack, and increment peg$currPos appropriately.
stack.sp++;
const code = compile(bc.slice(2));
code.unshift(getIncrCode(inputChunkLength, inputChunk));
return code;
};
}
compileCondition(condFn(inputChunk, thenFn !== null), argCount, thenFn);
}
/** @param {string} cond */
function compileLoop(cond) {
const baseLength = 2;
const bodyLength = bc[ip + baseLength - 1];
const bodyCode = stack.checkedLoop(ip, () => {
ip += baseLength + bodyLength;
return compile(bc.slice(ip - bodyLength, ip));
});
parts.push("while (" + cond + ") {");
parts.push(...indent2(bodyCode));
parts.push("}");
}
/** @param {number} baseLength */
function compileCall(baseLength) {
const paramsLength = bc[ip + baseLength - 1];
return f(bc[ip + 1]) + "("
+ bc.slice(ip + baseLength, ip + baseLength + paramsLength).map(
p => stack.index(p)
).join(", ")
+ ")";
}
while (ip < end) {
switch (bc[ip]) {
case op.PUSH_EMPTY_STRING: // PUSH_EMPTY_STRING
parts.push(stack.push("''"));
ip++;
break;
case op.PUSH_CURR_POS: // PUSH_CURR_POS
parts.push(stack.push("peg$currPos"));
ip++;
break;
case op.PUSH_UNDEFINED: // PUSH_UNDEFINED
parts.push(stack.push("undefined"));
ip++;
break;
case op.PUSH_NULL: // PUSH_NULL
parts.push(stack.push("null"));
ip++;
break;
case op.PUSH_FAILED: // PUSH_FAILED
parts.push(stack.push("peg$FAILED"));
ip++;
break;
case op.PUSH_EMPTY_ARRAY: // PUSH_EMPTY_ARRAY
parts.push(stack.push("[]"));
ip++;
break;
case op.POP: // POP
stack.pop();
ip++;
break;
case op.POP_CURR_POS: // POP_CURR_POS
parts.push("peg$currPos = " + stack.pop() + ";");
ip++;
break;
case op.POP_N: // POP_N n
stack.pop(bc[ip + 1]);
ip += 2;
break;
case op.NIP: // NIP
value = stack.pop();
stack.pop();
parts.push(stack.push(value));
ip++;
break;
case op.APPEND: // APPEND
value = stack.pop();
parts.push(stack.top() + ".push(" + value + ");");
ip++;
break;
case op.WRAP: // WRAP n
parts.push(
stack.push("[" + stack.pop(bc[ip + 1]).join(", ") + "]")
);
ip += 2;
break;
case op.TEXT: // TEXT
parts.push(
stack.push("input.substring(" + stack.pop() + ", peg$currPos)")
);
ip++;
break;
case op.PLUCK: { // PLUCK n, k, p1, ..., pK
const baseLength = 3;
const paramsLength = bc[ip + baseLength - 1];
const n = baseLength + paramsLength;
value = bc.slice(ip + baseLength, ip + n);
value = paramsLength === 1
? stack.index(value[0])
: `[ ${
value.map(p => stack.index(p)).join(", ")
} ]`;
stack.pop(bc[ip + 1]);
parts.push(stack.push(value));
ip += n;
break;
}
case op.IF: // IF t, f
compileCondition(stack.top(), 0);
break;
case op.IF_ERROR: // IF_ERROR t, f
compileCondition(stack.top() + " === peg$FAILED", 0);
break;
case op.IF_NOT_ERROR: // IF_NOT_ERROR t, f
compileCondition(stack.top() + " !== peg$FAILED", 0);
break;
case op.IF_LT: // IF_LT min, t, f
compileCondition(stack.top() + ".length < " + bc[ip + 1], 1);
break;
case op.IF_GE: // IF_GE max, t, f
compileCondition(stack.top() + ".length >= " + bc[ip + 1], 1);
break;
case op.IF_LT_DYNAMIC: // IF_LT_DYNAMIC min, t, f
compileCondition(stack.top() + ".length < (" + stack.index(bc[ip + 1]) + "|0)", 1);
break;
case op.IF_GE_DYNAMIC: // IF_GE_DYNAMIC max, t, f
compileCondition(stack.top() + ".length >= (" + stack.index(bc[ip + 1]) + "|0)", 1);
break;
case op.WHILE_NOT_ERROR: // WHILE_NOT_ERROR b
compileLoop(stack.top() + " !== peg$FAILED");
break;
case op.MATCH_ANY: // MATCH_ANY a, f, ...
compileCondition("input.length > peg$currPos", 0);
break;
case op.MATCH_STRING: { // MATCH_STRING s, a, f, ...
const litNum = bc[ip + 1];
const literal = literals[litNum];
compileInputChunkCondition(
(inputChunk, optimized) => {
if (literal.length > 1) {
return `${inputChunk} === ${l(litNum)}`;
}
inputChunk = !optimized
? "input.charCodeAt(peg$currPos)"
: `${inputChunk}.charCodeAt(0)`;
return `${inputChunk} === ${literal.charCodeAt(0)}`;
},
1,
literal.length
);
break;
}
case op.MATCH_STRING_IC: { // MATCH_STRING_IC s, a, f, ...
const litNum = bc[ip + 1];
compileInputChunkCondition(
inputChunk => `${inputChunk}.toLowerCase() === ${l(litNum)}`,
1,
literals[litNum].length
);
break;
}
case op.MATCH_CHAR_CLASS: { // MATCH_CHAR_CLASS c, a, f, ...
const regNum = bc[ip + 1];
compileInputChunkCondition(
inputChunk => `${r(regNum)}.test(${inputChunk})`, 1, 1
);
break;
}
case op.MATCH_UNICODE_CLASS: { // MATCH_UNICODE_CLASS c, a, f, ...
const regNum = bc[ip + 1];
compileInputChunkCondition(
inputChunk => `${r(regNum)}.test(${inputChunk})`, 1, -1
);
break;
}
case op.ACCEPT_N: // ACCEPT_N n
parts.push(stack.push(getChunkCode(bc[ip + 1])));
parts.push(getIncrCode(bc[ip + 1], stack.top()));
ip += 2;
break;
case op.ACCEPT_STRING: // ACCEPT_STRING s
parts.push(stack.push(l(bc[ip + 1])));
parts.push(
literals[bc[ip + 1]].length > 1
? "peg$currPos += " + literals[bc[ip + 1]].length + ";"
: "peg$currPos++;"
);
ip += 2;
break;
case op.FAIL: // FAIL e
parts.push(stack.push("peg$FAILED"));
parts.push("if (peg$silentFails === 0) { peg$fail(" + e(bc[ip + 1]) + "); }");
ip += 2;
break;
case op.LOAD_SAVED_POS: // LOAD_SAVED_POS p
parts.push("peg$savedPos = " + stack.index(bc[ip + 1]) + ";");
ip += 2;
break;
case op.UPDATE_SAVED_POS: // UPDATE_SAVED_POS
parts.push("peg$savedPos = peg$currPos;");
ip++;
break;
case op.CALL: // CALL f, n, pc, p1, p2, ..., pN
value = compileCall(4);
stack.pop(bc[ip + 2]);
parts.push(stack.push(value));
ip += 4 + bc[ip + 3];
break;
case op.RULE: // RULE r
parts.push(stack.push(name(ast.rules[bc[ip + 1]].name) + "()"));
ip += 2;
break;
case op.LIBRARY_RULE: { // LIBRARY_RULE module, name
const nm = bc[ip + 2];
const cnm = (nm === -1) ? "" : ", \"" + importedNames[nm] + "\"";
parts.push(stack.push("peg$callLibrary("
+ gi(bc[ip + 1])
+ cnm
+ ")"));
ip += 3;
break;
}
case op.SILENT_FAILS_ON: // SILENT_FAILS_ON
parts.push("peg$silentFails++;");
ip++;
break;
case op.SILENT_FAILS_OFF: // SILENT_FAILS_OFF
parts.push("peg$silentFails--;");
ip++;
break;
case op.SOURCE_MAP_PUSH:
stack.sourceMapPush(
parts,
locations[bc[ip + 1]]
);
ip += 2;
break;
case op.SOURCE_MAP_POP: {
stack.sourceMapPop();
ip++;
break;
}
case op.SOURCE_MAP_LABEL_PUSH:
stack.labels[bc[ip + 1]] = {
label: literals[bc[ip + 2]],
location: locations[bc[ip + 3]],
};
ip += 4;
break;
case op.SOURCE_MAP_LABEL_POP:
delete stack.labels[bc[ip + 1]];
ip += 2;
break;
// istanbul ignore next Because we never generate invalid bytecode we cannot reach this branch
default:
throw new Error("Invalid opcode: " + bc[ip] + ".");
}
}
return parts;
}
const code = compile(bytecode);
parts.push(wrapInSourceNode(
"function ",
name(rule.name),
rule.nameLocation,
"() {\n",
rule.name
));
if (options.trace) {
parts.push(" var startPos = peg$currPos;");
}
parts.push(indent2(stack.defines()));
parts.push(...indent2(generateRuleHeader(
"\"" + stringEscape(rule.name) + "\"",
asts.indexOfRule(ast, rule.name)
)));
parts.push(...indent2(code));
parts.push(...indent2(generateRuleFooter(
"\"" + stringEscape(rule.name) + "\"",
stack.result()
)));
parts.push("}");
parts.push("");
return parts;
}
/**
* @template {string} T
* @param {PEG.ast.CodeBlock<T>} node
*/
function ast2SourceNode(node) {
// If location is not defined (for example, the AST node was replaced
// by a plugin and does not provide location information, see
// plugin-api.spec.js/"can replace parser"), return the initializer code as-is
if (node.codeLocation) {
// Append "$" to the name to create an impossible rule name
// so that names will not collide with rule names
return toSourceNode(node.code, node.codeLocation, "$" + node.type);
}
return node.code;
}
function generateToplevel() {
const parts = [];
let topLevel = ast.topLevelInitializer;
if (topLevel) {
if (Array.isArray(topLevel)) {
if (options.format === "es") {
const imps = [];
const codes = [];
for (const tli of topLevel) {
const [
imports,
code,
] = /** @type {PEG.ast.TopLevelInitializer[]} */ (
parse(tli.code, {
startRule: "ImportsAndSource",
grammarSource: new GrammarLocation(
tli.codeLocation.source,
tli.codeLocation.start
),
})
);
if (imports.code) {
imps.push(imports);
codes.push(code);
} else {
// Prefer the original
codes.push(tli);
}
}
// Imports go at the end so that when reversed, they end up in front.
topLevel = codes.concat(imps);
}
// Put library code before code using it.
const reversed = topLevel.slice(0).reverse();
for (const tli of reversed) {
parts.push(ast2SourceNode(tli));
parts.push("");
}
} else {
parts.push(ast2SourceNode(topLevel));
parts.push("");
}
}
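// Illustrative sketch of the hoisting above (hypothetical grammar snippet):
// with `format: "es"`, a top-level initializer such as
//
//   {{
//     import { units } from "./units.js";
//     const scale = units.metric;
//   }}
//
// is re-parsed with the "ImportsAndSource" start rule so the import
// statements can be emitted ahead of the rest of the initializer code;
// other formats keep the block as written.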
parts.push(
"class peg$SyntaxError extends SyntaxError {",
" constructor(message, expected, found, location) {",
" super(message);",
" this.expected = expected;",
" this.found = found;",
" this.location = location;",
" this.name = \"SyntaxError\";",
" }",
"",
" format(sources) {",
" let str = \"Error: \" + this.message;",
" if (this.location) {",
" let src = null;",
" const st = sources.find(s => s.source === this.location.source);",
" if (st) {",
" src = st.text.split(/\\r\\n|\\n|\\r/g);",
" }",
" const s = this.location.start;",
" const offset_s = (this.location.source && (typeof this.location.source.offset === \"function\"))",
" ? this.location.source.offset(s)",
" : s;",
" const loc = this.location.source + \":\" + offset_s.line + \":\" + offset_s.column;",
" if (src) {",
" const e = this.location.end;",
" const filler = \"\".padEnd(offset_s.line.toString().length, \" \");",
" const line = src[s.line - 1];",
" const last = s.line === e.line ? e.column : line.length + 1;",
" const hatLen = (last - s.column) || 1;",
" str += \"\\n --> \" + loc + \"\\n\"",
" + filler + \" |\\n\"",
" + offset_s.line + \" | \" + line + \"\\n\"",
" + filler + \" | \" + \"\".padEnd(s.column - 1, \" \")",
" + \"\".padEnd(hatLen, \"^\");",
" } else {",
" str += \"\\n at \" + loc;",
" }",
" }",
" return str;",
" }",
"",
" static buildMessage(expected, found) {",
" function hex(ch) {",
" return ch.codePointAt(0).toString(16).toUpperCase();",
" }",
"",
" const nonPrintable = Object.prototype.hasOwnProperty.call(RegExp.prototype, \"unicode\")",
" ? new RegExp(\"[\\\\p{C}\\\\p{Mn}\\\\p{Mc}]\", \"gu\")",
" : null;",
" function unicodeEscape(s) {",
" if (nonPrintable) {",
" return s.replace(nonPrintable, ch => \"\\\\u{\" + hex(ch) + \"}\");",
" }",
" return s;",
" }",
"",
" function literalEscape(s) {",
" return unicodeEscape(s",
" .replace(/\\\\/g, \"\\\\\\\\\")", // Backslash
" .replace(/\"/g, \"\\\\\\\"\")", // Closing double quote
" .replace(/\\0/g, \"\\\\0\")", // Null
" .replace(/\\t/g, \"\\\\t\")", // Horizontal tab
" .replace(/\\n/g, \"\\\\n\")", // Line feed
" .replace(/\\r/g, \"\\\\r\")", // Carriage return
" .replace(/[\\x00-\\x0F]/g, ch => \"\\\\x0\" + hex(ch))",
" .replace(/[\\x10-\\x1F\\x7F-\\x9F]/g, ch => \"\\\\x\" + hex(ch)));",
" }",
"",
" function classEscape(s) {",
" return unicodeEscape(s",
" .replace(/\\\\/g, \"\\\\\\\\\")", // Backslash
" .replace(/\\]/g, \"\\\\]\")", // Closing bracket
" .replace(/\\^/g, \"\\\\^\")", // Caret
" .replace(/-/g, \"\\\\-\")", // Dash
" .replace(/\\0/g, \"\\\\0\")", // Null
" .replace(/\\t/g, \"\\\\t\")", // Horizontal tab
" .replace(/\\n/g, \"\\\\n\")", // Line feed
" .replace(/\\r/g, \"\\\\r\")", // Carriage return
" .replace(/[\\x00-\\x0F]/g, ch => \"\\\\x0\" + hex(ch))",
" .replace(/[\\x10-\\x1F\\x7F-\\x9F]/g, ch => \"\\\\x\" + hex(ch)));",
" }",
"",
" const DESCRIBE_EXPECTATION_FNS = {",
" literal(expectation) {",
" return \"\\\"\" + literalEscape(expectation.text) + \"\\\"\";",
" },",
"",
" class(expectation) {",
" const escapedParts = expectation.parts.map(",
" part => (Array.isArray(part)",
" ? classEscape(part[0]) + \"-\" + classEscape(part[1])",
" : classEscape(part))",
" );",
"",
" return \"[\" + (expectation.inverted ? \"^\" : \"\") + escapedParts.join(\"\") + \"]\" + (expectation.unicode ? \"u\" : \"\");",
" },",
"",
" any() {",
" return \"any character\";",
" },",
"",
" end() {",
" return \"end of input\";",
" },",
"",
" other(expectation) {",
" return expectation.description;",
" },",
" };",
"",
" function describeExpectation(expectation) {",
" return DESCRIBE_EXPECTATION_FNS[expectation.type](expectation);",
" }",
"",
" function describeExpected(expected) {",
" const descriptions = expected.map(describeExpectation);",
" descriptions.sort();",
"",
" if (descriptions.length > 0) {",
" let j = 1;",
" for (let i = 1; i < descriptions.length; i++) {",
" if (descriptions[i - 1] !== descriptions[i]) {",
" descriptions[j] = descriptions[i];",
" j++;",
" }",
" }",
" descriptions.length = j;",
" }",
"",
" switch (descriptions.length) {",
" case 1:",
" return descriptions[0];",
"",
" case 2:",
" return descriptions[0] + \" or \" + descriptions[1];",
"",
" default:",
" return descriptions.slice(0, -1).join(\", \")",
" + \", or \"",
" + descriptions[descriptions.length - 1];",
" }",
" }",
"",
" function describeFound(found) {",
" return found ? \"\\\"\" + literalEscape(found) + \"\\\"\" : \"end of input\";",
" }",
"",
" return \"Expected \" + describeExpected(expected) + \" but \" + describeFound(found) + \" found.\";",
" }",
"}",
""
);
if (options.trace) {
parts.push(
"class peg$DefaultTracer {",
" constructor() {",
" this.indentLevel = 0;",
" }",
"",
" trace(event) {",
" const that = this;",
"",
" function log(event) {",
" console?.log?.(",
" event.location.start.line + \":\" + event.location.start.column + \"-\"",
" + event.location.end.line + \":\" + event.location.end.column + \" \"",
" + event.type.padEnd(10, \" \")",
" + \"\".padEnd((that.indentLevel * 2) + 1, \" \") + event.rule",
" );",
" }",
"",
" switch (event.type) {",
" case \"rule.enter\":",
" log(event);",
" this.indentLevel++;",
" break;",
"",
" case \"rule.match\":",
" this.indentLevel--;",
" log(event);",
" break;",
"",
" case \"rule.fail\":",
" this.indentLevel--;",
" log(event);",
" break;",
"",
" default:",
" throw new Error(\"Invalid event type: \" + event.type + \".\");",
" }",
" }",
"}",
""
);
}
const startRuleFunctions = "{\n"
+ allowedStartRules.map(r => ` ${r}: ${name(r)},\n`).join("")
+ " }";
const startRuleFunction = name(allowedStartRules[0]);
parts.push(
"function peg$parse(input, options) {",
" options = options !== undefined ? options : {};",
"",
" const peg$FAILED = {};",
" const peg$source = options.grammarSource;",
"",
" const peg$startRuleFunctions = " + startRuleFunctions + ";",
" let peg$startRuleFunction = " + startRuleFunction + ";",
"",
generateTables(),
"",
" let peg$currPos = options.peg$currPos | 0;",
" let peg$savedPos = peg$currPos;",
" const peg$posDetailsCache = [{ line: 1, column: 1 }];",
" let peg$maxFailPos = peg$currPos;",
" let peg$maxFailExpected = options.peg$maxFailExpected || [];",
" let peg$silentFails = options.peg$silentFails | 0;", // 0 = report failures, > 0 = silence failures
""
);
if (options.cache) {
parts.push(
" let peg$resultsCache = {};",
""
);
}
if (options.trace) {
parts.push(
" let peg$tracer = \"tracer\" in options ? options.tracer : new peg$DefaultTracer();",
""
);
}
parts.push(
" let peg$result;",
"",
" if (options.startRule) {",
" if (!(options.startRule in peg$startRuleFunctions)) {",
" throw new Error(\"Can't start parsing from rule \\\"\" + options.startRule + \"\\\".\");",
" }",
"",
" peg$startRuleFunction = peg$startRuleFunctions[options.startRule];",
" }",
"",
" function text() {",
" return input.substring(peg$savedPos, peg$currPos);",
" }",
"",
" function offset() {",
" return peg$savedPos;",
" }",
"",
" function range() {",
" return {",
" source: peg$source,",
" start: peg$savedPos,",
" end: peg$currPos,",
" };",
" }",
"",
" function location() {",
" return peg$computeLocation(peg$savedPos, peg$currPos);",
" }",
"",
" function expected(description, location) {",
" location = location !== undefined",
" ? location",
" : peg$computeLocation(peg$savedPos, peg$currPos);",
"",
" throw peg$buildStructuredError(",
" [peg$otherExpectation(description)],",
" input.substring(peg$savedPos, peg$currPos),",
" location",
" );",
" }",
"",
" function error(message, location) {",
" location = location !== undefined",
" ? location",
" : peg$computeLocation(peg$savedPos, peg$currPos);",
"",
" throw peg$buildSimpleError(message, location);",
" }",
"",
" function peg$getUnicode(pos = peg$currPos) {",
" const cp = input.codePointAt(pos);",
" if (cp === undefined) {",
" return \"\";",
" }",
" return String.fromCodePoint(cp);",
" }",
"",
" function peg$literalExpectation(text, ignoreCase) {",
" return { type: \"literal\", text, ignoreCase };",
" }",
"",
" function peg$classExpectation(parts, inverted, ignoreCase, unicode) {",
" return { type: \"class\", parts, inverted, ignoreCase, unicode };",
" }",
"",
" function peg$anyExpectation() {",
" return { type: \"any\" };",
" }",
"",
" function peg$endExpectation() {",
" return { type: \"end\" };",
" }",
"",
" function peg$otherExpectation(description) {",
" return { type: \"other\", description };",
" }",
"",
" function peg$computePosDetails(pos) {",
" let details = peg$posDetailsCache[pos];",
" let p;",
"",
" if (details) {",
" return details;",
" } else {",
" if (pos >= peg$posDetailsCache.length) {",
" p = peg$posDetailsCache.length - 1;",
" } else {",
" p = pos;",
" while (!peg$posDetailsCache[--p]) {}",
" }",
"",
" details = peg$posDetailsCache[p];",
" details = {",
" line: details.line,",
" column: details.column,",
" };",
"",
" while (p < pos) {",
" if (input.charCodeAt(p) === 10) {",
" details.line++;",
" details.column = 1;",
" } else {",
" details.column++;",
" }",
"",
" p++;",
" }",
"",
" peg$posDetailsCache[pos] = details;",
"",
" return details;",
" }",
" }",
"",
" function peg$computeLocation(startPos, endPos, offset) {",
" const startPosDetails = peg$computePosDetails(startPos);",
" const endPosDetails = peg$computePosDetails(endPos);",
"",
" const res = {",
" source: peg$source,",
" start: {",
" offset: startPos,",
" line: startPosDetails.line,",
" column: startPosDetails.column,",
" },",
" end: {",
" offset: endPos,",
" line: endPosDetails.line,",
" column: endPosDetails.column,",
" },",
" };",
" if (offset && peg$source && (typeof peg$source.offset === \"function\")) {",
" res.start = peg$source.offset(res.start);",
" res.end = peg$source.offset(res.end);",
" }",
" return res;",
" }",
"",
" function peg$fail(expected) {",
" if (peg$currPos < peg$maxFailPos) { return; }",
"",
" if (peg$currPos > peg$maxFailPos) {",
" peg$maxFailPos = peg$currPos;",
" peg$maxFailExpected = [];",
" }",
"",
" peg$maxFailExpected.push(expected);",
" }",
"",
" function peg$buildSimpleError(message, location) {",
" return new peg$SyntaxError(message, null, null, location);",
" }",
"",
" function peg$buildStructuredError(expected, found, location) {",
" return new peg$SyntaxError(",
" peg$SyntaxError.buildMessage(expected, found),",
" expected,",
" found,",
" location",
" );",
" }",
""
);
if (ast.imports.length > 0) {
parts.push(
" function peg$callLibrary(lib, startRule) {",
" const opts = Object.assign({}, options, {",
" startRule: startRule,",
" peg$currPos: peg$currPos,",
" peg$silentFails: peg$silentFails,",
" peg$library: true,",
" peg$maxFailExpected: peg$maxFailExpected",
" });",
" const res = lib.parse(input, opts);",
" peg$currPos = res.peg$currPos;",
" peg$maxFailPos = res.peg$maxFailPos;",
" peg$maxFailExpected = res.peg$maxFailExpected;",
" return (res.peg$result === res.peg$FAILED) ? peg$FAILED : res.peg$result;",
" }",
""
);
}
ast.rules.forEach(rule => {
parts.push(...indent2(generateRuleFunction(rule)));
});
if (ast.initializer) {
if (Array.isArray(ast.initializer)) {
for (const init of ast.initializer) {
parts.push(ast2SourceNode(init));
parts.push("");
}
} else {
parts.push(ast2SourceNode(ast.initializer));
parts.push("");
}
}
parts.push(
" peg$result = peg$startRuleFunction();",
"",
" const peg$success = (peg$result !== peg$FAILED && peg$currPos === input.length);",
" function peg$throw() {",
" if (peg$result !== peg$FAILED && peg$currPos < input.length) {",
" peg$fail(peg$endExpectation());",
" }",
"",
" throw peg$buildStructuredError(",
" peg$maxFailExpected,",
" peg$maxFailPos < input.length ? peg$getUnicode(peg$maxFailPos) : null,",
" peg$maxFailPos < input.length",
" ? peg$computeLocation(peg$maxFailPos, peg$maxFailPos + 1)",
" : peg$computeLocation(peg$maxFailPos, peg$maxFailPos)",
" );",
" }",
" if (options.peg$library) {",
// Hide this from TypeScript. It's internal-only until library mode is stabilized.
" return /** @type {any} */ ({",
" peg$result,",
" peg$currPos,",
" peg$FAILED,",
" peg$maxFailExpected,",
" peg$maxFailPos,",
" peg$success,",
" peg$throw: peg$success ? undefined : peg$throw,",
" });",
" }",
" if (peg$success) {",
" return peg$result;",
" } else {",
" peg$throw();",
" }",
"}"
);
return new SourceNode(
// This expression is more readable when split across two lines
// eslint-disable-next-line @stylistic/function-call-argument-newline
null, null, options.grammarSource,
parts.map(s => (s instanceof SourceNode ? s : s + "\n"))
);
}
/** @param {SourceNode} toplevelCode */
function generateWrapper(toplevelCode) {
/** @return {(string|SourceNode)[]} */
function generateGeneratedByComment() {
return [
`// @generated by Peggy ${version}.`,
"//",
"// https://peggyjs.org/",
];
}
function generateParserObject() {
const res = ["{"];
if (options.trace) {
res.push(" DefaultTracer: peg$DefaultTracer,");
}
if (options.allowedStartRules) {
res.push(" StartRules: [" + options.allowedStartRules.map(r => '"' + r + '"').join(", ") + "],");
}
res.push(
" SyntaxError: peg$SyntaxError,",
" parse: peg$parse,"
);
res.push("}");
return res.join("\n");
}
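// Illustrative shape of the object built above, assuming `trace` is enabled
// and `allowedStartRules` is ["start"] (hypothetical values):
//
//   {
//     DefaultTracer: peg$DefaultTracer,
//     StartRules: ["start"],
//     SyntaxError: peg$SyntaxError,
//     parse: peg$parse,
//   }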
const generators = {
bare() {
if ((Object.keys(dependencies).length > 0)
|| (ast.imports.length > 0)) {
throw new Error("Dependencies not supported in format 'bare'.");
}
return [
...generateGeneratedByComment(),
"(function() {",
" \"use strict\";",
toplevelCode,
indent2("return " + generateParserObject() + ";"),
"})()",
];
},
commonjs() {
const dependencyVars = Object.keys(dependencies);
const parts = generateGeneratedByComment();
parts.push(
"",
"\"use strict\";"
);
if (dependencyVars.length > 0) {
dependencyVars.forEach(variable => {
parts.push(
"const " + variable
+ " = require(\""
+ stringEscape(dependencies[variable])
+ "\");"
);
});
parts.push("");
}
const impLen = ast.imports.length;
for (let i = 0; i < impLen; i++) {
parts.push(
"const " + gi(i)
+ " = require(\""
+ stringEscape(ast.imports[i].from.module)
+ "\");"
);
}
parts.push(
"",
toplevelCode,
"",
"module.exports = " + generateParserObject() + ";"
);
return parts;
},
es() {
const dependencyVars = Object.keys(dependencies);
const parts = generateGeneratedByComment();
parts.push("");
if (dependencyVars.length > 0) {
dependencyVars.forEach(variable => {
parts.push(
"import " + variable
+ " from \""
+ stringEscape(dependencies[variable])
+ "\";"
);
});
parts.push("");
}
for (let i = 0; i < ast.imports.length; i++) {
parts.push(
"import * as " + gi(i)
+ " from \""
+ stringEscape(ast.imports[i].from.module)
+ "\";"
);
}
parts.push(
"",
toplevelCode,
""
);
parts.push(
"const peg$allowedStartRules = [",
" " + (options.allowedStartRules ? options.allowedStartRules.map(r => '"' + r + '"').join(",\n ") : ""),
"];",
""
);
parts.push(
"export {"
);
if (options.trace) {
parts.push(" peg$DefaultTracer as DefaultTracer,");
}
parts.push(
" peg$allowedStartRules as StartRules,",
" peg$SyntaxError as SyntaxError,",
" peg$parse as parse",
"};"
);
return parts;
},
amd() {
if (ast.imports.length > 0) {
throw new Error("Imports are not supported in format 'amd'.");
}
const dependencyVars = Object.keys(dependencies);
const dependencyIds = dependencyVars.map(v => dependencies[v]);
const deps = "["
+ dependencyIds.map(
id => "\"" + stringEscape(id) + "\""
).join(", ")
+ "]";
const params = dependencyVars.join(", ");
return [
...generateGeneratedByComment(),
"defin