/*
 * wikiparser-node
 * Version:
 * A Node.js parser for MediaWiki markup with AST
 * 224 lines (223 loc) • 10.1 kB
 * JavaScript
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseBraces = void 0;
const common_1 = require("@bhsd/common");
const string_1 = require("../util/string");
const heading_1 = require("../src/heading");
const transclude_1 = require("../src/transclude");
const arg_1 = require("../src/arg");
/* NOT FOR BROWSER */
const constants_1 = require("../util/constants");
/* NOT FOR BROWSER END */
/* NOT FOR BROWSER ONLY */
const v8_1 = require("v8");
// Heap-usage ceiling: 90% of the `heap_size_limit` reported by V8.
// `parseBraces` compares `process.memoryUsage().heapUsed` against it on every loop iteration
// and throws a `RangeError` once the ceiling is exceeded.
const MAXHEAP = (0, v8_1.getHeapStatistics)().heap_size_limit * 0.9;
/* NOT FOR BROWSER ONLY END */
/**
 * Closing patterns, keyed by the first character of the syntax that was opened
 * (`=` heading, `{` template/argument, `-` converter, `[` internal link)
 */
const closes = {
    '=': String.raw `\n(?!(?:[^\S\n]|\0\d+[cn]\x7F)*\n)`,
    '{': String.raw `\}{2,}|\|`,
    '-': String.raw `\}-`,
    '[': String.raw `\]\]`,
};
/** A single `[` that is not the start of `[[` */
const lbrack = String.raw `\[(?!\[)`;
/** A line break that is not followed by `=` or by a `\0` placeholder */
const newline = String.raw `\n(?![=\0])`;
/** Extra alternative (including its leading `|`) that matches two or more opening braces */
const openBraces = String.raw `|\{{2,}`;
/** Lower-cased names whose placeholder gets a dedicated type character instead of the default */
const marks = new Map([
    ['!', '!'],
    ['!!', '+'],
    ['(!', '{'],
    ['!)', '}'],
    ['!-', '-'],
    ['=', '~'],
    ['server', 'm'],
]);
/**
 * Builds a `gmu` regular expression from a pattern source
 * (wrapped by `getRegex` from `@bhsd/common`, presumably to cache by source; confirm in that package)
 */
const getExecRegex = (0, common_1.getRegex)(pattern => new RegExp(pattern, 'gmu'));
/**
 * Pattern used by the pre-pass of `parseBraces`. It matches constructs whose body contains no nested braces:
 * a template `{{…}}` (body captured), an internal link `[[…]]`, or a converter `-{…}-`.
 * The preferred form has two template alternatives (groups 1 and 2); the fallback has only one (group 1).
 */
let reReplace;
/* NOT FOR BROWSER ONLY */
try {
    // Preferred form: the first alternative needs lookbehind support.
    reReplace = new RegExp([
        String.raw `(?<!\{)\{\{((?:[^\n{}[]|${lbrack}|${newline})*)\}\}`,
        String.raw `\{\{((?:[^\n{}[]|${lbrack}|${newline})*)\}\}(?!\})`,
        String.raw `\[\[(?:[^\n[\]{]|${newline})*\]\]`,
        String.raw `-\{(?:[^\n{}[]|${lbrack}|${newline})*\}-`,
    ].join('|'), 'gu');
}
catch {
    /* NOT FOR BROWSER ONLY END */
    // Fallback when the pattern above cannot be compiled: same alternatives minus the lookbehind one.
    reReplace = new RegExp([
        String.raw `\{\{((?:[^\n{}[]|${lbrack}|${newline})*)\}\}(?!\})`,
        String.raw `\[\[(?:[^\n[\]{]|${newline})*\]\]`,
        String.raw `-\{(?:[^\n{}[]|${lbrack}|${newline})*\}-`,
    ].join('|'), 'gu');
}
/**
 * Choose the type character that is embedded in the placeholder of a template or magic word
 * @param s name of the template or magic word
 */
const getSymbol = (s) => {
    const { trimLc, removeComment } = string_1;
    const key = trimLc(removeComment(s));
    // dedicated markers such as {{!}}
    if (marks.has(key)) {
        return marks.get(key);
    }
    const isUrlFunction = /^(?:filepath|(?:full|canonical)urle?):./u.test(key);
    if (isUrlFunction) {
        return 'm';
    }
    return /^#vardefine:./u.test(key) ? 'n' : 't';
};
/**
 * Parse brace-delimited syntax.
 * Templates and magic words (`{{…}}`), arguments (`{{{…}}}`) and headings (`=…=`) are handed to
 * `TranscludeToken`, `ArgToken` and `HeadingToken` respectively, and their source text is replaced by a
 * `\0<index><type>\x7F` placeholder, where `<index>` is `accum.length` read before the token is constructed.
 * Internal links and converters that are matched by the pre-pass are stashed and put back by `restore()`.
 * @param wikitext text to parse
 * @param config parser configuration; `excludes` and `parserFunction[3]` are read here,
 * and the object is forwarded to every token constructor
 * @param accum token accumulator forwarded to every token constructor
 * (presumably each constructor appends the new token to it; confirm in the token classes)
 * @returns the rewritten text after `restore()` has been applied with the stashed fragments
 * @throws `RangeError` Maximum heap size exceeded
 * @throws `TranscludeToken.constructor()`
 */
const parseBraces = (wikitext, config, accum) => {
// `source`: alternatives that open a construct — a heading (`=` x1-6 at line start, optionally preceded by
// placeholders; omitted when `config.excludes` contains 'heading'), `[[`, and `-{` not followed by `{`.
// `subst`: 4th entry of `config.parserFunction`, checked against when closing `{{{…}}}`.
// `stack`: currently open constructs (regex match objects, augmented with `pos`/`parts`/`findEqual`).
// `linkStack`: fragments stashed by the pre-pass below, referenced from the text as `\0<n>\x7F`.
const source = String.raw `${config.excludes.includes('heading') ? '' : String.raw `^((?:\0\d+[cno]\x7F)*)={1,6}|`}\[\[|-\{(?!\{)`, { parserFunction: [, , , subst] } = config, stack = [], linkStack = [];
/**
 * Append a slice of the text to the last template part
 * @param text full wikitext
 * @param parts template parts collected so far
 * @param lastIndex start position of the slice
 * @param index end position of the slice (position of the current match)
 */
const push = (text, parts, lastIndex, index) => {
parts[parts.length - 1].push((0, string_1.restore)(text.slice(lastIndex, index), linkStack));
};
// Pre-pass: repeatedly replace constructs matched by `reReplace` until a pass changes nothing.
let replaced;
do {
if (replaced !== undefined) {
wikitext = replaced;
}
replaced = wikitext.replace(reReplace, (m, p1, p2) => {
// `p1`/`p2` hold a template body. With the fallback pattern there is only one capture group,
// so the third callback argument is the numeric match offset — hence the `typeof` check.
if (p1 !== undefined || typeof p2 === 'string') {
try {
const { length } = accum, parts = (p1 ?? p2).split('|');
// @ts-expect-error abstract class
new transclude_1.TranscludeToken((0, string_1.restore)(parts[0], linkStack), parts.slice(1).map(part => {
// split each parameter at its first '=' into [name, value]; anonymous parameters stay as [value]
const i = part.indexOf('=');
return (i === -1 ? [part] : [part.slice(0, i), part.slice(i + 1)])
.map(s => (0, string_1.restore)(s, linkStack));
}), config, accum);
return `\0${length}${getSymbol(parts[0])}\x7F`;
}
catch (e) {
// only an invalid template name is tolerated; the match is then stashed like a link below
/* istanbul ignore if */
if (!(e instanceof SyntaxError) || e.message !== 'Invalid template name') {
throw e;
}
}
}
// link, converter or invalid template: stash the restored text and leave a typeless placeholder
linkStack.push((0, string_1.restore)(m, linkStack));
return `\0${linkStack.length - 1}\x7F`;
});
} while (replaced !== wikitext);
wikitext = replaced;
// Position of the last '}}' stored as a negative offset from the end of the text,
// so that it can be compared with `lastIndex` after `wikitext` has been rewritten.
const lastBraces = wikitext.lastIndexOf('}}') - wikitext.length;
// false when the text contains no '}}' at all
let moreBraces = lastBraces + wikitext.length !== -1;
// NOTE(review): the regex literal below is never used; it appears to spell out every alternative
// that the dynamically built `regex` can contain — confirm before removing.
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
/^((?:\0\d+[cno]\x7F)*)={1,6}|\[\[|-\{(?!\{)|\{{2,}|\n(?!(?:[^\S\n]|\0\d+[cn]\x7F)*\n)|[|=]|\}{2,}|\}-|\]\]/gmu;
let regex = getExecRegex(source + (moreBraces ? openBraces : '')), mt = regex.exec(wikitext), lastIndex;
// Continue while there is a match, or — after at least one iteration — while a heading is still open
// on top of the stack, in which case the end of the text is handled like a line break (see below).
while (mt
|| lastIndex !== undefined && lastIndex <= wikitext.length
&& stack[stack.length - 1]?.[0]?.startsWith('=')) {
/* NOT FOR BROWSER ONLY */
if (process.memoryUsage().heapUsed > MAXHEAP) {
throw new RangeError('Maximum heap size exceeded');
}
/* NOT FOR BROWSER ONLY END */
// A heading match may start with placeholders (group 1): skip them so that the match begins at '='.
if (mt?.[1]) {
const [, { length }] = mt;
mt[0] = mt[0].slice(length);
mt.index += length;
}
// Without a match, behave as if a '\n' had been found at the end of the text.
// `top` is the innermost open construct (an empty object when the stack is empty).
const { 0: syntax, index: curIndex } = mt ?? { 0: '\n', index: wikitext.length }, top = stack.pop() ?? {}, { 0: open, index, parts, findEqual: topFindEqual, pos: topPos } = top, innerEqual = syntax === '=' && topFindEqual;
if (syntax === ']]' || syntax === '}-') { // Case 1: close an internal link or a converter
lastIndex = curIndex + 2;
}
else if (syntax === '\n') { // Case 2: close a heading, or end of text
lastIndex = curIndex + 1;
// the construct enclosing the heading, if any
const { pos, findEqual } = stack[stack.length - 1] ?? {};
// The heading is considered only if the enclosing construct has no `pos`, is still looking for '=',
// or has non-comment text between `pos` and the start of the heading.
if (pos === undefined || findEqual || (0, string_1.removeComment)(wikitext.slice(pos, index)) !== '') {
// groups: 1 = opening '=' run, 2 = heading content, 3 = trailing whitespace/placeholders
const rmt = /^(={1,6})(.+)\1((?:\s|\0\d+[cn]\x7F)*)$/u
.exec(wikitext.slice(index, curIndex));
if (rmt) {
rmt[2] = (0, string_1.restore)(rmt[2], linkStack);
if (!rmt[2].includes('\n')) {
wikitext = `${wikitext.slice(0, index)}\0${accum.length}h\x7F${wikitext.slice(curIndex)}`;
// placeholder length is 3 + number of digits; the extra 1 skips the character that follows it
lastIndex = index + 4 + String(accum.length).length;
// @ts-expect-error abstract class
new heading_1.HeadingToken(rmt[1].length, rmt.slice(2), config, accum);
}
}
}
}
else if (syntax === '|' || innerEqual) { // Case 3: inside a template, including a lone '=' at line start
lastIndex = curIndex + 1;
push(wikitext, parts, topPos, curIndex);
if (syntax === '|') {
// start a new parameter
parts.push([]);
}
top.pos = lastIndex;
// after '|' the first '=' of the new parameter is still wanted; after '=' it is not
top.findEqual = syntax === '|';
stack.push(top);
}
else if (syntax.startsWith('}}')) { // Case 4: close a template
// `close`: the closing braces actually consumed (at most 3, and no more than were opened)
// `rest`: number of opening braces left unmatched
const close = syntax.slice(0, Math.min(open.length, 3)), rest = open.length - close.length, { length } = accum;
lastIndex = curIndex + close.length; // this is not the final lastIndex
push(wikitext, parts, topPos, curIndex);
// `skip`: leave the text untouched; `ch`: type character of the placeholder
let skip = false, ch = 't';
if (close.length === 3) {
// triple braces: an argument; `str` is its second part with comments removed and trimmed
const argParts = parts.map(part => part.join('=')), str = argParts.length > 1 && (0, string_1.removeComment)(argParts[1]).trim();
// @ts-expect-error abstract class
new arg_1.ArgToken(argParts, config, accum);
// type 's' when the second part is `<name>:` and `<name>` (lower-cased) is listed in `subst`
if (str && str.endsWith(':')
&& subst.includes(str.slice(0, -1).toLowerCase())) {
ch = 's';
}
}
else {
try {
// @ts-expect-error abstract class
new transclude_1.TranscludeToken(parts[0][0], parts.slice(1), config, accum);
ch = getSymbol(parts[0][0]);
}
catch (e) {
/* istanbul ignore else */
if (e instanceof SyntaxError && e.message === 'Invalid template name') {
skip = true;
}
else {
throw e;
}
}
}
if (!skip) {
// replace the matched braces and their content with the placeholder, keeping the `rest` leading braces
wikitext = `${wikitext.slice(0, index + rest)}\0${length}${ch}\x7F${wikitext.slice(lastIndex)}`;
lastIndex = index + rest + 3 + String(length).length;
if (rest > 1) {
// two or more braces remain: they stay open as an outer template/argument
stack.push({ 0: open.slice(0, rest), index: index, pos: index + rest, parts: [[]] });
}
else if (rest === 1 && wikitext[index - 1] === '-') {
// a single remaining brace preceded by '-' is treated as an opening `-{`
stack.push({ 0: '-{', index: index - 1, pos: index + 1, parts: [[]] });
}
}
}
else { // Case 5: open a construct
lastIndex = curIndex + syntax.length;
if (syntax.startsWith('{')) {
// brace constructs collect their parts, starting right after the opening braces
mt.pos = lastIndex;
mt.parts = [[]];
}
// put the popped construct back (if there was one), then the newly opened one
stack.push(...'0' in top ? [top] : [], mt);
}
let curTop = stack[stack.length - 1];
// Once the scan has passed the last '}}', stop matching opening braces
// and discard the brace constructs on top of the stack.
if (moreBraces && lastBraces + wikitext.length < lastIndex) {
moreBraces = false;
while (curTop?.[0]?.startsWith('{')) {
stack.pop();
curTop = stack[stack.length - 1];
}
}
// NOTE(review): unused regex literal; its purpose is not visible in this file — confirm before removing.
/\{\{\s*([!=]|!!|\(!|!\)|!-)\s*\}\}(?!\})/gu; // eslint-disable-line @typescript-eslint/no-unused-expressions
// Rebuild the pattern: openers, plus the closer of the innermost open construct
// (and '=' while that construct is still looking for one).
regex = getExecRegex(source
+ (moreBraces ? openBraces : '')
+ (curTop ? `|${closes[curTop[0][0]]}${curTop.findEqual ? '|=' : ''}` : ''));
regex.lastIndex = lastIndex;
mt = regex.exec(wikitext);
}
return (0, string_1.restore)(wikitext, linkStack);
};
exports.parseBraces = parseBraces;
// Record this module's file path under the 'parseBraces' key of the `parsers` table from ../util/constants.
constants_1.parsers['parseBraces'] = __filename;
;