/*
 * wikiparser-node
 * Version: (not specified)
 * A Node.js parser for MediaWiki markup with AST
 * 443 lines (442 loc) • 16.7 kB
 * JavaScript
 */
// Interop helper: reuse an already-installed `__importDefault` when `this` provides one,
// otherwise wrap non-ES modules so that the module itself is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
/* eslint n/exports-style: 0 */
const base_1 = require("./base");
const debug_1 = require("./util/debug");
const constants_1 = require("./util/constants");
const string_1 = require("./util/string");
const lintConfig_1 = require("./lib/lintConfig");
/* NOT FOR BROWSER */
const common_1 = require("@bhsd/common");
const redirectMap_1 = require("./lib/redirectMap");
/* NOT FOR BROWSER END */
/* NOT FOR BROWSER ONLY */
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const common_2 = require("@bhsd/common");
const diff_1 = require("./util/diff");
const config_1 = __importDefault(require("./bin/config"));
/* NOT FOR BROWSER ONLY */
// Case-insensitive matcher for the start of a URL of the form `http(s)://<sub>.<project>.org`:
// group 1 is the first host label (no dots or slashes), group 2 is matched by `common_2.wmf`
// (presumably an alternation of WMF project names — confirm in `@bhsd/common`).
const re = new RegExp(String.raw `^https?:\/\/([^./]+)\.(${common_2.wmf})\.org`, 'iu');
/**
 * Require a file relative to the package root.
 * @param file file name; absolute paths are required as-is
 * @param dir sub-directory, used only when `file` contains no `/`
 */
const rootRequire = (file, dir) => {
    /* istanbul ignore next */
    if (path_1.default.isAbsolute(file)) {
        return require(file);
    }
    const subDir = file.includes('/') ? '' : dir;
    return require(path_1.default.join('..', subDir, file));
};
/* NOT FOR BROWSER ONLY END */
// Backing store for the `Parser.viewOnly` accessor pair.
let viewOnly = false;
/* NOT FOR BROWSER */
// Queue of pending debug reports; `Parser.parse` prepends a new promise that awaits the current head.
const promises = [Promise.resolve()];
// Unused regex literal; presumably a sample of the pattern shape produced by `getInterwikiRegex` below — TODO confirm.
/^(zh|en)\s*:/diu; // eslint-disable-line @typescript-eslint/no-unused-expressions
// Builds a regex matching any of the given interwiki prefixes followed by optional whitespace and a colon.
// NOTE(review): `getRegex` from `@bhsd/common` presumably caches the result per argument — confirm.
const getInterwikiRegex = (0, common_1.getRegex)(interwiki => new RegExp(String.raw `^(${interwiki.join('|')})\s*:`, 'diu'));
// Backing store for the `Parser.redirects` accessor pair.
let redirectMap = new redirectMap_1.RedirectMap();
/* NOT FOR BROWSER END */
// Backing stores for `Parser.lintConfig` (initialized with a default configuration)
// and `Parser.i18n` (initially undefined).
let lintConfig = (() => {
LINT: return new lintConfig_1.LintConfiguration(); // eslint-disable-line no-unused-labels
})(), i18n;
const Parser = {
config: 'default',
/** @implements */
get rules() {
LINT: return base_1.rules; // eslint-disable-line no-unused-labels
},
/** @implements */
get i18n() {
LINT: return { ...constants_1.enMsg, ...i18n }; // eslint-disable-line no-unused-labels
},
set i18n(data) {
/* NOT FOR BROWSER ONLY */
if (typeof data === 'string') { // eslint-disable-line unicorn/prefer-ternary
i18n = rootRequire(data, 'i18n');
}
else {
/* NOT FOR BROWSER ONLY END */
LINT: i18n = data; // eslint-disable-line no-unused-labels
}
},
/** @implements */
get lintConfig() {
LINT: return lintConfig; // eslint-disable-line no-unused-labels
},
set lintConfig(config) {
LINT: lintConfig = new lintConfig_1.LintConfiguration(config); // eslint-disable-line no-unused-labels
},
/** @implements */
get viewOnly() {
return viewOnly;
},
set viewOnly(value) {
if (viewOnly && !value) {
debug_1.Shadow.rev++;
}
viewOnly = value;
},
/* NOT FOR BROWSER */
conversionTable: new Map(),
templates: new redirectMap_1.RedirectMap(undefined, false),
warning: true,
debugging: false,
functionHooks: new Map(),
tagHooks: new Map(),
/** @implements */
get redirects() {
return redirectMap;
},
set redirects(redirects) {
redirectMap = redirects instanceof redirectMap_1.RedirectMap ? redirects : new redirectMap_1.RedirectMap(redirects);
},
/* NOT FOR BROWSER END */
/* NOT FOR BROWSER ONLY */
configPaths: [],
/* NOT FOR BROWSER ONLY END */
/**
 * Resolve the parser configuration into a plain object.
 *
 * When `config` is omitted and `this.config` is still a string, the named file is loaded
 * first and `this.config` is replaced with the loaded object.
 * @implements
 * @param config explicit configuration object; falls back to `this.config`
 * @returns a shallow copy of the configuration with `excludes` reset to an empty array
 */
getConfig(config) {
/* NOT FOR BROWSER ONLY */
if (!config && typeof this.config === 'string') {
if (!path_1.default.isAbsolute(this.config)) {
// Try each entry of `configPaths` (resolved against the current working directory);
// the first one that loads wins, and load failures are ignored on purpose.
for (const p of this.configPaths) {
try {
this.config = require(path_1.default.resolve(process.cwd(), p, this.config));
break;
}
catch { }
}
}
// Still a string: nothing was found above (or the path is absolute), so load it via `rootRequire`.
if (typeof this.config === 'string') {
this.config = rootRequire(this.config, 'config');
}
// Report (without throwing) a loaded configuration that does not have the expected shape.
/* istanbul ignore if */
if (this.config.doubleUnderscore.length < 3
|| Array.isArray(this.config.parserFunction[1])
|| !('functionHook' in this.config)) {
(0, diff_1.error)(`The schema (${path_1.default.join(__dirname, '..', 'config', '.schema.json')}) of parser configuration is updated.`);
}
// `this.config` is now an object, so this call takes the branch below.
return this.getConfig();
}
/* NOT FOR BROWSER ONLY END */
const parserConfig = config ?? this.config, { doubleUnderscore, ext, parserFunction, variable,
/* NOT FOR BROWSER */
conversionTable, redirects, } = parserConfig;
// Fill an empty `doubleUnderscore[0]`/`[1]` with the keys of `doubleUnderscore[2]`/`[3]` (mutates the configuration in place).
for (let i = 0; i < 2; i++) {
if (doubleUnderscore.length > i + 2 && doubleUnderscore[i].length === 0) {
doubleUnderscore[i] = Object.keys(doubleUnderscore[i + 2]);
}
}
// With the `translate` extension, make sure `translationlanguage` is registered (also mutates in place).
if (ext.includes('translate') && !variable.includes('translationlanguage')) {
variable.push('translationlanguage');
/* istanbul ignore if */
if (Array.isArray(parserFunction[1])) {
parserFunction[1].push('TRANSLATIONLANGUAGE');
}
else {
parserFunction[1]['TRANSLATIONLANGUAGE'] = 'translationlanguage';
}
}
/* NOT FOR BROWSER */
// Side effect: a configuration carrying these fields replaces the parser-wide conversion table / redirects.
if (conversionTable) {
this.conversionTable = new Map(conversionTable);
}
if (redirects) {
this.redirects = new redirectMap_1.RedirectMap(redirects);
}
/* NOT FOR BROWSER END */
return {
...parserConfig,
excludes: [],
};
},
/** @implements */
msg(msg, arg = '') {
LINT: { // eslint-disable-line no-unused-labels
return msg && (this.i18n[msg] ?? msg).replace('$1', this.msg(arg));
}
},
/**
 * Build a `Title` object from a title string.
 * @implements
 * @param title title text
 * @param defaultNs default namespace passed to the `Title` constructor
 * @param include forwarded to the first `parseOnce` call
 * @param config parser configuration; defaults to `Parser.getConfig()`
 * @param opt options passed to the `Title` constructor; `opt.halfParsed` skips the parsing step
 * @returns the `Title` instance, with `conversionTable` and `redirects` copied from the parser
 */
normalizeTitle(title, defaultNs = 0, include, config = Parser.getConfig(), opt) {
const { Title } = require('./lib/title');
let titleObj;
if (opt?.halfParsed) {
// The caller states the text is already half-parsed: construct the title directly.
titleObj = new Title(title, defaultNs, config, opt);
}
else {
const { Token } = require('./src/index');
titleObj = debug_1.Shadow.run(() => {
// Run the title through two `parseOnce` passes as a temporary root token,
// and build the Title from the token's string form before `build()` is called.
const root = new Token(title, config);
root.type = 'root';
root.parseOnce(0, include).parseOnce();
const t = new Title(root.toString(), defaultNs, config, opt);
root.build();
// Parts still containing `\0` are converted to text through `buildFromStr`
// (presumably `\0` marks placeholders left by the parsing passes — TODO confirm).
for (const key of ['main', 'fragment']) {
const str = t[key];
if (str?.includes('\0')) {
const s = root.buildFromStr(str, constants_1.BuildMethod.Text);
if (key === 'main') {
t.main = s;
}
else {
t.setFragment(s);
}
}
}
return t;
});
}
/* NOT FOR BROWSER */
titleObj.conversionTable = this.conversionTable;
titleObj.redirects = this.redirects;
/* NOT FOR BROWSER END */
return titleObj;
},
/**
 * Parse wikitext into a root token.
 * @implements
 * @param wikitext source text; passed through `tidy` first
 * @param include forwarded as the second argument of `token.parse()`
 * @param maxStage a numeric stage, or one or more keys of `stages` from `./base`
 * (the highest mapped stage is used; keys without a truthy mapping count as `MAX_STAGE`)
 * @param config parser configuration; defaults to `Parser.getConfig()`
 * @returns the value returned by `token.parse()`
 * @throws rethrows any parsing error after writing a dump under `../errors`
 */
parse(wikitext, include, maxStage = constants_1.MAX_STAGE, config = Parser.getConfig()) {
wikitext = (0, string_1.tidy)(wikitext);
let types;
LINT: { // eslint-disable-line no-unused-labels
// A non-numeric `maxStage` is a type name or a list of type names: translate it into a stage number.
if (typeof maxStage !== 'number') {
types = Array.isArray(maxStage) ? maxStage : [maxStage];
maxStage = Math.max(...types.map(t => base_1.stages[t] || constants_1.MAX_STAGE));
}
}
const { Token } = require('./src/index');
const root = debug_1.Shadow.run(() => {
const token = new Token(wikitext, config);
token.type = 'root';
try {
return token.parse(maxStage, include);
/* NOT FOR BROWSER ONLY */
}
catch (e) /* istanbul ignore next */ {
// Dump three files named after the current ISO timestamp (text, `.err` stack trace,
// `.json` with stage/include/config), then rethrow the original error.
if (e instanceof Error) {
const file = path_1.default.join(__dirname, '..', 'errors', new Date().toISOString()), stage = token.getAttribute('stage');
// Remove `regex*` entries and `Set` values from `config` (the object itself is modified)
// before serializing — presumably because they do not survive `JSON.stringify`.
for (const k in config) {
if (k.startsWith('regex') || config[k] instanceof Set) {
delete config[k];
}
}
// At `MAX_STAGE` the original text is saved; otherwise the token's current string form.
fs_1.default.writeFileSync(file, stage === constants_1.MAX_STAGE ? wikitext : token.toString());
fs_1.default.writeFileSync(`${file}.err`, e.stack);
fs_1.default.writeFileSync(`${file}.json`, JSON.stringify({ stage, include, config }, null, '\t'));
}
throw e;
}
/* NOT FOR BROWSER ONLY END */
});
/* NOT FOR BROWSER */
if (types?.includes('list-range')) {
root.buildLists();
}
/* istanbul ignore if */
if (this.debugging) {
// Round-trip check: first `toString()`; if that matches, `print()` with tags removed
// and `&lt;`/`&gt;`/`&amp;` decoded.
let restored = root.toString(), proc = 'parsing';
if (restored === wikitext) {
const entities = { lt: '<', gt: '>', amp: '&' };
restored = root.print().replace(/<[^<]+?>|&([lg]t|amp);/gu, (_, s) => s ? entities[s] : '');
proc = 'printing';
}
if (restored !== wikitext) {
// Queue the report behind the current head of `promises`; the promise is not awaited here.
const { 0: cur, length } = promises;
promises.unshift((async () => {
await cur;
this.error(`Original wikitext is altered when ${proc}!`);
return (0, diff_1.diff)(wikitext, restored, length);
})());
}
}
/* NOT FOR BROWSER END */
return root;
},
/** @implements */
async partialParse(wikitext, watch, include, config = Parser.getConfig()) {
LSP: { // eslint-disable-line no-unused-labels
const { Token } = require('./src/index');
const set = typeof setImmediate === 'function' ? setImmediate : /* istanbul ignore next */ setTimeout, { running } = debug_1.Shadow;
debug_1.Shadow.running = true;
const token = new Token((0, string_1.tidy)(wikitext), config);
token.type = 'root';
let i = 0;
try {
await new Promise(resolve => {
const /** @ignore */ check = () => {
if (watch() === wikitext) {
i++;
set(parseOnce, 0);
}
else {
resolve();
}
},
/** @ignore */ parseOnce = () => {
if (i === constants_1.MAX_STAGE + 1) {
token.afterBuild();
resolve();
}
else {
token[i === constants_1.MAX_STAGE ? 'build' : 'parseOnce'](i, include);
check();
}
};
set(parseOnce, 0);
});
}
catch (e) /* istanbul ignore next */ {
debug_1.Shadow.running = running;
throw e;
}
debug_1.Shadow.running = running;
return token;
}
},
/** @implements */
createLanguageService(uri = {}) {
LSP: { // eslint-disable-line no-unused-labels
const mod = require('./lib/lsp');
const { LanguageService, tasks } = mod;
this.viewOnly = true;
return tasks.get(uri) ?? new LanguageService(uri);
}
},
/* NOT FOR BROWSER ONLY */
/** @implements */
getWMFSite(url) {
const mt = re.exec(url);
/* istanbul ignore if */
if (!mt) {
throw new RangeError('Not a recognizable WMF site!');
}
const type = mt[2].toLowerCase();
return [mt[1].toLowerCase() + (type === 'wikipedia' ? 'wiki' : type), mt[0]];
},
/* istanbul ignore next */
/** @implements */
async fetchConfig(site, url, user) {
return this.getConfig(await (0, config_1.default)(site, url, user, false, true));
},
/* NOT FOR BROWSER ONLY END */
/* NOT FOR BROWSER */
/** @implements */
setFunctionHook(name, hook) {
this.functionHooks.set(name, hook);
},
/** @implements */
setHook(name, hook) {
this.tagHooks.set(name, hook);
},
/** @implements */
warn(msg, ...args) {
/* istanbul ignore if */
if (this.warning) {
try {
const chalk = require('chalk');
console.warn(chalk.yellow(msg), ...args);
}
catch {
console.warn(msg, ...args);
}
}
},
/** @implements */
debug(msg, ...args) {
/* istanbul ignore if */
if (this.debugging) {
try {
const chalk = require('chalk');
console.debug(chalk.blue(msg), ...args);
}
catch {
console.debug(msg, ...args);
}
}
},
error: diff_1.error,
info: diff_1.info,
/* istanbul ignore next */
/** @implements */
log(f) {
if (typeof f === 'function') {
console.log(String(f));
}
},
/** @implements */
require(name) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
return Object.hasOwn(constants_1.classes, name) ? require(constants_1.classes[name])[name] : require(path_1.default.join(__dirname, name));
},
/* istanbul ignore next */
/** @implements */
async clearCache() {
await (0, diff_1.cmd)('npm', ['--prefix', path_1.default.join(__dirname, '..'), 'run', 'build:core']);
const entries = Object.entries(constants_1.classes);
for (const [, filePath] of entries) {
try {
delete require.cache[require.resolve(filePath)];
}
catch { }
}
for (const [name, filePath] of entries) {
if (name in globalThis) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
Object.assign(globalThis, { [name]: require(filePath)[name] });
}
}
this.info('已重新加载Parser');
},
/** @implements */
isInterwiki(title, { interwiki } = Parser.getConfig()) {
return interwiki.length > 0
? getInterwikiRegex(interwiki).exec(title.replaceAll('_', ' ').replace(/^\s*:?\s*/u, ''))
: null;
},
/* istanbul ignore next */
/** @implements */
reparse(date = '') {
const main = fs_1.default.readdirSync(path_1.default.join(__dirname, '..', 'errors'))
.find(name => name.startsWith(date) && name.endsWith('Z'));
if (!main) {
throw new RangeError(`找不到对应时间戳的错误记录:${date}`);
}
const file = path_1.default.join(__dirname, '..', 'errors', main), wikitext = fs_1.default.readFileSync(file, 'utf8');
const { stage, include, config } = require(`${file}.json`), { Token } = require('./src/index');
debug_1.Shadow.run(() => {
const halfParsed = stage < constants_1.MAX_STAGE, token = new Token(halfParsed ? wikitext : (0, string_1.tidy)(wikitext), config);
token.type = 'root';
if (halfParsed) {
token.setAttribute('stage', stage);
token.parseOnce(stage, include);
}
else {
token.parse(undefined, include);
}
fs_1.default.unlinkSync(file);
fs_1.default.unlinkSync(`${file}.err`);
fs_1.default.unlinkSync(`${file}.json`);
});
},
};
// Property descriptors applied to `Parser`: a self-referencing `default` property,
// plus `enumerable: false` for every member not listed in `enumerable`.
const def = {
    default: { value: Parser },
};
const enumerable = new Set([
    'lintConfig',
    'normalizeTitle',
    'parse',
    'createLanguageService',
    /* NOT FOR BROWSER ONLY */
    'fetchConfig',
    /* NOT FOR BROWSER ONLY END */
    /* NOT FOR BROWSER */
    'warning',
    'debugging',
    'isInterwiki',
]);
for (const key in Parser) {
    if (enumerable.has(key)) {
        continue;
    }
    def[key] = { enumerable: false };
}
Object.defineProperties(Parser, def);
// Export the parser both as the module itself and as `exports.default`.
exports.default = Parser;
module.exports = Parser;
;