UNPKG

wikiparser-node

Version:

A Node.js parser for MediaWiki markup with AST

443 lines (442 loc) 16.7 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
/* eslint n/exports-style: 0 */
const base_1 = require("./base");
const debug_1 = require("./util/debug");
const constants_1 = require("./util/constants");
const string_1 = require("./util/string");
const lintConfig_1 = require("./lib/lintConfig");
/* NOT FOR BROWSER */
const common_1 = require("@bhsd/common");
const redirectMap_1 = require("./lib/redirectMap");
/* NOT FOR BROWSER END */
/* NOT FOR BROWSER ONLY */
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const common_2 = require("@bhsd/common");
const diff_1 = require("./util/diff");
const config_1 = __importDefault(require("./bin/config"));
/* NOT FOR BROWSER ONLY */
/** Matches the origin of a `<subdomain>.<project>.org` URL; group 1 is the subdomain, group 2 the project. */
const re = new RegExp(String.raw `^https?:\/\/([^./]+)\.(${common_2.wmf})\.org`, 'iu');
/**
 * Require a module relative to the parent directory of this file.
 *
 * An absolute `file` is required as-is. Otherwise the request is `..` joined with `dir` and `file`;
 * `dir` is left out when `file` already contains a `/`.
 * @param file file name
 * @param dir sub-directory
 */
const rootRequire = (file, dir) => require(path_1.default.isAbsolute(file)
    ? /* istanbul ignore next */ file
    : path_1.default.join('..', file.includes('/') ? '' : dir, file));
/* NOT FOR BROWSER ONLY END */
/** Backing store of `Parser.viewOnly`. */
let viewOnly = false;
/* NOT FOR BROWSER */
/** Chain of pending diff reports created by `Parser.parse()` in debugging mode; the newest is at index 0. */
const promises = [Promise.resolve()];
/^(zh|en)\s*:/diu; // eslint-disable-line @typescript-eslint/no-unused-expressions
/** Builds the regex matching an interwiki prefix followed by a colon at the start of a title. */
const getInterwikiRegex = (0, common_1.getRegex)(interwiki => new RegExp(String.raw `^(${interwiki.join('|')})\s*:`, 'diu'));
/** Backing store of `Parser.redirects`. */
let redirectMap = new redirectMap_1.RedirectMap();
/* NOT FOR BROWSER END */
/** Backing stores of `Parser.lintConfig` and `Parser.i18n`. */
let lintConfig = (() => {
    LINT: return new lintConfig_1.LintConfiguration(); // eslint-disable-line no-unused-labels
})(), i18n;
/** The parser singleton exported by this module. */
const Parser = {
    /** Either the name/path of a configuration file (loaded lazily by `getConfig()`) or a loaded configuration object. */
    config: 'default',
    /** @implements */
    get rules() {
        LINT: return base_1.rules; // eslint-disable-line no-unused-labels
    },
    /**
     * Messages in use: the English defaults overlaid with the custom messages, if any.
     * @implements
     */
    get i18n() {
        LINT: return { ...constants_1.enMsg, ...i18n }; // eslint-disable-line no-unused-labels
    },
    /**
     * Set the custom messages.
     * @param data a message object, or a string that is loaded through `rootRequire(data, 'i18n')`
     */
    set i18n(data) {
        /* NOT FOR BROWSER ONLY */
        if (typeof data === 'string') { // eslint-disable-line unicorn/prefer-ternary
            i18n = rootRequire(data, 'i18n');
        }
        else {
            /* NOT FOR BROWSER ONLY END */
            LINT: i18n = data; // eslint-disable-line no-unused-labels
        }
    },
    /** @implements */
    get lintConfig() {
        LINT: return lintConfig; // eslint-disable-line no-unused-labels
    },
    /** Wraps the given value in a new `LintConfiguration`. */
    set lintConfig(config) {
        LINT: lintConfig = new lintConfig_1.LintConfiguration(config); // eslint-disable-line no-unused-labels
    },
    /** @implements */
    get viewOnly() {
        return viewOnly;
    },
    /** Increments `Shadow.rev` when the flag goes from truthy to falsy, then stores the new value. */
    set viewOnly(value) {
        if (viewOnly && !value) {
            debug_1.Shadow.rev++;
        }
        viewOnly = value;
    },
    /* NOT FOR BROWSER */
    conversionTable: new Map(),
    templates: new redirectMap_1.RedirectMap(undefined, false),
    warning: true,
    debugging: false,
    functionHooks: new Map(),
    tagHooks: new Map(),
    /** @implements */
    get redirects() {
        return redirectMap;
    },
    /** Accepts a `RedirectMap` as-is; anything else is passed to the `RedirectMap` constructor. */
    set redirects(redirects) {
        redirectMap = redirects instanceof redirectMap_1.RedirectMap
            ? redirects
            : new redirectMap_1.RedirectMap(redirects);
    },
    /* NOT FOR BROWSER END */
    /* NOT FOR BROWSER ONLY */
    /** Extra directories (resolved against the current working directory) searched for a named configuration. */
    configPaths: [],
    /* NOT FOR BROWSER ONLY END */
    /**
     * Get the parser configuration.
     *
     * Without an argument, a string-valued `this.config` is first loaded: each entry of `configPaths`
     * is tried in order (unless the string is an absolute path), then `rootRequire(..., 'config')`.
     * The loaded object replaces `this.config` and the method calls itself again.
     *
     * The configuration is then normalized IN PLACE (empty `doubleUnderscore[0]`/`[1]` are filled from the
     * keys of `doubleUnderscore[2]`/`[3]`; the `translationlanguage` variable is registered when the
     * `translate` extension is present), `conversionTable`/`redirects` are copied onto the parser, and
     * a shallow copy with an empty `excludes` array is returned.
     * @param config configuration object; defaults to `this.config`
     * @implements
     */
    getConfig(config) {
        /* NOT FOR BROWSER ONLY */
        if (!config && typeof this.config === 'string') {
            if (!path_1.default.isAbsolute(this.config)) {
                for (const p of this.configPaths) {
                    try {
                        this.config = require(path_1.default.resolve(process.cwd(), p, this.config));
                        break;
                    }
                    catch { } // best effort: fall through to the next search path
                }
            }
            if (typeof this.config === 'string') {
                this.config = rootRequire(this.config, 'config');
            }
            /* istanbul ignore if */
            if (this.config.doubleUnderscore.length < 3
                || Array.isArray(this.config.parserFunction[1])
                || !('functionHook' in this.config)) {
                (0, diff_1.error)(`The schema (${path_1.default.join(__dirname, '..', 'config', '.schema.json')}) of parser configuration is updated.`);
            }
            return this.getConfig();
        }
        /* NOT FOR BROWSER ONLY END */
        const parserConfig = config ?? this.config,
            {
                doubleUnderscore,
                ext,
                parserFunction,
                variable,
                /* NOT FOR BROWSER */
                conversionTable,
                redirects,
            } = parserConfig;
        for (let i = 0; i < 2; i++) {
            if (doubleUnderscore.length > i + 2 && doubleUnderscore[i].length === 0) {
                doubleUnderscore[i] = Object.keys(doubleUnderscore[i + 2]);
            }
        }
        if (ext.includes('translate') && !variable.includes('translationlanguage')) {
            variable.push('translationlanguage');
            /* istanbul ignore if */
            if (Array.isArray(parserFunction[1])) {
                parserFunction[1].push('TRANSLATIONLANGUAGE');
            }
            else {
                parserFunction[1]['TRANSLATIONLANGUAGE'] = 'translationlanguage';
            }
        }
        /* NOT FOR BROWSER */
        if (conversionTable) {
            this.conversionTable = new Map(conversionTable);
        }
        if (redirects) {
            this.redirects = new redirectMap_1.RedirectMap(redirects);
        }
        /* NOT FOR BROWSER END */
        return {
            ...parserConfig,
            excludes: [],
        };
    },
    /**
     * Translate a message key.
     *
     * A falsy `msg` is returned unchanged. Otherwise the message is looked up in `this.i18n`
     * (falling back to `msg` itself) and its first `$1` is replaced with the translation of `arg`.
     * @param msg message key
     * @param arg substitute for `$1`, itself translated through `msg()`
     * @implements
     */
    msg(msg, arg = '') {
        LINT: { // eslint-disable-line no-unused-labels
            // A replacer function inserts the argument verbatim; a replacement *string* would
            // expand `$&`, `$$`, `` $` `` and `$'` sequences contained in the argument.
            return msg && (this.i18n[msg] ?? msg).replace('$1', () => this.msg(arg));
        }
    },
    /**
     * Build a `Title` object from a title string.
     *
     * With `opt.halfParsed` the string is handed to `Title` directly. Otherwise it is first run
     * through two `parseOnce` passes of a temporary root token, and any `main`/`fragment` part
     * that contains a `\0` character is rebuilt as text from that token.
     * @param title title string
     * @param defaultNs default namespace number
     * @param include forwarded to the first `parseOnce` pass
     * @param config parser configuration
     * @param opt options forwarded to the `Title` constructor
     * @implements
     */
    normalizeTitle(title, defaultNs = 0, include, config = Parser.getConfig(), opt) {
        const { Title } = require('./lib/title');
        let titleObj;
        if (opt?.halfParsed) {
            titleObj = new Title(title, defaultNs, config, opt);
        }
        else {
            const { Token } = require('./src/index');
            titleObj = debug_1.Shadow.run(() => {
                const root = new Token(title, config);
                root.type = 'root';
                root.parseOnce(0, include).parseOnce();
                const t = new Title(root.toString(), defaultNs, config, opt);
                root.build();
                for (const key of ['main', 'fragment']) {
                    const str = t[key];
                    if (str?.includes('\0')) {
                        const s = root.buildFromStr(str, constants_1.BuildMethod.Text);
                        if (key === 'main') {
                            t.main = s;
                        }
                        else {
                            t.setFragment(s);
                        }
                    }
                }
                return t;
            });
        }
        /* NOT FOR BROWSER */
        titleObj.conversionTable = this.conversionTable;
        titleObj.redirects = this.redirects;
        /* NOT FOR BROWSER END */
        return titleObj;
    },
    /**
     * Parse wikitext into a root token.
     *
     * When parsing throws an `Error`, the input (or the partly parsed text), the stack trace and the
     * parse parameters are written to the `errors` directory next to this file's parent directory,
     * and the error is rethrown.
     * @param wikitext wikitext; passed through `tidy()` first
     * @param include forwarded to `Token#parse`
     * @param maxStage a stage number, or one or more type names that are mapped to stages via `stages`
     * @param config parser configuration
     * @implements
     */
    parse(wikitext, include, maxStage = constants_1.MAX_STAGE, config = Parser.getConfig()) {
        wikitext = (0, string_1.tidy)(wikitext);
        let types;
        LINT: { // eslint-disable-line no-unused-labels
            if (typeof maxStage !== 'number') {
                types = Array.isArray(maxStage) ? maxStage : [maxStage];
                // unknown type names count as the maximum stage
                maxStage = Math.max(...types.map(t => base_1.stages[t] || constants_1.MAX_STAGE));
            }
        }
        const { Token } = require('./src/index');
        const root = debug_1.Shadow.run(() => {
            const token = new Token(wikitext, config);
            token.type = 'root';
            try {
                return token.parse(maxStage, include);
                /* NOT FOR BROWSER ONLY */
            }
            catch (e) /* istanbul ignore next */ {
                if (e instanceof Error) {
                    const file = path_1.default.join(__dirname, '..', 'errors', new Date().toISOString()),
                        stage = token.getAttribute('stage');
                    // Drops `regex*` and `Set` entries before the config is serialized below.
                    // Note that this mutates the caller's config object.
                    for (const k in config) {
                        if (k.startsWith('regex') || config[k] instanceof Set) {
                            delete config[k];
                        }
                    }
                    // NOTE(review): if one of these writes throws (e.g. the directory is missing),
                    // that error replaces `e` — confirm whether this is acceptable.
                    fs_1.default.writeFileSync(file, stage === constants_1.MAX_STAGE ? wikitext : token.toString());
                    fs_1.default.writeFileSync(`${file}.err`, e.stack);
                    fs_1.default.writeFileSync(`${file}.json`, JSON.stringify({ stage, include, config }, null, '\t'));
                }
                throw e;
            }
            /* NOT FOR BROWSER ONLY END */
        });
        /* NOT FOR BROWSER */
        if (types?.includes('list-range')) {
            root.buildLists();
        }
        /* istanbul ignore if */
        if (this.debugging) {
            // Round-trip check: first the serialized text, then the printed output with tags removed
            // and the three entities decoded, must equal the input.
            let restored = root.toString(),
                proc = 'parsing';
            if (restored === wikitext) {
                const entities = { lt: '<', gt: '>', amp: '&' };
                restored = root.print().replace(/<[^<]+?>|&([lg]t|amp);/gu, (_, s) => s ? entities[s] : '');
                proc = 'printing';
            }
            if (restored !== wikitext) {
                // chain the report behind the previous one so that they run one after another
                const { 0: cur, length } = promises;
                promises.unshift((async () => {
                    await cur;
                    this.error(`Original wikitext is altered when ${proc}!`);
                    return (0, diff_1.diff)(wikitext, restored, length);
                })());
            }
        }
        /* NOT FOR BROWSER END */
        return root;
    },
    /**
     * Parse wikitext one stage at a time, deferring each stage with `setImmediate`
     * (or `setTimeout` where `setImmediate` is unavailable).
     *
     * After every stage `watch()` is called; parsing stops early as soon as it no longer returns
     * `wikitext`. The returned token may therefore be only partly parsed.
     *
     * An error thrown by any stage rejects the returned promise, and `Shadow.running` is restored
     * to its previous value on every exit path.
     * @param wikitext wikitext
     * @param watch callback returning the current text
     * @param include forwarded to each parsing stage
     * @param config parser configuration
     * @implements
     */
    async partialParse(wikitext, watch, include, config = Parser.getConfig()) {
        LSP: { // eslint-disable-line no-unused-labels
            const { Token } = require('./src/index');
            const set = typeof setImmediate === 'function' ? setImmediate : /* istanbul ignore next */ setTimeout,
                { running } = debug_1.Shadow;
            debug_1.Shadow.running = true;
            try {
                const token = new Token((0, string_1.tidy)(wikitext), config);
                token.type = 'root';
                let i = 0;
                await new Promise((resolve, reject) => {
                    /** @ignore */
                    const check = () => {
                        if (watch() === wikitext) {
                            i++;
                            set(parseOnce, 0);
                        }
                        else {
                            resolve();
                        }
                    };
                    /** @ignore */
                    const parseOnce = () => {
                        // This runs in a timer callback, outside the executor's call stack, so a
                        // thrown error must be forwarded to the promise explicitly.
                        try {
                            if (i === constants_1.MAX_STAGE + 1) {
                                token.afterBuild();
                                resolve();
                            }
                            else {
                                token[i === constants_1.MAX_STAGE ? 'build' : 'parseOnce'](i, include);
                                check();
                            }
                        }
                        catch (e) /* istanbul ignore next */ {
                            reject(e);
                        }
                    };
                    set(parseOnce, 0);
                });
                return token;
            }
            finally {
                debug_1.Shadow.running = running;
            }
        }
    },
    /**
     * Get the language service registered for `uri` in `tasks`, or create a new one.
     * Also switches the parser to view-only mode.
     * @param uri key identifying the document
     * @implements
     */
    createLanguageService(uri = {}) {
        LSP: { // eslint-disable-line no-unused-labels
            const mod = require('./lib/lsp');
            const { LanguageService, tasks } = mod;
            this.viewOnly = true;
            return tasks.get(uri) ?? new LanguageService(uri);
        }
    },
    /* NOT FOR BROWSER ONLY */
    /**
     * Split a WMF site URL into a site name and its origin.
     * @param url site URL
     * @returns `[name, origin]`, where `name` is the lower-cased subdomain followed by the lower-cased
     * project (`wikipedia` becomes `wiki`) and `origin` is the matched `http(s)://….org` prefix
     * @throws `RangeError` when the URL does not match
     * @implements
     */
    getWMFSite(url) {
        const mt = re.exec(url);
        /* istanbul ignore if */
        if (!mt) {
            throw new RangeError('Not a recognizable WMF site!');
        }
        const type = mt[2].toLowerCase();
        return [mt[1].toLowerCase() + (type === 'wikipedia' ? 'wiki' : type), mt[0]];
    },
    /* istanbul ignore next */
    /**
     * Obtain a configuration through `./bin/config` and normalize it with `getConfig()`.
     * @implements
     */
    async fetchConfig(site, url, user) {
        return this.getConfig(await (0, config_1.default)(site, url, user, false, true));
    },
    /* NOT FOR BROWSER ONLY END */
    /* NOT FOR BROWSER */
    /**
     * Register a hook in `functionHooks` under `name`.
     * @implements
     */
    setFunctionHook(name, hook) {
        this.functionHooks.set(name, hook);
    },
    /**
     * Register a hook in `tagHooks` under `name`.
     * @implements
     */
    setHook(name, hook) {
        this.tagHooks.set(name, hook);
    },
    /**
     * Print a warning when `this.warning` is on; colored yellow if `chalk` can be loaded.
     * @implements
     */
    warn(msg, ...args) {
        /* istanbul ignore if */
        if (this.warning) {
            try {
                const chalk = require('chalk');
                console.warn(chalk.yellow(msg), ...args);
            }
            catch {
                // fall back to plain output
                console.warn(msg, ...args);
            }
        }
    },
    /**
     * Print a debug message when `this.debugging` is on; colored blue if `chalk` can be loaded.
     * @implements
     */
    debug(msg, ...args) {
        /* istanbul ignore if */
        if (this.debugging) {
            try {
                const chalk = require('chalk');
                console.debug(chalk.blue(msg), ...args);
            }
            catch {
                // fall back to plain output
                console.debug(msg, ...args);
            }
        }
    },
    error: diff_1.error,
    info: diff_1.info,
    /* istanbul ignore next */
    /**
     * Print the source text of a function; any other value is ignored.
     * @implements
     */
    log(f) {
        if (typeof f === 'function') {
            console.log(String(f));
        }
    },
    /**
     * Load a class registered in `classes` by name, or else a module path relative to this directory.
     * @implements
     */
    require(name) {
        // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
        return Object.hasOwn(constants_1.classes, name)
            ? require(constants_1.classes[name])[name]
            : require(path_1.default.join(__dirname, name));
    },
    /* istanbul ignore next */
    /**
     * Run `npm run build:core` for the parent directory, evict the registered class modules from
     * the `require` cache, and refresh those classes that are exposed on `globalThis`.
     * @implements
     */
    async clearCache() {
        await (0, diff_1.cmd)('npm', ['--prefix', path_1.default.join(__dirname, '..'), 'run', 'build:core']);
        const entries = Object.entries(constants_1.classes);
        for (const [, filePath] of entries) {
            try {
                delete require.cache[require.resolve(filePath)];
            }
            catch { } // best effort: skip modules that cannot be resolved
        }
        for (const [name, filePath] of entries) {
            if (name in globalThis) {
                // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
                Object.assign(globalThis, { [name]: require(filePath)[name] });
            }
        }
        this.info('已重新加载Parser');
    },
    /**
     * Test whether a title starts with an interwiki prefix.
     * Underscores are treated as spaces, and leading whitespace plus one optional colon are ignored.
     * @returns the regex match, or `null` (always `null` when no interwiki prefix is configured)
     * @implements
     */
    isInterwiki(title, { interwiki } = Parser.getConfig()) {
        return interwiki.length > 0
            ? getInterwikiRegex(interwiki).exec(title.replaceAll('_', ' ').replace(/^\s*:?\s*/u, ''))
            : null;
    },
    /* istanbul ignore next */
    /**
     * Re-run a parse recorded by `parse()` in the `errors` directory and delete the record
     * once the parse completes without throwing.
     * @param date prefix of the record's timestamp; the first matching record is used
     * @throws `RangeError` when no record matches
     * @implements
     */
    reparse(date = '') {
        const main = fs_1.default.readdirSync(path_1.default.join(__dirname, '..', 'errors'))
            .find(name => name.startsWith(date) && name.endsWith('Z'));
        if (!main) {
            throw new RangeError(`找不到对应时间戳的错误记录:${date}`);
        }
        const file = path_1.default.join(__dirname, '..', 'errors', main),
            wikitext = fs_1.default.readFileSync(file, 'utf8');
        const { stage, include, config } = require(`${file}.json`),
            { Token } = require('./src/index');
        debug_1.Shadow.run(() => {
            // a record below the maximum stage holds partly parsed text, which must not be tidied again
            const halfParsed = stage < constants_1.MAX_STAGE,
                token = new Token(halfParsed ? wikitext : (0, string_1.tidy)(wikitext), config);
            token.type = 'root';
            if (halfParsed) {
                token.setAttribute('stage', stage);
                token.parseOnce(stage, include);
            }
            else {
                token.parse(undefined, include);
            }
            fs_1.default.unlinkSync(file);
            fs_1.default.unlinkSync(`${file}.err`);
            fs_1.default.unlinkSync(`${file}.json`);
        });
    },
};
// Every key of `Parser` that is not listed here is made non-enumerable,
// and a `default` property pointing back to `Parser` is added.
const def = {
    default: { value: Parser },
}, enumerable = new Set([
    'lintConfig',
    'normalizeTitle',
    'parse',
    'createLanguageService',
    /* NOT FOR BROWSER ONLY */
    'fetchConfig',
    /* NOT FOR BROWSER ONLY END */
    /* NOT FOR BROWSER */
    'warning',
    'debugging',
    'isInterwiki',
]);
for (const key in Parser) {
    if (!enumerable.has(key)) {
        def[key] = { enumerable: false };
    }
}
Object.defineProperties(Parser, def);
exports.default = Parser;
module.exports = Parser;