wikiparser-node
Version:
A Node.js parser for MediaWiki markup with AST
192 lines (191 loc) • 7.16 kB
JavaScript
;
/* eslint-disable n/no-process-exit */
// Interop helper: reuse an `__importDefault` already present on `this`, otherwise
// wrap anything that is not flagged `__esModule` as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const path_1 = __importDefault(require("path"));
const fs_1 = __importDefault(require("fs"));
const child_process_1 = require("child_process");
const strict_1 = __importDefault(require("assert/strict"));
const cm_util_1 = require("@bhsd/cm-util");
const diff_1 = require("../util/diff");
/**
 * Builds a copy of the configuration in which every array consisting solely of
 * strings is replaced by a lookup object mapping each string to `true`.
 * The `articlePath` entry is left out of the result; all other values are kept as they are.
 * @param config parser configuration
 * @param config.articlePath article path
 */
const arrToObj = ({ articlePath, ...rest }) => {
    const isStringList = (value) => Array.isArray(value) && value.every(item => typeof item === 'string');
    const converted = Object.entries(rest).map(([key, value]) => [
        key,
        isStringList(value) ? Object.fromEntries(value.map(item => [item, true])) : value,
    ]);
    return Object.fromEntries(converted);
};
/**
 * Collects the aliases of the selected magic words, each with one trailing
 * colon removed and converted to lower case.
 * @param magicwords magic words
 * @param targets magic word names
 */
const getAliases = (magicwords, targets) => {
    const collected = [];
    for (const word of magicwords) {
        if (!targets.has(word.name)) {
            continue;
        }
        for (const alias of word.aliases) {
            collected.push(alias.replace(/:$/u, '').toLowerCase());
        }
    }
    return collected;
};
/**
 * Tells whether a namespace should be kept, i.e. whether its ID lies outside
 * the range 2300–2303 (Gadget, Gadget talk, Gadget definition, Gadget definition talk).
 * An ID that converts to `NaN` yields `false`.
 * @param id namespace ID
 */
const filterGadget = (id) => {
    const nsId = Number(id);
    const belowGadgetRange = nsId < 2300;
    const aboveGadgetRange = nsId > 2303;
    return belowGadgetRange || aboveGadgetRange;
};
/**
 * Runs the data script of a module: the first entry of `obj.files` whose key
 * ends with `.data.js` is called without arguments.
 * A `TypeError` is raised when no such entry exists.
 * @param obj MediaWiki module implementation
 */
const execute = (obj) => {
    const fileEntries = Object.entries(obj.files);
    const dataEntry = fileEntries.find(([fileName]) => fileName.endsWith('.data.js'));
    dataEntry[1]();
};
// Stub object offering `loader.impl`, `loader.implement`, `loader.state` and `config.set`.
// NOTE(review): nothing in this file calls these methods directly; presumably the
// script handed to `eval` in the exported function does — confirm.
const mw = {
    loader: {
        // set once a module whose name starts with 'ext.CodeMirror.data' has been implemented
        done: false,
        /** @ignore */
        impl(callback) {
            const moduleSpec = callback();
            execute(moduleSpec[1]);
        },
        /** @ignore */
        implement(name, callback) {
            const isObjectImplementation = typeof callback === 'object';
            if (isObjectImplementation) {
                execute(callback);
            }
            else if (!this.done) {
                // other callbacks only run until the data module has been seen
                callback();
            }
            const isDataModule = name.startsWith('ext.CodeMirror.data');
            if (isDataModule) {
                this.done = true;
            }
        },
        /** @ignore */
        state() {
            // intentionally does nothing
        },
    },
    config: {
        /** @ignore */
        set(settings) {
            // keep only the CodeMirror configuration in the module-level variable
            mwConfig = settings.extCodeMirrorConfig;
        },
    },
};
// package name and version
const pkg = 'wikiparser-node';
const version = '1.27.0';
// assigned by `mw.config.set`
let mwConfig;
/**
* Get the parser configuration for a Wikimedia Foundation project.
* @param site site nickname
* @param url script path
* @param user URI for wiki userpage or email address of the user
* @param force whether to overwrite the existing configuration
* @param internal for internal use
*/
exports.default = async (site, url, user, force, internal) => {
// wrong calls
if (!site || !url) {
if (internal) {
throw new RangeError('Site nickname and script path are required!');
}
else {
(0, diff_1.error)('Usage: npx getParserConfig <site> <script path> [user] [force]');
process.exit(1);
}
}
// internal calls with stored configuration
const dir = path_1.default.join('..', '..', 'config'), file = path_1.default.join(__dirname, dir, `${site}.json`);
if (internal && !force && fs_1.default.existsSync(file)) {
return require(file);
}
// fetching configuration
if (/(?:\.php|\/)$/u.test(url)) {
url = url.slice(0, url.lastIndexOf('/'));
}
if (user === 'git') {
user = (0, child_process_1.execSync)('git config user.email', { encoding: 'utf8' }).trim();
}
const headers = user
? {
headers: {
'User-Agent': `${pkg}/${version} (https://www.npmjs.com/package/${pkg}; ${user}) Node.js/${process.version}`,
},
}
: undefined, m = await (await fetch(`${url}/load.php?modules=ext.CodeMirror.data|ext.CodeMirror`, headers)).text(), params = {
action: 'query',
meta: 'siteinfo',
siprop: 'general|magicwords|functionhooks|namespaces|namespacealiases',
format: 'json',
formatversion: '2',
}, { query: { general: { articlepath, variants }, magicwords, namespaces, namespacealiases, functionhooks, }, } = await (await fetch(`${url}/api.php?${new URLSearchParams(params).toString()}`, headers)).json();
try {
eval(m); // eslint-disable-line no-eval
}
catch (e) {
console.log(m);
throw e;
}
if (!mwConfig) {
throw new RangeError('Extension:CodeMirror is not installed!');
}
const ns = Object.entries(namespaces).filter(([id]) => filterGadget(id))
.flatMap(([id, { name, canonical = '' }]) => [
[id, name],
...name === canonical ? [] : [[id, canonical]],
]), config = {
...(0, cm_util_1.getParserConfig)(require(path_1.default.join(dir, 'minimum')), mwConfig),
...(0, cm_util_1.getKeywords)(magicwords),
variants: (0, cm_util_1.getVariants)(variants),
namespaces: Object.fromEntries(ns),
nsid: Object.fromEntries([
...ns.map(([id, canonical]) => [canonical.toLowerCase(), Number(id)]),
...namespacealiases.filter(({ id }) => filterGadget(id)).map(({ id, alias }) => [alias.toLowerCase(), id]),
]),
articlePath: articlepath,
};
config.doubleUnderscore[0] = [];
config.doubleUnderscore[1] = [];
Object.assign(config.parserFunction[0], (0, cm_util_1.getConfig)(magicwords, ({ name }) => name === 'msgnw'));
config.parserFunction[2] = getAliases(magicwords, new Set(['msg', 'raw']));
config.parserFunction[3] = getAliases(magicwords, new Set(['subst', 'safesubst']));
if (!mwConfig.functionHooks) {
Object.assign(config, { functionHook: [...functionhooks.map(s => s.toLowerCase()), 'msgnw'] });
}
if (!mwConfig.variableIDs) {
const { query: { variables } } = await (await fetch(`${url}/api.php?${new URLSearchParams({ ...params, siprop: 'variables' }).toString()}`, headers)).json();
Object.assign(config, { variable: [...new Set([...variables, '='])] });
}
if ('#choose' in config.parserFunction[0]) {
delete config.parserFunction[0]['choose'];
const i = config.variable.indexOf('choose');
if (i !== -1) {
config.variable.splice(i, 1);
}
}
// saving configuration
if (force || !fs_1.default.existsSync(file)) {
fs_1.default.writeFileSync(file, `${JSON.stringify(config, null, '\t')}\n`);
}
else {
const oldConfig = arrToObj(require(file)), newConfig = arrToObj(config);
for (const [k, v] of Object.entries(newConfig)) {
try {
strict_1.default.deepStrictEqual(oldConfig[k], v);
}
catch (e) {
if (e instanceof strict_1.default.AssertionError) {
(0, diff_1.error)(`Configuration mismatch for "${k}"`);
delete e.actual;
delete e.expected;
}
throw e;
}
}
}
return config;
};