UNPKG

mwoffliner

Version:
517 lines 21.7 kB
import * as pathParser from 'path';
import * as logger from './Logger.js';
import * as util from './util/index.js';
import Downloader from './Downloader.js';
import Gadgets from './Gadgets.js';
import qs from 'querystring';
import semver from 'semver';
import basicURLDirector from './util/builders/url/basic.director.js';
import BaseURLDirector from './util/builders/url/base.director.js';
import ApiURLDirector from './util/builders/url/api.director.js';
import WikimediaDesktopURLDirector from './util/builders/url/desktop.director.js';
import WikimediaMobileURLDirector from './util/builders/url/mobile.director.js';
import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js';
import RestApiURLDirector from './util/builders/url/rest-api.director.js';
import ActionParseURLDirector from './util/builders/url/action-parse.director.js';
import { checkApiAvailability } from './util/mw-api.js';
import { BLACKLISTED_NS } from './util/const.js';
import { config } from './config.js';

/**
 * Singleton holding the configuration of the wiki being scraped: its paths,
 * the URLs derived from them, namespace information and cached results of
 * the API availability probes.
 *
 * The `has*Api()` methods probe lazily: the first call performs the check
 * and stores the result in a private field, later calls return the stored
 * value until `reset()` is called.
 */
class MediaWiki {
  static instance;

  /**
   * Return the shared instance, creating it on first use.
   * @returns {MediaWiki}
   */
  static getInstance() {
    if (!MediaWiki.instance) {
      MediaWiki.instance = new MediaWiki();
    }
    return MediaWiki.instance;
  }

  metaData;
  baseUrl;
  getCategories;
  namespaces = {};
  namespacesToMirror = [];
  apiCheckArticleId;
  queryOpts;
  urlDirector;
  skin = 'vector'; // Default fallback

  #wikiPath;
  #indexPhpPath;
  #actionApiPath;
  #modulePathOpt;
  #restApiPath;
  #username;
  #password;
  #domain;

  wikimediaDesktopUrlDirector;
  wikimediaMobileUrlDirector;
  visualEditorUrlDirector;
  restApiUrlDirector;
  actionParseUrlDirector;
  visualEditorApiUrl;
  actionApiUrl;
  restApiUrl;
  webUrl;
  wikimediaDesktopApiUrl;
  wikimediaMobileApiUrl;
  modulePath; // only for reading
  mobileModulePath;

  #apiUrlDirector;
  // Cached probe results; `null` means "not checked yet".
  #hasWikimediaDesktopApi;
  #hasWikimediaMobileApi;
  #hasVisualEditorApi;
  #hasRestApi;
  #hasActionParseApi;
  #hasCoordinates;
  #hasModuleApi;

  set username(value) {
    this.#username = value;
  }

  set password(value) {
    this.#password = value;
  }

  get actionApiPath() {
    return this.#actionApiPath;
  }

  /**
   * Set the action API path and rebuild the URLs that depend on it.
   * Falsy values are ignored. Requires `urlDirector` (see the `base` setter).
   */
  set actionApiPath(value) {
    if (value) {
      this.#actionApiPath = value;
      this.actionApiUrl = this.urlDirector.buildURL(this.#actionApiPath);
      this.setVisualEditorURL();
    }
  }

  /** Set the REST API path and rebuild `restApiUrl`. Falsy values are ignored. */
  set restApiPath(value) {
    if (value) {
      this.#restApiPath = value;
      this.setRestApiURL();
    }
  }

  set domain(value) {
    this.#domain = value;
  }

  /** Set the article path prefix and rebuild `webUrl`. Falsy values are ignored. */
  set wikiPath(value) {
    if (value) {
      this.#wikiPath = value;
      this.webUrl = this.urlDirector.buildURL(this.#wikiPath);
      logger.log(`webUrl: ${this.webUrl}`);
    }
  }

  set indexPhpPath(value) {
    if (value) {
      this.#indexPhpPath = value;
    }
  }

  /**
   * Set the wiki base URL, create the base URL director and rebuild every
   * URL derived from the currently configured paths. Falsy values are ignored.
   */
  set base(value) {
    if (value) {
      this.baseUrl = basicURLDirector.buildMediawikiBaseURL(value);
      this.urlDirector = new BaseURLDirector(this.baseUrl.href);
      this.webUrl = this.urlDirector.buildURL(this.#wikiPath);
      this.actionApiUrl = this.urlDirector.buildURL(this.#actionApiPath);
      this.setWikimediaDesktopApiUrl();
      this.setWikimediaMobileApiUrl();
      this.setRestApiURL();
      this.setVisualEditorURL();
      this.setModuleURL();
      this.setMobileModuleUrl();
    }
  }

  /**
   * Set the module (load.php) path option. `undefined` keeps the current
   * value. The module URL is rebuilt when a URL director exists; otherwise
   * an error is logged if a truthy value was supplied.
   */
  set modulePathOpt(value) {
    if (value !== undefined) {
      this.#modulePathOpt = value;
    }
    if (this.urlDirector) {
      this.setModuleURL();
    } else if (value) {
      logger.error('Base url director should be specified first');
    }
  }

  /**
   * Restore credentials, paths, namespace tables, query options, probe
   * caches and metadata to their defaults. Does not touch `skin`, `baseUrl`,
   * `urlDirector` or the already built URL objects.
   */
  initializeMediaWikiDefaults() {
    this.#domain = '';
    this.#username = '';
    this.#password = '';
    this.getCategories = false;

    this.#actionApiPath = '/w/api.php';
    this.#restApiPath = '/w/rest.php';
    this.#wikiPath = '/wiki/';
    this.#indexPhpPath = '/w/index.php';
    this.#modulePathOpt = '/w/load.php';

    this.namespaces = {};
    this.namespacesToMirror = [];
    this.apiCheckArticleId = 'MediaWiki:Sidebar';

    this.queryOpts = {
      action: 'query',
      format: 'json',
      prop: 'info|redirects|revisions',
      rdlimit: 'max',
      rdnamespace: '0',
      // pageid in rdprop is not mandatory in general, but required for proper
      // mdwiki API operation
      rdprop: 'pageid|title|fragment',
      redirects: false,
      formatversion: '2',
      maxlag: config.defaults.maxlag,
    };

    this.#hasWikimediaDesktopApi = null;
    this.#hasWikimediaMobileApi = null;
    this.#hasVisualEditorApi = null;
    this.#hasRestApi = null;
    this.#hasActionParseApi = null;
    this.#hasCoordinates = null;
    this.#hasModuleApi = null;
    this.metaData = null;
  }

  constructor() {
    this.initializeMediaWikiDefaults();
  }

  /**
   * Probe (once) the Wikimedia desktop API using `apiCheckArticleId`.
   * @returns {Promise<*>} cached result of `checkApiAvailability`
   */
  async hasWikimediaDesktopApi() {
    if (this.#hasWikimediaDesktopApi === null) {
      this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.wikimediaDesktopApiUrl.href);
      const checkUrl = this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId);
      this.#hasWikimediaDesktopApi = await checkApiAvailability(checkUrl);
      logger.log('Checked for WikimediaDesktopApi at', checkUrl, '-- result is: ', this.#hasWikimediaDesktopApi);
    }
    return this.#hasWikimediaDesktopApi;
  }

  /**
   * Probe (once) the Wikimedia mobile API using `apiCheckArticleId`.
   * @returns {Promise<*>} cached result of `checkApiAvailability`
   */
  async hasWikimediaMobileApi() {
    if (this.#hasWikimediaMobileApi === null) {
      this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.wikimediaMobileApiUrl.href);
      const checkUrl = this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId);
      this.#hasWikimediaMobileApi = await checkApiAvailability(checkUrl);
      logger.log('Checked for WikimediaMobileApi at', checkUrl, '-- result is: ', this.#hasWikimediaMobileApi);
    }
    return this.#hasWikimediaMobileApi;
  }

  /**
   * Probe (once) the VisualEditor API, passing the director's
   * `validMimeTypes` to the availability check.
   * @returns {Promise<*>} cached result of `checkApiAvailability`
   */
  async hasVisualEditorApi() {
    if (this.#hasVisualEditorApi === null) {
      this.visualEditorUrlDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href);
      const checkUrl = this.visualEditorUrlDirector.buildArticleURL(this.apiCheckArticleId);
      this.#hasVisualEditorApi = await checkApiAvailability(checkUrl, this.visualEditorUrlDirector.validMimeTypes);
      logger.log('Checked for VisualEditorApi at', checkUrl, '-- result is: ', this.#hasVisualEditorApi);
    }
    return this.#hasVisualEditorApi;
  }

  /**
   * Probe (once) the REST API using `apiCheckArticleId`.
   * @returns {Promise<*>} cached result of `checkApiAvailability`
   */
  async hasRestApi() {
    if (this.#hasRestApi === null) {
      this.restApiUrlDirector = new RestApiURLDirector(this.restApiUrl.href);
      const checkUrl = this.restApiUrlDirector.buildArticleURL(this.apiCheckArticleId);
      this.#hasRestApi = await checkApiAvailability(checkUrl);
      logger.log('Checked for RestApi at', checkUrl, '-- result is: ', this.#hasRestApi);
    }
    return this.#hasRestApi;
  }

  /**
   * Probe (once) the action=parse API. Reads `this.metaData.langVar`, so
   * `metaData` must already be populated (see `getMwMetaData`).
   * @returns {Promise<*>} cached result of `checkApiAvailability`
   */
  async hasActionParseApi() {
    if (this.#hasActionParseApi === null) {
      this.actionParseUrlDirector = new ActionParseURLDirector(this.actionApiUrl.href, this.skin, this.metaData.langVar);
      const checkUrl = this.actionParseUrlDirector.buildArticleURL(this.apiCheckArticleId);
      this.#hasActionParseApi = await checkApiAvailability(checkUrl);
      logger.log(`Checked for ActionParseApi at ${checkUrl} -- result is: ${this.#hasActionParseApi}`);
    }
    return this.#hasActionParseApi;
  }

  /**
   * Determine (once) whether the wiki supports the `coordinates` prop by
   * issuing a query that requests it and looking for a warning mentioning
   * "coordinates" in the response.
   * @returns {Promise<boolean>}
   */
  async hasCoordinates() {
    if (this.#hasCoordinates === null) {
      const validNamespaceIds = this.namespacesToMirror.map((ns) => this.namespaces[ns].num);
      const reqOpts = {
        ...this.queryOpts,
        // add coordinates for this call to get proper warning if not supported
        prop: `${this.queryOpts.prop}|coordinates`,
        rdnamespace: validNamespaceIds.join('|'),
      };
      const resp = await Downloader.getJSON(this.#apiUrlDirector.buildQueryURL(reqOpts));
      const isCoordinateWarning = JSON.stringify(resp?.warnings?.query ?? '').includes('coordinates');
      logger.log(isCoordinateWarning ? 'Coordinates not available on this wiki' : 'Coordinates available on this wiki');
      this.#hasCoordinates = !isCoordinateWarning;
    }
    return this.#hasCoordinates;
  }

  /**
   * Probe (once) the module API by requesting the `startup` module.
   * @returns {Promise<*>} cached result of `checkApiAvailability`
   */
  async hasModuleApi() {
    if (this.#hasModuleApi === null) {
      // startup JS module is supposed to be available on all Mediawikis
      const checkUrl = `${this.modulePath}lang=en&modules=startup&only=scripts`;
      this.#hasModuleApi = await checkApiAvailability(checkUrl);
      // Report the module probe result (previously logged the REST API flag by mistake)
      logger.log('Checked for Module API at', checkUrl, '-- result is: ', this.#hasModuleApi);
    }
    return this.#hasModuleApi;
  }

  setWikimediaDesktopApiUrl() {
    this.wikimediaDesktopApiUrl = this.urlDirector.buildWikimediaDesktopApiUrl();
  }

  setWikimediaMobileApiUrl() {
    this.wikimediaMobileApiUrl = this.urlDirector.buildWikimediaMobileApiUrl();
  }

  setRestApiURL() {
    this.restApiUrl = this.urlDirector.buildRestApiUrl(this.#restApiPath);
  }

  /** Rebuild the API URL director from `actionApiUrl` and derive the VisualEditor URL. */
  setVisualEditorURL() {
    this.#apiUrlDirector = new ApiURLDirector(this.actionApiUrl.href);
    this.visualEditorApiUrl = this.#apiUrlDirector.buildVisualEditorURL();
  }

  setModuleURL() {
    this.modulePath = this.urlDirector.buildModuleURL(this.#modulePathOpt);
  }

  setMobileModuleUrl() {
    this.mobileModulePath = this.urlDirector.buildMobileModuleURL();
  }

  /**
   * Log in through the action API when both username and password are set;
   * otherwise do nothing. Fetches a login token first, then posts the
   * credentials.
   * @throws {Error} 'Login Failed' when the API result is not 'Success'
   */
  async login() {
    if (!this.#username || !this.#password) {
      return;
    }

    let url = `${this.actionApiUrl.href}?`;
    // Add domain if configured (encoded so that special characters cannot break the query string)
    if (this.#domain) {
      url = `${url}lgdomain=${encodeURIComponent(this.#domain)}&`;
    }

    // Getting token to login.
    const { content } = await Downloader.downloadContent(
      `${url}action=query&meta=tokens&type=login&format=json&formatversion=2&maxlag=${config.defaults.maxlag}`,
      'data',
    );

    // Logging in
    const resp = await Downloader.request({
      url: this.actionApiUrl.href,
      data: qs.stringify({
        action: 'login',
        format: 'json',
        lgname: this.#username,
        lgpassword: this.#password,
        lgtoken: JSON.parse(content.toString()).query.tokens.logintoken,
      }),
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded',
      },
      method: 'POST',
    });

    if (resp.data.login?.result !== 'Success') {
      const reason = resp.data.login?.reason;
      if (reason) {
        logger.error(reason);
      }
      throw new Error('Login Failed');
    }
    logger.log('Login Success');
  }

  /**
   * Populate `namespaces` and `namespacesToMirror` from a siteinfo response.
   * @param {object} json - object with `namespaces` and `namespacealiases` maps
   * @param {Array} addNamespaces - namespace ids to treat as content in addition
   *   to those flagged by the wiki
   */
  setNamespaces(json, addNamespaces) {
    for (const type of ['namespaces', 'namespacealiases']) {
      const isPrimary = type === 'namespaces';
      for (const entry of Object.values(json[type])) {
        const name = isPrimary ? entry.name : entry.alias;
        const num = entry.id;
        const allowedSubpages = 'subpages' in entry;
        const isExtraNamespace = util.contains(addNamespaces, num);
        // For aliases, the mere presence of a `content` key counts as content
        const isContent = isPrimary ? !!(entry.content || isExtraNamespace) : !!(entry.content !== undefined || isExtraNamespace);
        const isBlacklisted = BLACKLISTED_NS.includes(name);
        const canonical = entry.canonical ? entry.canonical : '';
        const details = { num, allowedSubpages, isContent };

        /* Namespaces in local language */
        this.namespaces[util.lcFirst(name)] = details;
        this.namespaces[util.ucFirst(name)] = details;

        /* Namespaces in English (if available) */
        if (canonical) {
          this.namespaces[util.lcFirst(canonical)] = details;
          this.namespaces[util.ucFirst(canonical)] = details;
        }

        /* Is content to mirror */
        if (isContent && !isBlacklisted) {
          this.namespacesToMirror.push(name);
        }
      }
    }
  }

  /**
   * Derive a page title from a link target.
   * @param {string} href - link target, resolved against `baseUrl`
   * @returns {string|null} the title, or `null` when none can be derived
   *   (e.g. an interwiki link) or when parsing throws
   */
  extractPageTitleFromHref(href) {
    try {
      const pathname = new URL(href, this.baseUrl).pathname;

      // Link to domain root when main page is domain root
      if (href === '/' && this.metaData.mainPageIsDomainRoot) {
        return this.metaData.mainPage;
      }

      // Link to index.php with query parameters like "/w/index.php?title=Blue_whale"
      if (pathname === this.#indexPhpPath) {
        const queryString = href.split('?')[1];
        const params = new URLSearchParams(queryString);
        return params.get('title');
      }

      // Local relative URL
      if (href.startsWith('./')) {
        return util.decodeURIComponent(pathname.slice(1));
      }

      // Absolute path
      if (pathname.startsWith(this.webUrl.pathname)) {
        return util.decodeURIComponent(pathname.slice(this.webUrl.pathname.length));
      }

      const isPaginatedRegExp = /\/[0-9]+(\.|$)/;
      const isPaginated = isPaginatedRegExp.test(href);
      if (isPaginated) {
        const withoutDotHtml = href.split('.').slice(0, -1).join('.');
        const lastTwoSlashes = withoutDotHtml.split('/').slice(-2).join('/');
        return lastTwoSlashes;
      }

      const parsedHref = pathParser.parse(href);
      if (parsedHref.dir.includes('../')) {
        return parsedHref.name;
      }

      return null; /* Interwiki link? -- return null */
    } catch {
      logger.warn(`Unable to parse href ${href}`);
      return null;
    }
  }

  /**
   * Find a suitable name to use for ZIM (content) creator.
   * Heuristic: use the first label of the hostname unless
   * - the second label is a known Wikimedia project name, OR
   * - the second label is longer than the first one,
   * in which case the second label is used. The result is capitalised.
   * @returns {string}
   */
  getCreatorName() {
    const hostParts = this.baseUrl.hostname.split('.');
    let creator = hostParts[0];
    if (hostParts.length > 1) {
      const wmProjects = new Set(['wikipedia', 'wikisource', 'wikibooks', 'wikiquote', 'wikivoyage', 'wikiversity', 'wikinews', 'wiktionary']);
      if (wmProjects.has(hostParts[1]) || hostParts[0].length < hostParts[1].length) {
        creator = hostParts[1]; // Name of the wikimedia project
      }
    }
    return creator.charAt(0).toUpperCase() + creator.slice(1);
  }

  /**
   * Pick the default skin code from a list of skin descriptors.
   * @param {Array<{code: string, default?: *}>} skins
   * @returns {string} code of the first skin flagged as default
   * @throws {Error} when no skin is flagged as default
   */
  getDefaultSkin(skins) {
    const defaultSkins = skins.filter((skin) => skin.default).map((skin) => skin.code);
    if (defaultSkins.length === 0) {
      throw new Error(`This wiki has no default skin:\n${JSON.stringify(skins)}`);
    }
    if (defaultSkins.length > 1) {
      logger.warn('Multiple default skins found, defaulting to first default one');
    }
    return defaultSkins[0];
  }

  /**
   * Query the wiki's site info, validate it against the supplied options and
   * configure paths, namespaces, gadgets and skin on this instance.
   * @param {object} [opts]
   * @param {string} [opts.mwWikiPath] - article path prefix overriding the wiki's
   * @param {string} [opts.mwIndexPhpPath] - index.php path overriding the wiki's
   * @param {Array} [opts.addNamespaces] - extra namespace ids to mirror
   * @param {string} [opts.mwRestApiPath] - REST API path overriding the default
   * @param {string} [opts.mwModulePath] - module path overriding the default
   * @param {string} [opts.forceSkin] - skin code to use instead of the default
   * @param {string} [opts.langVariant] - language variant to use
   * @returns {Promise<object>} site description used to build the metadata
   * @throws {Error} on unsupported MediaWiki version, unsupported article path,
   *   unusable forced skin, unavailable language variant or missing default skin
   */
  async getSiteInfo({ mwWikiPath, mwIndexPhpPath, addNamespaces, mwRestApiPath, mwModulePath, forceSkin, langVariant } = {}) {
    logger.log('Getting site info...');
    const body = await Downloader.querySiteInfo();
    const generalEntries = body.query.general;

    // Checking mediawiki version. `semver.coerce` yields null when the generator
    // string is missing or unparseable, so guard before reading `.raw`.
    const mwMinimalVersion = '1.27';
    const mwVersion = semver.coerce(generalEntries.generator)?.raw;
    if (!generalEntries.generator || !mwVersion || !semver.satisfies(mwVersion, `>=${mwMinimalVersion}`)) {
      throw new Error(`Mediawiki version ${mwVersion ?? generalEntries.generator} not supported should be >=${mwMinimalVersion}`);
    }

    this.setNamespaces(body.query, addNamespaces || []);
    Gadgets.setGadgets(body.query.gadgets);

    const { url: licenseUrl, text: licenseName } = body.query.rightsinfo;
    const subTitle = body.query.allmessages[0].content || '';
    const mainPage = generalEntries.mainpage.replace(/ /g, '_');
    const mainPageIsDomainRoot = generalEntries.mainpageisdomainroot;
    const siteName = generalEntries.sitename;
    const logo = generalEntries.logo;
    const langMw = generalEntries.lang;
    const textDir = generalEntries.rtl ? 'rtl' : 'ltr';
    logger.log(`Text direction is [${textDir}]`);

    // Gather languages codes (and remove the 'dialect' part)
    const fallbackCodes = (generalEntries.fallback ?? []).map((e) => e.code);
    const langs = [langMw, ...fallbackCodes].map((code) => code.replace(/-.*/, ''));
    const langPairs = await Promise.all(
      langs.map(async (lang) => {
        try {
          return [lang, await util.getIso3(lang)];
        } catch {
          // No ISO 639-3 mapping available: fall back to the code itself
          return [lang, lang];
        }
      }),
    );
    const [langIso2, langIso3] = langPairs[0] || ['en', 'eng'];

    // Use CLI parameter and set MediaWiki config
    const cliArticlePath = mwWikiPath ? `${mwWikiPath}$1` : mwWikiPath;
    if (cliArticlePath && cliArticlePath !== generalEntries.articlepath) {
      logger.warn(`mwWikiPath [${cliArticlePath}] does not match the path [${generalEntries.articlepath}] returned by the wiki.`);
    }
    const articlepath = cliArticlePath || generalEntries.articlepath;
    if (articlepath.includes('?') || !articlepath.endsWith('$1')) {
      throw new Error(`Article path [${articlepath}] is not supported`);
    }
    this.wikiPath = articlepath.replace('$1', '');

    if (mwIndexPhpPath && mwIndexPhpPath !== generalEntries.script) {
      logger.warn(`mwIndexPhpPath [${mwIndexPhpPath}] does not match the path [${generalEntries.script}] returned by the wiki.`);
    }
    this.indexPhpPath = mwIndexPhpPath || generalEntries.script;
    this.restApiPath = mwRestApiPath || `${generalEntries.scriptpath}/rest.php`;
    this.modulePathOpt = mwModulePath || `${generalEntries.scriptpath}/load.php`;

    const skins = body.query.skins.filter((skin) => !skin.unusable);
    if (forceSkin && !skins.map((skin) => skin.code).includes(forceSkin)) {
      throw new Error(`Skin [${forceSkin}] is not usable on the wiki`);
    }
    this.skin = forceSkin || this.getDefaultSkin(skins);

    const validLangVars = (generalEntries.variants || []).map((e) => e.code);
    if (langVariant && !validLangVars.includes(langVariant)) {
      throw new Error(`Language variant [${langVariant}] is not available on the wiki`);
    }
    const langVar = langVariant || null;

    return {
      mainPage,
      mainPageIsDomainRoot,
      siteName,
      textDir,
      langMw,
      langVar,
      langIso2,
      langIso3,
      logo,
      licenseName,
      licenseUrl,
      subTitle,
    };
  }

  /**
   * Build (once) and cache the metadata object describing the wiki. Later
   * calls return the cached object and ignore `argvOpts`.
   * @param {object} [argvOpts] - options forwarded to `getSiteInfo`
   * @returns {Promise<object>}
   */
  async getMwMetaData(argvOpts = {}) {
    if (this.metaData) {
      return this.metaData;
    }

    const creator = this.getCreatorName() || 'Kiwix';
    const { langIso2, langIso3, mainPage, mainPageIsDomainRoot, siteName, logo, langMw, langVar, textDir, licenseName, licenseUrl, subTitle } =
      await this.getSiteInfo(argvOpts);

    const mwMetaData = {
      webUrl: this.webUrl.href,
      actionApiUrl: this.actionApiUrl.href,
      restApiUrl: this.restApiUrl.href,
      modulePathOpt: this.#modulePathOpt,
      modulePath: this.modulePath,
      mobileModulePath: this.mobileModulePath,
      webUrlPath: this.webUrl.pathname,
      wikiPath: this.#wikiPath,
      indexPhpPath: this.#indexPhpPath,
      baseUrl: this.baseUrl.href,
      actionApiPath: this.#actionApiPath,
      restApiPath: this.#restApiPath,
      domain: this.#domain,
      textDir: textDir,
      langMw,
      langVar,
      langIso2,
      langIso3,
      title: siteName,
      subTitle,
      creator,
      mainPage,
      mainPageIsDomainRoot,
      logo,
      licenseName,
      licenseUrl,
    };
    this.metaData = mwMetaData;
    return mwMetaData;
  }

  /** Restore the defaults set by `initializeMediaWikiDefaults`. */
  reset() {
    this.initializeMediaWikiDefaults();
  }
}

const mw = MediaWiki.getInstance();
export default mw;
//# sourceMappingURL=MediaWiki.js.map