UNPKG

mwoffliner

Version:
442 lines 17.8 kB
import * as pathParser from 'path';
import * as logger from './Logger.js';
import * as util from './util/index.js';
import * as domino from 'domino';
import qs from 'querystring';
import semver from 'semver';
import basicURLDirector from './util/builders/url/basic.director.js';
import BaseURLDirector from './util/builders/url/base.director.js';
import ApiURLDirector from './util/builders/url/api.director.js';
import WikimediaDesktopURLDirector from './util/builders/url/desktop.director.js';
import WikimediaMobileURLDirector from './util/builders/url/mobile.director.js';
import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js';
import RestApiURLDirector from './util/builders/url/rest-api.director.js';
import { checkApiAvailability } from './util/mw-api.js';
import { BLACKLISTED_NS } from './util/const.js';

/**
 * Process-wide holder for everything known about the MediaWiki instance being
 * processed: configured paths, derived URLs, credentials, namespaces and the
 * cached results of the API capability probes.
 *
 * A single shared instance is created through `MediaWiki.getInstance()` and is
 * the default export of this module.
 */
class MediaWiki {
    static instance;

    /**
     * Return the shared instance, creating it on first use.
     *
     * @returns {MediaWiki}
     */
    static getInstance() {
        if (!MediaWiki.instance) {
            MediaWiki.instance = new MediaWiki();
        }
        return MediaWiki.instance;
    }

    metaData;
    baseUrl;
    getCategories;
    namespaces = {};
    namespacesToMirror = [];
    apiCheckArticleId;
    queryOpts;
    urlDirector;
    #wikiPath;
    #actionApiPath;
    #modulePathOpt;
    #restApiPath;
    #username;
    #password;
    #domain;
    wikimediaDesktopUrlDirector;
    wikimediaMobileUrlDirector;
    visualEditorUrlDirector;
    restApiUrlDirector;
    visualEditorApiUrl;
    actionApiUrl;
    restApiUrl;
    webUrl;
    wikimediaDesktopApiUrl;
    wikimediaMobileApiUrl;
    modulePath; // only for reading
    mobileModulePath;
    #apiUrlDirector;
    // Capability caches: `null` means "not probed yet", otherwise the stored probe result.
    #hasWikimediaDesktopApi;
    #hasWikimediaMobileApi;
    #hasVisualEditorApi;
    #hasRestApi;
    #hasCoordinates;

    set username(value) {
        this.#username = value;
    }

    set password(value) {
        this.#password = value;
    }

    /**
     * Set the action API path and rebuild the URLs derived from it.
     * Falsy values are ignored. Uses `this.urlDirector`, which is only
     * assigned by the `base` setter.
     */
    set actionApiPath(value) {
        if (value) {
            this.#actionApiPath = value;
            this.actionApiUrl = this.urlDirector.buildURL(this.#actionApiPath);
            this.setVisualEditorURL();
        }
    }

    /** Set the REST API path and rebuild `restApiUrl`. Falsy values are ignored. */
    set restApiPath(value) {
        if (value) {
            this.#restApiPath = value;
            this.setRestApiURL();
        }
    }

    set domain(value) {
        this.#domain = value;
    }

    /** Set the wiki (article) path and rebuild `webUrl`. Falsy values are ignored. */
    set wikiPath(value) {
        if (value) {
            this.#wikiPath = value;
            this.webUrl = this.urlDirector.buildURL(this.#wikiPath);
        }
    }

    /**
     * Set the base URL of the wiki and (re)build the URL director together
     * with every URL derived from the currently configured paths.
     * Falsy values are ignored.
     */
    set base(value) {
        if (value) {
            this.baseUrl = basicURLDirector.buildMediawikiBaseURL(value);
            this.urlDirector = new BaseURLDirector(this.baseUrl.href);
            this.webUrl = this.urlDirector.buildURL(this.#wikiPath);
            this.actionApiUrl = this.urlDirector.buildURL(this.#actionApiPath);
            this.setWikimediaDesktopApiUrl();
            this.setWikimediaMobileApiUrl();
            this.setRestApiURL();
            this.setVisualEditorURL();
            this.setModuleURL();
            this.setMobileModuleUrl();
        }
    }

    /**
     * Set the module path option and rebuild `modulePath` when a URL director
     * is already available. `undefined` keeps the current option; any other
     * value (including falsy ones) overwrites it.
     */
    set modulePathOpt(value) {
        if (value !== undefined) {
            this.#modulePathOpt = value;
        }
        if (this.urlDirector) {
            this.setModuleURL();
        } else if (value) {
            logger.error('Base url director should be specified first');
        }
    }

    /**
     * Restore the default paths, credentials, namespaces, query options and
     * capability caches.
     *
     * NOTE(review): the cached `metaData` and the already derived URLs /
     * URL directors are not touched here -- confirm this is intended for `reset()`.
     */
    initializeMediaWikiDefaults() {
        this.#domain = '';
        this.#username = '';
        this.#password = '';
        this.getCategories = false;
        this.#actionApiPath = '/w/api.php';
        this.#restApiPath = '/w/rest.php';
        this.#wikiPath = '/wiki/';
        this.#modulePathOpt = '/w/load.php';
        this.namespaces = {};
        this.namespacesToMirror = [];
        this.apiCheckArticleId = 'MediaWiki:Sidebar';
        this.queryOpts = {
            action: 'query',
            format: 'json',
            prop: 'redirects|revisions',
            rdlimit: 'max',
            rdnamespace: 0,
            redirects: false,
            formatversion: '2',
        };
        this.#hasWikimediaDesktopApi = null;
        this.#hasWikimediaMobileApi = null;
        this.#hasVisualEditorApi = null;
        this.#hasRestApi = null;
        this.#hasCoordinates = null;
    }

    constructor() {
        this.initializeMediaWikiDefaults();
    }

    /**
     * Probe the Wikimedia desktop API with the check article.
     * The probe runs once; later calls return the cached result.
     *
     * @param downloader - passed through to `checkApiAvailability`
     * @returns the result of `checkApiAvailability` for the check URL
     */
    async hasWikimediaDesktopApi(downloader) {
        if (this.#hasWikimediaDesktopApi === null) {
            this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.wikimediaDesktopApiUrl.href);
            const checkUrl = this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId);
            this.#hasWikimediaDesktopApi = await checkApiAvailability(downloader, checkUrl);
            logger.log('Checked for WikimediaDesktopApi at', checkUrl, '-- result is: ', this.#hasWikimediaDesktopApi);
        }
        return this.#hasWikimediaDesktopApi;
    }

    /**
     * Probe the Wikimedia mobile API with the check article.
     * The probe runs once; later calls return the cached result.
     *
     * @param downloader - passed through to `checkApiAvailability`
     * @returns the result of `checkApiAvailability` for the check URL
     */
    async hasWikimediaMobileApi(downloader) {
        if (this.#hasWikimediaMobileApi === null) {
            this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.wikimediaMobileApiUrl.href);
            const checkUrl = this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId);
            this.#hasWikimediaMobileApi = await checkApiAvailability(downloader, checkUrl);
            logger.log('Checked for WikimediaMobileApi at', checkUrl, '-- result is: ', this.#hasWikimediaMobileApi);
        }
        return this.#hasWikimediaMobileApi;
    }

    /**
     * Probe the VisualEditor API with the check article, restricting the
     * probe to the director's `validMimeTypes`.
     * The probe runs once; later calls return the cached result.
     *
     * @param downloader - passed through to `checkApiAvailability`
     * @returns the result of `checkApiAvailability` for the check URL
     */
    async hasVisualEditorApi(downloader) {
        if (this.#hasVisualEditorApi === null) {
            this.visualEditorUrlDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href);
            const checkUrl = this.visualEditorUrlDirector.buildArticleURL(this.apiCheckArticleId);
            this.#hasVisualEditorApi = await checkApiAvailability(downloader, checkUrl, this.visualEditorUrlDirector.validMimeTypes);
            logger.log('Checked for VisualEditorApi at', checkUrl, '-- result is: ', this.#hasVisualEditorApi);
        }
        return this.#hasVisualEditorApi;
    }

    /**
     * Probe the REST API with the check article.
     * The probe runs once; later calls return the cached result.
     *
     * @param downloader - passed through to `checkApiAvailability`
     * @returns the result of `checkApiAvailability` for the check URL
     */
    async hasRestApi(downloader) {
        if (this.#hasRestApi === null) {
            this.restApiUrlDirector = new RestApiURLDirector(this.restApiUrl.href);
            const checkUrl = this.restApiUrlDirector.buildArticleURL(this.apiCheckArticleId);
            this.#hasRestApi = await checkApiAvailability(downloader, checkUrl);
            logger.log('Checked for RestApi at', checkUrl, '-- result is: ', this.#hasRestApi);
        }
        return this.#hasRestApi;
    }

    /**
     * Tell whether the wiki answers `coordinates` queries without a warning.
     * A query with `|coordinates` appended to the default `prop` is sent; if the
     * serialized `warnings.query` of the response mentions "coordinates", the
     * feature is considered unavailable. The answer is cached.
     *
     * @param downloader - must provide `getJSON(url)`
     * @returns {Promise<boolean>}
     */
    async hasCoordinates(downloader) {
        if (this.#hasCoordinates !== null) {
            return this.#hasCoordinates;
        }
        const validNamespaceIds = this.namespacesToMirror.map((ns) => this.namespaces[ns].num);
        const reqOpts = {
            ...this.queryOpts,
            prop: this.queryOpts.prop + '|coordinates',
            rdnamespace: validNamespaceIds,
        };
        const resp = await downloader.getJSON(this.#apiUrlDirector.buildQueryURL(reqOpts));
        const isCoordinateWarning = JSON.stringify(resp?.warnings?.query ?? '').includes('coordinates');
        if (isCoordinateWarning) {
            logger.log('Coordinates not available on this wiki');
            this.#hasCoordinates = false;
        } else {
            logger.log('Coordinates available on this wiki');
            this.#hasCoordinates = true;
        }
        return this.#hasCoordinates;
    }

    setWikimediaDesktopApiUrl() {
        this.wikimediaDesktopApiUrl = this.urlDirector.buildWikimediaDesktopApiUrl();
    }

    setWikimediaMobileApiUrl() {
        this.wikimediaMobileApiUrl = this.urlDirector.buildWikimediaMobileApiUrl();
    }

    setRestApiURL() {
        this.restApiUrl = this.urlDirector.buildRestApiUrl(this.#restApiPath);
    }

    /** Rebuild the action API director and the VisualEditor URL from `actionApiUrl`. */
    setVisualEditorURL() {
        this.#apiUrlDirector = new ApiURLDirector(this.actionApiUrl.href);
        this.visualEditorApiUrl = this.#apiUrlDirector.buildVisualEditorURL();
    }

    setModuleURL() {
        this.modulePath = this.urlDirector.buildModuleURL(this.#modulePathOpt);
    }

    setMobileModuleUrl() {
        this.mobileModulePath = this.urlDirector.buildMobileModuleURL();
    }

    /**
     * Log in through the action API when both a username and a password are
     * configured; otherwise do nothing. On success the `set-cookie` headers of
     * the login response are joined with ';' and stored in `downloader.loginCookie`.
     *
     * @param downloader - must provide `downloadContent`, `request` and `arrayBufferRequestOptions`
     * @throws {Error} 'Login Failed' when the API result is not 'Success'
     */
    async login(downloader) {
        if (!this.#username || !this.#password) {
            return;
        }
        let url = this.actionApiUrl.href + '?';
        // Add domain if configured (encoded so reserved characters cannot break the query string)
        if (this.#domain) {
            url = `${url}lgdomain=${encodeURIComponent(this.#domain)}&`;
        }
        // Getting token to login.
        const { content } = await downloader.downloadContent(url + 'action=query&meta=tokens&type=login&format=json&formatversion=2', 'data');
        // Logging in
        const resp = await downloader.request({
            url: this.actionApiUrl.href,
            ...downloader.arrayBufferRequestOptions,
            data: qs.stringify({
                action: 'login',
                format: 'json',
                lgname: this.#username,
                lgpassword: this.#password,
                lgtoken: JSON.parse(content.toString()).query.tokens.logintoken,
            }),
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
            },
            method: 'POST',
        });
        if (resp.data.login.result !== 'Success') {
            throw new Error('Login Failed');
        }
        downloader.loginCookie = resp.headers['set-cookie'].join(';');
    }

    /**
     * Fetch namespaces and namespace aliases from the action API and register
     * them in `this.namespaces` (under lower/upper-cased-first local and
     * canonical names) and, for non-blacklisted content namespaces, in
     * `this.namespacesToMirror`.
     *
     * @param addNamespaces - namespace ids to treat as content in addition to the wiki's own
     * @param downloader - must provide `getJSON(url)`
     */
    async getNamespaces(addNamespaces, downloader) {
        const url = this.#apiUrlDirector.buildNamespacesURL();
        const json = await downloader.getJSON(url);
        for (const type of ['namespaces', 'namespacealiases']) {
            const isNamespaceType = type === 'namespaces';
            for (const entry of Object.values(json.query[type])) {
                const name = isNamespaceType ? entry.name : entry.alias;
                const num = entry.id;
                const allowedSubpages = 'subpages' in entry;
                // Aliases count as content as soon as a `content` key is present, whatever its value.
                const hasContentFlag = isNamespaceType ? entry.content : entry.content !== undefined;
                const isContent = !!(hasContentFlag || util.contains(addNamespaces, num));
                const isBlacklisted = BLACKLISTED_NS.includes(name);
                const canonical = entry.canonical ? entry.canonical : '';
                const details = { num, allowedSubpages, isContent };
                /* Namespaces in local language */
                this.namespaces[util.lcFirst(name)] = details;
                this.namespaces[util.ucFirst(name)] = details;
                /* Namespaces in English (if available) */
                if (canonical) {
                    this.namespaces[util.lcFirst(canonical)] = details;
                    this.namespaces[util.ucFirst(canonical)] = details;
                }
                /* Is content to mirror */
                if (isContent && !isBlacklisted) {
                    this.namespacesToMirror.push(name);
                }
            }
        }
    }

    /**
     * Derive a page title from a link target.
     *
     * @param {string} href - link target, resolved against `this.baseUrl`
     * @returns {string|null} the title, or `null` when the link does not look
     *   like a local page (e.g. interwiki link) or cannot be parsed
     */
    extractPageTitleFromHref(href) {
        try {
            const pathname = new URL(href, this.baseUrl).pathname;
            // Local relative URL
            if (href.startsWith('./')) {
                return util.decodeURIComponent(pathname.slice(1));
            }
            // Absolute path
            const webPath = this.webUrl.pathname;
            if (pathname.startsWith(webPath)) {
                return util.decodeURIComponent(pathname.slice(webPath.length));
            }
            const isPaginated = /\/[0-9]+(\.|$)/.test(href);
            if (isPaginated) {
                // Drop a trailing extension only when the dot belongs to the last path
                // segment; a dot in an earlier segment (e.g. '../') must not truncate the href.
                const lastDot = href.lastIndexOf('.');
                const withoutExtension = lastDot > href.lastIndexOf('/') ? href.slice(0, lastDot) : href;
                return withoutExtension.split('/').slice(-2).join('/');
            }
            const parsedHref = pathParser.parse(href);
            if (parsedHref.dir.includes('../')) {
                return parsedHref.name;
            }
            return null; /* Interwiki link? -- return null */
        } catch {
            logger.warn(`Unable to parse href ${href}`);
            return null;
        }
    }

    /**
     * Find a suitable name to use for ZIM (content) creator.
     * Heuristic: use the first label of the host name unless
     * - the second label is a known wikimedia project OR
     * - the second label is longer than the first one,
     * in which case the second label is used. The first character is upper-cased.
     *
     * @returns {string}
     */
    getCreatorName() {
        const hostParts = this.baseUrl.hostname.split('.');
        let creator = hostParts[0];
        if (hostParts.length > 1) {
            const wmProjects = new Set(['wikipedia', 'wikisource', 'wikibooks', 'wikiquote', 'wikivoyage', 'wikiversity', 'wikinews', 'wiktionary']);
            if (wmProjects.has(hostParts[1]) || hostParts[0].length < hostParts[1].length) {
                creator = hostParts[1]; // Name of the wikimedia project
            }
        }
        return creator.charAt(0).toUpperCase() + creator.slice(1);
    }

    /**
     * Determine the text direction from the page at `webUrl`: first from the
     * `"pageLanguageDir"` value found in the raw HTML, then from the `dir`
     * attribute of `#mw-content-text`, falling back to 'ltr'.
     *
     * @param downloader - must provide `downloadContent(url, kind)`
     * @returns {Promise<'ltr'|'rtl'>}
     */
    async getTextDirection(downloader) {
        logger.log('Getting text direction...');
        const { content } = await downloader.downloadContent(this.webUrl.href, 'data');
        const body = content.toString();
        const doc = domino.createDocument(body);
        const contentNode = doc.getElementById('mw-content-text');
        const languageDirectionRegex = /"pageLanguageDir":"(.*?)"/;
        const parts = languageDirectionRegex.exec(body);
        let isLtr = true;
        if (parts && parts[1]) {
            isLtr = parts[1] === 'ltr';
        } else if (contentNode) {
            isLtr = contentNode.getAttribute('dir') === 'ltr';
        } else {
            logger.log('Unable to get the language direction, fallback to ltr');
            isLtr = true;
        }
        const textDir = isLtr ? 'ltr' : 'rtl';
        logger.log(`Text direction is [${textDir}]`);
        return textDir;
    }

    /**
     * Read general site information and validate the MediaWiki version.
     *
     * @param downloader - must provide `query()` resolving to a response with `query.general`
     * @returns {Promise<{mainPage: string, siteName: string, langIso2: string, langIso3: string}>}
     * @throws {Error} when the generator is missing, not parseable as a version,
     *   or older than the minimal supported version
     */
    async getSiteInfo(downloader) {
        logger.log('Getting site info...');
        const body = await downloader.query();
        const entries = body.query.general;

        // Checking mediawiki version. `semver.coerce` yields null for a missing or
        // unparseable generator, so it must be checked before reading `.raw`.
        const mwMinimalVersion = '1.27';
        const coercedVersion = semver.coerce(entries.generator);
        if (!coercedVersion || !semver.satisfies(coercedVersion.raw, `>=${mwMinimalVersion}`)) {
            const mwVersion = coercedVersion ? coercedVersion.raw : entries.generator;
            throw new Error(`Mediawiki version ${mwVersion} not supported should be >=${mwMinimalVersion}`);
        }

        const mainPage = entries.mainpage.replace(/ /g, '_');
        const siteName = entries.sitename;

        // Gather language codes (and remove the 'dialect' part)
        const fallbackCodes = (entries.fallback ?? []).map((e) => e.code);
        const langs = [entries.lang, ...fallbackCodes].map((code) => code.replace(/-.*/, ''));
        const langPairs = await Promise.all(langs.map(async (lang) => {
            try {
                return [lang, await util.getIso3(lang)];
            } catch {
                // No ISO 639-3 code could be resolved: reuse the short code.
                return [lang, lang];
            }
        }));
        // Only the first (primary) language pair is reported.
        const [langIso2, langIso3] = langPairs[0] || ['en', 'eng'];

        return {
            mainPage,
            siteName,
            langIso2,
            langIso3,
        };
    }

    /**
     * Return the inner HTML of the `#siteSub` element of the page at `webUrl`,
     * or an empty string when that element is absent.
     *
     * @param downloader - must provide `downloadContent(url, kind)`
     * @returns {Promise<string>}
     */
    async getSubTitle(downloader) {
        logger.log('Getting sub-title...');
        const { content } = await downloader.downloadContent(this.webUrl.href, 'data');
        const html = content.toString();
        const doc = domino.createDocument(html);
        const subTitleNode = doc.getElementById('siteSub');
        return subTitleNode ? subTitleNode.innerHTML : '';
    }

    /**
     * Collect the wiki metadata (URLs, paths, language, titles, creator).
     * The result is computed once and cached in `this.metaData`.
     *
     * @param downloader - forwarded to `getTextDirection`, `getSiteInfo` and `getSubTitle`
     * @returns {Promise<object>} the metadata object
     */
    async getMwMetaData(downloader) {
        if (this.metaData) {
            return this.metaData;
        }
        const creator = this.getCreatorName() || 'Kiwix';
        const [textDir, { langIso2, langIso3, mainPage, siteName }, subTitle] = await Promise.all([
            this.getTextDirection(downloader),
            this.getSiteInfo(downloader),
            this.getSubTitle(downloader),
        ]);
        const mwMetaData = {
            webUrl: this.webUrl.href,
            actionApiUrl: this.actionApiUrl.href,
            restApiUrl: this.restApiUrl.href,
            modulePathOpt: this.#modulePathOpt,
            modulePath: this.modulePath,
            mobileModulePath: this.mobileModulePath,
            webUrlPath: this.webUrl.pathname,
            wikiPath: this.#wikiPath,
            baseUrl: this.baseUrl.href,
            actionApiPath: this.#actionApiPath,
            restApiPath: this.#restApiPath,
            domain: this.#domain,
            textDir: textDir,
            langIso2,
            langIso3,
            title: siteName,
            subTitle,
            creator,
            mainPage,
        };
        this.metaData = mwMetaData;
        return mwMetaData;
    }

    /** Re-apply the defaults set by `initializeMediaWikiDefaults()`. */
    reset() {
        this.initializeMediaWikiDefaults();
    }
}

const mw = MediaWiki.getInstance();
export default mw;
//# sourceMappingURL=MediaWiki.js.map