UNPKG

mwoffliner

Version:
693 lines 33.7 kB
import * as domino from 'domino';
import * as logger from '../Logger.js';
import * as QueryStringParser from 'querystring';
import htmlMinifier from 'html-minifier-terser';
import MediaWiki from '../MediaWiki.js';
import RedisStore from '../RedisStore.js';
import DOMUtils from '../DOMUtils.js';
import DU from '../DOMUtils.js';
import { config } from '../config.js';
import { rewriteUrlsOfDoc } from '../util/rewriteUrls.js';
import { footerTemplate } from '../Templates.js';
import { getFullUrl, getMediaBase, getRelativeFilePath, interpolateTranslationString, encodeArticleIdForZimHtmlUrl, getStaticFiles } from '../util/misc.js';
import { processStylesheetContent } from '../util/dump.js';
import { isMainPage, isSubpage } from '../util/articles.js';

/**
 * Record `url` in `dependencies` unless it was already seen.
 *
 * @param {Object} srcCache - plain object used as a "seen URLs" set (mutated).
 * @param {string} url - absolute URL of the resource.
 * @param {string[]} dependencies - list the URL is appended to when new (mutated).
 */
function registerDependency(srcCache, url, dependencies) {
    if (!Object.prototype.hasOwnProperty.call(srcCache, url)) {
        srcCache[url] = true;
        dependencies.push(url);
    }
}

/**
 * Parse the query-string part of an absolute subtitle URL.
 *
 * Only the part after `?` is handed to the query-string parser; parsing the
 * whole URL would prefix the first parameter's key with the scheme/host/path.
 *
 * @param {string} url - absolute URL.
 * @returns {Object} parsed query parameters (e.g. `title`, `lang`).
 * @throws {TypeError} if `url` is not a valid absolute URL.
 */
function parseSubtitleQuery(url) {
    const { search } = new URL(url);
    return QueryStringParser.parse(search.slice(1));
}

/**
 * Turn a media URL into the `{ url, path }` pair returned by `processHtml`.
 *
 * @param {string} url - media URL.
 * @returns {{url: string, path: string}}
 */
function toMediaDependency(url) {
    return { url, path: getMediaBase(url, false) };
}

/**
 * Base renderer: cleans up article HTML, rewrites media references to local
 * paths, collects the media to download and merges the result into a page
 * template.
 */
export class Renderer {
    staticFilesListCommon = [];

    constructor() {
        if (this.staticFilesListCommon.length === 0) {
            this.staticFilesListCommon = getStaticFiles(config.output.jsResourcesCommon, config.output.cssResourcesCommon);
        }
    }

    /**
     * Process one `<audio>` / `<video>` element: keep a single source, rewrite
     * its URLs to local paths and collect what has to be downloaded.
     *
     * The element is removed from the document when `dump.nopic`, `dump.novid`
     * or `dump.nodet` is set, or when no usable source is found.
     *
     * @param {Object} dump - dump settings (reads `nopic`, `novid`, `nodet`).
     * @param {Object} srcCache - "seen URLs" object shared across the article (mutated).
     * @param {string} articleId - id of the article being rendered.
     * @param {Element} audioVideoEl - the `<audio>` or `<video>` element (mutated).
     * @returns {Promise<{imageDependencies: string[], videoDependencies: string[], subtitles: string[]}>}
     * @throws {Error} if the element is neither `<audio>` nor `<video>`.
     */
    async treatAudioVideo(dump, srcCache, articleId, audioVideoEl) {
        const imageDependencies = [];
        const videoDependencies = [];
        const subtitles = [];
        if (dump.nopic || dump.novid || dump.nodet) {
            DOMUtils.deleteNode(audioVideoEl);
            return { imageDependencies, videoDependencies, subtitles };
        }
        const isVideo = audioVideoEl.tagName === 'VIDEO';
        const isAudio = audioVideoEl.tagName === 'AUDIO';
        if (!isAudio && !isVideo) {
            throw new Error(`Cannot treat non audio/video HTML element: \n${audioVideoEl.outerHTML}`);
        }
        if (isVideo) {
            this.adjustVideoElementAttributes(audioVideoEl);
        }
        const chosenAudioVideoSourceEl = isVideo ? this.chooseBestVideoSource(audioVideoEl) : this.chooseBestAudioSource(audioVideoEl);
        if (!chosenAudioVideoSourceEl) {
            logger.warn(`Deleting audio/video HTML node missing an appropriate source in article '${articleId}':\n${audioVideoEl.outerHTML}`);
            DOMUtils.deleteNode(audioVideoEl);
            return { imageDependencies, videoDependencies, subtitles };
        }
        if (isVideo) {
            this.handleVideoPoster(audioVideoEl, articleId, imageDependencies, srcCache);
        }
        this.updateAudioVideoSrc(chosenAudioVideoSourceEl, articleId, srcCache, videoDependencies);
        const trackElements = Array.from(audioVideoEl.querySelectorAll('track'));
        for (const track of trackElements) {
            subtitles.push(await this.treatSubtitle(track, articleId));
        }
        return { imageDependencies, videoDependencies, subtitles };
    }

    /**
     * Enforce a minimum height of 40 on a `<video>` and turn on its controls.
     *
     * @param {Element} videoEl - the `<video>` element (mutated).
     */
    adjustVideoElementAttributes(videoEl) {
        const height = videoEl.getAttribute('height');
        if (height && Number(height) < 40) {
            videoEl.setAttribute('height', '40');
        }
        // NOTE(review): `controls` is a boolean attribute, so its value is irrelevant to
        // browsers; '40' is kept as-is so the emitted HTML does not change.
        videoEl.setAttribute('controls', '40');
    }

    /**
     * Pick the `<source>` child of a `<video>` that best fits the displayed
     * width and delete every other `<source>` child.
     *
     * Only sources whose `src` ends with `.webm` are considered. Source width is
     * read from `data-file-width`, falling back to `data-width`, then 0.
     * - No usable `width` on the `<video>`: the widest source wins.
     * - Otherwise: the source whose width is closest to the displayed width
     *   wins, a source at least as wide as the display being preferred over a
     *   narrower one.
     * On an exact tie, a `.vp9.webm` source replaces the current pick.
     *
     * @param {Element} videoEl - the `<video>` element (mutated).
     * @returns {Element|null} the kept `<source>`, the `<video>` itself when it
     *   has no `<source>` child but a `.webm` `src`, or `null` when nothing is usable.
     * @throws {Error} if `videoEl` is not a `<video>` element.
     */
    chooseBestVideoSource(videoEl) {
        if (videoEl.tagName !== 'VIDEO') {
            throw new Error(`Cannot choose best video source for non-video HTML element: \n${videoEl.outerHTML}`);
        }
        const videoSourceEls = Array.from(videoEl.children).filter((child) => child.tagName === 'SOURCE');
        const originalSrc = videoEl.getAttribute('src');
        // Rare edge case: <video> without any <source> child, only a webm `src` is acceptable
        if (videoSourceEls.length === 0) {
            return originalSrc && originalSrc.endsWith('.webm') ? videoEl : null;
        }
        // There are <source> element(s), so the `src` attribute on <video> is dropped
        if (originalSrc) {
            videoEl.removeAttribute('src');
        }
        const videoDisplayedWidth = Number(videoEl.getAttribute('width'));
        // Always compare widths as numbers: attributes are strings and `===` does not coerce
        const widthOf = (sourceEl) => Number(sourceEl.getAttribute('data-file-width') || sourceEl.getAttribute('data-width') || 0);
        let bestWidthDiff = 0; // only read once a source has been chosen
        let chosenVideoSourceEl = null;
        for (const videoSourceEl of videoSourceEls) {
            const sourceSrc = videoSourceEl.getAttribute('src');
            // Ignore non-webm sources (including sources without any `src`)
            if (!sourceSrc?.endsWith('.webm')) {
                DOMUtils.deleteNode(videoSourceEl);
                continue;
            }
            const sourceWidth = widthOf(videoSourceEl);
            const widthDiff = sourceWidth - videoDisplayedWidth;
            const winsTies = sourceSrc.endsWith('.vp9.webm');
            let isBetter;
            if (!chosenVideoSourceEl) {
                // Nothing picked so far: take this one, even without width information
                isBetter = true;
            } else if (!videoDisplayedWidth) {
                // Unknown displayed width: take the best (largest) resolution
                const chosenWidth = widthOf(chosenVideoSourceEl);
                isBetter = sourceWidth > chosenWidth || (sourceWidth === chosenWidth && winsTies);
            } else if (widthDiff >= 0) {
                // Source is at least as wide as the display: beats any narrower pick, else smallest excess wins
                isBetter = bestWidthDiff < 0 || widthDiff < bestWidthDiff || (widthDiff === bestWidthDiff && winsTies);
            } else {
                // Source is narrower than the display: only beats an even narrower pick
                isBetter = widthDiff > bestWidthDiff || (widthDiff === bestWidthDiff && winsTies);
            }
            if (!isBetter) {
                DOMUtils.deleteNode(videoSourceEl);
                continue;
            }
            if (chosenVideoSourceEl) {
                DOMUtils.deleteNode(chosenVideoSourceEl);
            }
            chosenVideoSourceEl = videoSourceEl;
            bestWidthDiff = widthDiff;
        }
        return chosenVideoSourceEl;
    }

    /**
     * Pick the `<source>` child of an `<audio>` to keep and delete the others:
     * the first ogg source if there is one, otherwise the first source.
     *
     * The `src` and `resource` attributes of the `<audio>` itself are always
     * removed; when the element has no `<source>` child, the original value
     * (`src`, else `resource`) is written back to `src`.
     *
     * @param {Element} audioEl - the `<audio>` element (mutated).
     * @returns {Element|null} the kept `<source>`, the `<audio>` itself when it
     *   has no `<source>` child but a source URL, or `null` when nothing is usable.
     * @throws {Error} if `audioEl` is not an `<audio>` element.
     */
    chooseBestAudioSource(audioEl) {
        if (audioEl.tagName !== 'AUDIO') {
            throw new Error(`Cannot choose best audio source for non-audio HTML element: \n${audioEl.outerHTML}`);
        }
        const audioSourceEls = Array.from(audioEl.children).filter((child) => child.tagName === 'SOURCE');
        // In some cases the source URL is found in the `resource` attribute
        const originalSrc = audioEl.getAttribute('src') || audioEl.getAttribute('resource');
        // Cleanup src and resource attributes should they exist
        audioEl.removeAttribute('src');
        audioEl.removeAttribute('resource');
        // Standard case where <audio> has no <source> child
        if (audioSourceEls.length === 0) {
            if (!originalSrc) {
                return null;
            }
            // Set the source back in `src`, no matter where it originally was
            audioEl.setAttribute('src', originalSrc);
            return audioEl;
        }
        const isOggAudio = (sourceEl) => sourceEl.getAttribute('type')?.startsWith('audio/ogg') || sourceEl.getAttribute('src')?.endsWith('.ogg');
        let chosenAudioSourceEl = null;
        for (const audioSourceEl of audioSourceEls) {
            if (!chosenAudioSourceEl) {
                // Use first element by default
                chosenAudioSourceEl = audioSourceEl;
            } else if (!isOggAudio(chosenAudioSourceEl) && isOggAudio(audioSourceEl)) {
                // An ogg source replaces a non-ogg current pick
                DOMUtils.deleteNode(chosenAudioSourceEl);
                chosenAudioSourceEl = audioSourceEl;
            } else {
                DOMUtils.deleteNode(audioSourceEl);
            }
        }
        return chosenAudioSourceEl;
    }

    /**
     * Rewrite the `poster` attribute of a `<video>` to a local path and register
     * the poster image for download. Does nothing when there is no poster.
     *
     * @param {Element} videoEl - the `<video>` element (mutated).
     * @param {string} articleId - id of the article being rendered.
     * @param {string[]} imageDependencies - receives the poster URL when new (mutated).
     * @param {Object} srcCache - "seen URLs" object (mutated).
     */
    handleVideoPoster(videoEl, articleId, imageDependencies, srcCache) {
        const posterUrl = videoEl.getAttribute('poster');
        if (!posterUrl) {
            return;
        }
        const videoPosterUrl = getFullUrl(posterUrl, MediaWiki.baseUrl);
        const newVideoPosterUrl = getRelativeFilePath(articleId, getMediaBase(videoPosterUrl, true));
        videoEl.setAttribute('poster', newVideoPosterUrl);
        videoEl.removeAttribute('resource');
        registerDependency(srcCache, videoPosterUrl, imageDependencies);
    }

    /**
     * Register the chosen audio/video source for download (once per URL) and
     * point its `src` attribute to the local file.
     *
     * @param {Element} chosenVideoSourceEl - element carrying the `src` to rewrite (mutated).
     * @param {string} articleId - id of the article being rendered.
     * @param {Object} srcCache - "seen URLs" object (mutated).
     * @param {string[]} videoDependencies - receives the source URL when new (mutated).
     */
    updateAudioVideoSrc(chosenVideoSourceEl, articleId, srcCache, videoDependencies) {
        const sourceUrl = getFullUrl(chosenVideoSourceEl.getAttribute('src'), MediaWiki.baseUrl);
        /* Download content, but avoid duplicate calls */
        registerDependency(srcCache, sourceUrl, videoDependencies);
        /* Set new URL for the element */
        const fileBase = getMediaBase(sourceUrl, true);
        chosenVideoSourceEl.setAttribute('src', getRelativeFilePath(articleId, fileBase));
    }

    /**
     * Rewrite a `<track>` element to a local `<title>-<lang>.vtt` file and
     * return the URL to download the subtitle from, with `trackformat=vtt`.
     *
     * `title` and `lang` are read from the query string of the track's `src`.
     *
     * @param {Element} trackEle - the `<track>` element (mutated).
     * @param {string} articleId - id of the article being rendered.
     * @returns {Promise<string>} absolute URL of the subtitle in vtt format.
     */
    async treatSubtitle(trackEle, articleId) {
        const subtitleSourceUrl = getFullUrl(trackEle.getAttribute('src'), MediaWiki.baseUrl);
        const vttFormatUrl = new URL(subtitleSourceUrl);
        const { title, lang } = parseSubtitleQuery(vttFormatUrl.href);
        // The source URL found in the article asks for srt; vtt is the standard format for <track>
        vttFormatUrl.searchParams.set('trackformat', 'vtt');
        trackEle.setAttribute('src', `${getRelativeFilePath(articleId, title)}-${lang}.vtt`);
        return vttFormatUrl.href;
    }

    /**
     * Replace an image frame (`<figure>` or frameless `<span>`) by a
     * `div.thumb > div.thumbinner` structure holding the image and its caption.
     *
     * The frame is deleted when `shouldKeepNode` rejects it. Nothing else is
     * done when the concrete class is named `ActionParseRenderer`.
     *
     * @param {Object} dump - dump settings (reads `nopic`, `mwMetaData.textDir`).
     * @param {Document} parsoidDoc - document owning `imageNode`.
     * @param {Element} imageNode - the frame element (replaced or deleted).
     */
    treatImageFrames(dump, parsoidDoc, imageNode) {
        const image = imageNode.getElementsByTagName('img')[0] || imageNode.getElementsByTagName('video')[0];
        if (!this.shouldKeepNode(dump, imageNode, image)) {
            DOMUtils.deleteNode(imageNode);
            return;
        }
        if (this.constructor.name === 'ActionParseRenderer') {
            return;
        }
        const description = imageNode.getElementsByTagName('figcaption')[0];
        const imageWidth = parseInt(image.getAttribute('width'), 10);
        const thumbDiv = this.makeThumbDiv(dump, parsoidDoc, imageNode);
        const thumbinnerDiv = parsoidDoc.createElement('div');
        thumbinnerDiv.setAttribute('class', 'thumbinner');
        // Without a usable width attribute, emit no style rather than `width:NaNpx`
        if (Number.isFinite(imageWidth)) {
            thumbinnerDiv.setAttribute('style', `width:${imageWidth + 2}px`);
        }
        const thumbcaptionDiv = parsoidDoc.createElement('div');
        thumbcaptionDiv.setAttribute('class', 'thumbcaption');
        const autoAlign = dump.mwMetaData.textDir === 'ltr' ? 'left' : 'right';
        thumbcaptionDiv.setAttribute('style', `text-align: ${autoAlign}`);
        if (description) {
            thumbcaptionDiv.innerHTML = description.innerHTML;
        }
        // Keep the surrounding <a> when the image is still linked
        thumbinnerDiv.appendChild(this.isStillLinked(image) ? image.parentNode : image);
        thumbinnerDiv.appendChild(thumbcaptionDiv);
        thumbDiv.appendChild(thumbinnerDiv);
        imageNode.parentNode.replaceChild(thumbDiv, imageNode);
    }

    /**
     * Rewrite one `<img>` to point to its local copy and register it for
     * download. The image is deleted when `shouldKeepImage` rejects it or when
     * the rewrite fails.
     *
     * @param {Object} dump - dump settings.
     * @param {Object} srcCache - "seen URLs" object (mutated).
     * @param {string} articleId - id of the article being rendered.
     * @param {Element} img - the `<img>` element (mutated or deleted).
     * @returns {Promise<{imageDependencies: string[]}>}
     */
    async treatImage(dump, srcCache, articleId, img) {
        const imageDependencies = [];
        if (!this.shouldKeepImage(dump, img)) {
            DOMUtils.deleteNode(img);
            return { imageDependencies };
        }
        /* Rewrite image src attribute */
        const src = getFullUrl(img.getAttribute('src'), MediaWiki.baseUrl);
        try {
            // One `../` per slash in the article id, so the path is relative to the article
            const slashesInUrl = articleId.split('/').length - 1;
            const upStr = slashesInUrl ? '../'.repeat(slashesInUrl) : './';
            const newSrc = upStr + getMediaBase(src, true);
            /* Download image, but avoid duplicate calls */
            registerDependency(srcCache, src, imageDependencies);
            /* Change image source attribute to point to the local image */
            img.setAttribute('src', newSrc);
            /* Remove useless 'resource' attribute */
            img.removeAttribute('resource');
            /* Remove srcset */
            img.removeAttribute('srcset');
        } catch (err) {
            logger.warn(`Deleting image '${src}' which could not be rewritten in article '${articleId}': ${err}`);
            DOMUtils.deleteNode(img);
            return { imageDependencies };
        }
        /* Add lazy loading */
        img.setAttribute('loading', 'lazy');
        return { imageDependencies };
    }

    /**
     * Tell whether an `<img>` must be kept.
     *
     * An image is kept when it has a `src` not containing `./Special:FilePath/`
     * and either pictures are enabled (`!dump.nopic`) or the image is a math
     * rendering (class `mwe-math-fallback-image-inline` or
     * `typeof="mw:Extension/math"`).
     *
     * @param {Object} dump - dump settings (reads `nopic`).
     * @param {Element} img - the `<img>` element.
     * @returns {*} truthy when the image must be kept.
     */
    shouldKeepImage(dump, img) {
        const imageNodeClass = img.getAttribute('class') || '';
        const src = img.getAttribute('src');
        const isMath = imageNodeClass.includes('mwe-math-fallback-image-inline') || img.getAttribute('typeof') === 'mw:Extension/math';
        return (!dump.nopic || isMath) && src && !src.includes('./Special:FilePath/');
    }

    /**
     * Process every media element of the document: audio/video first, then
     * images, then image frames (`<figure>` and frameless `<span>`).
     *
     * @param {Document} parsoidDoc - article document (mutated).
     * @param {Object} dump - dump settings.
     * @param {string} articleId - id of the article being rendered.
     * @returns {Promise<{doc: Document, imageDependencies: string[], videoDependencies: string[], subtitles: string[]}>}
     */
    async treatMedias(parsoidDoc, dump, articleId) {
        const imageDependencies = [];
        const videoDependencies = [];
        const subtitles = [];
        // Snapshot both lists before any element gets rewritten or deleted
        const imgs = Array.from(parsoidDoc.getElementsByTagName('img'));
        const audiosAndVideos = Array.from(parsoidDoc.querySelectorAll('video, audio'));
        const srcCache = {};
        for (const audioVideoEl of audiosAndVideos) {
            const ret = await this.treatAudioVideo(dump, srcCache, articleId, audioVideoEl);
            imageDependencies.push(...ret.imageDependencies);
            videoDependencies.push(...ret.videoDependencies);
            subtitles.push(...ret.subtitles);
        }
        /* Clean/rewrite image tags */
        for (const imgEl of imgs) {
            const ret = await this.treatImage(dump, srcCache, articleId, imgEl);
            imageDependencies.push(...ret.imageDependencies);
        }
        /* Treat image figures + special spans */
        const figures = parsoidDoc.getElementsByTagName('figure');
        const spans = parsoidDoc.querySelectorAll('span[typeof~=mw:Image/Frameless],span[typeof~=mw:File/Frameless]');
        const imageNodes = [...Array.from(figures), ...Array.from(spans)];
        for (const imageNode of imageNodes) {
            this.treatImageFrames(dump, parsoidDoc, imageNode);
        }
        return { doc: parsoidDoc, imageDependencies, videoDependencies, subtitles };
    }

    /**
     * @param {Element} image - image element, may be undefined.
     * @returns {*} truthy when the direct parent of `image` is an `<a>`.
     */
    isStillLinked(image) {
        return image && image.parentNode && image.parentNode.tagName === 'A';
    }

    /**
     * @param {Object} dump - dump settings (reads `nopic`).
     * @param {Element} imageNode - frame element.
     * @param {Element} image - media element found in the frame, may be undefined.
     * @returns {*} truthy when pictures are enabled and both nodes exist.
     */
    shouldKeepNode(dump, imageNode, image) {
        return !dump.nopic && imageNode && image;
    }

    /**
     * Build the outer thumbnail container for an image frame, aligned after the
     * frame's `mw-halign-*` class: `tright`, `tleft`, or `tnone` wrapped in a
     * `<center>`. Without such a class, the side opposite to the text start is
     * used (`tright` for `ltr`, `tleft` otherwise).
     *
     * @param {Object} dump - dump settings (reads `mwMetaData.textDir`).
     * @param {Document} parsoidDoc - document used to create elements.
     * @param {Element} imageNode - the frame element.
     * @returns {Element} the `div.thumb`, or the `<center>` wrapping it.
     */
    makeThumbDiv(dump, parsoidDoc, imageNode) {
        const imageNodeClass = imageNode.getAttribute('class') || '';
        const thumbDiv = parsoidDoc.createElement('div');
        thumbDiv.setAttribute('class', 'thumb');
        if (imageNodeClass.includes('mw-halign-right')) {
            DOMUtils.appendToAttr(thumbDiv, 'class', 'tright');
            return thumbDiv;
        }
        if (imageNodeClass.includes('mw-halign-left')) {
            DOMUtils.appendToAttr(thumbDiv, 'class', 'tleft');
            return thumbDiv;
        }
        if (imageNodeClass.includes('mw-halign-center')) {
            DOMUtils.appendToAttr(thumbDiv, 'class', 'tnone');
            const centerDiv = parsoidDoc.createElement('center');
            centerDiv.appendChild(thumbDiv);
            return centerDiv;
        }
        const revAutoAlign = dump.mwMetaData.textDir === 'ltr' ? 'right' : 'left';
        DOMUtils.appendToAttr(thumbDiv, 'class', `t${revAutoAlign}`);
        return thumbDiv;
    }

    // TODO: The first part of this method is common for all renders
    /**
     * Turn raw article HTML into the final page: rewrite URLs, clean the DOM,
     * localise media, merge into the template and optionally minify.
     *
     * @param {Object} processHtmlOpts - `html`, `dump`, `articleId`,
     *   `articleDetail`, `displayTitle`, `articleSubtitle`, `moduleDependencies`
     *   and `callback(moduleDependencies, articleId)` which returns the template document.
     * @returns {Promise<{finalHTML: string, mediaDependencies: Object[], imageDependencies: Object[], videoDependencies: Object[], subtitles: Object[]}>}
     *   the page HTML plus `{ url, path }` pairs for everything to download.
     */
    async processHtml(processHtmlOpts) {
        const { html, dump, articleId, articleDetail, displayTitle, moduleDependencies, callback } = processHtmlOpts;
        let { articleSubtitle } = processHtmlOpts;
        let doc = domino.createDocument(html);
        const ruRet = await rewriteUrlsOfDoc(doc, articleId, dump);
        doc = ruRet.doc;
        const mediaDependencies = ruRet.mediaDependencies.filter((a) => a).map(toMediaDependency);
        doc = this.applyOtherTreatments(doc, dump, articleId);
        const tmRet = await this.treatMedias(doc, dump, articleId);
        doc = tmRet.doc;
        // Subtitle
        if (articleSubtitle) {
            let articleSubtitleDoc = domino.createDocument(articleSubtitle);
            const ruRetSubtitle = await rewriteUrlsOfDoc(articleSubtitleDoc, articleId, dump);
            articleSubtitleDoc = ruRetSubtitle.doc;
            articleSubtitle = articleSubtitleDoc.getElementsByTagName('body')[0].innerHTML;
        }
        const videoDependencies = tmRet.videoDependencies.filter((a) => a).map(toMediaDependency);
        const imageDependencies = tmRet.imageDependencies.filter((a) => a).map(toMediaDependency);
        const subtitles = tmRet.subtitles
            .filter((a) => a)
            .map((url) => {
                // Same `<title>-<lang>.vtt` naming as the one written to <track> by treatSubtitle
                const { title, lang } = parseSubtitleQuery(url);
                return { url, path: `${title}-${lang}.vtt` };
            });
        if (!isMainPage(articleId) && dump.customProcessor?.preProcessArticle) {
            doc = await dump.customProcessor.preProcessArticle(articleId, doc);
        }
        let templatedDoc = callback(moduleDependencies, articleId);
        templatedDoc = await this.mergeTemplateDoc(templatedDoc, doc, dump, articleDetail, RedisStore.articleDetailXId, articleId, displayTitle, articleSubtitle);
        if (dump.customProcessor?.postProcessArticle) {
            templatedDoc = await dump.customProcessor.postProcessArticle(articleId, templatedDoc);
        }
        let outHtml = templatedDoc.documentElement.outerHTML;
        if (dump.opts.minifyHtml) {
            outHtml = await htmlMinifier.minify(outHtml, {
                removeComments: true,
                conservativeCollapse: true,
                collapseBooleanAttributes: true,
                removeRedundantAttributes: true,
                removeEmptyAttributes: true,
                minifyCSS: true,
            });
        }
        const finalHTML = '<!DOCTYPE html>\n' + outHtml;
        return {
            finalHTML,
            mediaDependencies,
            imageDependencies,
            videoDependencies,
            subtitles,
        };
    }

    /**
     * Create the final document by merging the article body into the template:
     * content, page title, subtitle or subpage breadcrumbs, footer and
     * geo-coordinates meta tag.
     *
     * @param {Document} htmlTemplateDoc - template document (mutated and returned).
     * @param {Document} parsoidDoc - processed article document.
     * @param {Object} dump - dump settings (reads `strings`, `mwMetaData`).
     * @param {Object} articleDetail - reads `pagelang`, `pagedir`, `timestamp`,
     *   `revisionId`, `coordinates`.
     * @param {Object} articleDetailXId - store whose async `exists(id)` tells
     *   whether an article is part of the dump.
     * @param {string} articleId - id of the article being rendered.
     * @param {string} displayTitle - title used for an empty `#firstHeading`, if any.
     * @param {string} articleSubtitle - subtitle HTML, only used when the concrete
     *   class is named `ActionParseRenderer`.
     * @returns {Promise<Document>} `htmlTemplateDoc`.
     */
    async mergeTemplateDoc(htmlTemplateDoc, parsoidDoc, dump, articleDetail, articleDetailXId, articleId, displayTitle, articleSubtitle) {
        const mwContentText = htmlTemplateDoc.getElementById('mw-content-text');
        mwContentText.lang = articleDetail.pagelang;
        mwContentText.dir = articleDetail.pagedir;
        mwContentText.classList.add('mw-content-' + articleDetail.pagedir);
        mwContentText.innerHTML = parsoidDoc.getElementsByTagName('body')[0].innerHTML;

        /* Title */
        const titleNode = htmlTemplateDoc.getElementById('title_0');
        const articleTitle = titleNode ? titleNode.textContent : articleId.replace(/_/g, ' ');
        // `articleTitle` is plain text, not markup
        htmlTemplateDoc.getElementsByTagName('title')[0].textContent = articleTitle;
        // Set inline page title when missing
        const inlineTitle = htmlTemplateDoc.getElementById('firstHeading');
        if (inlineTitle && !inlineTitle.innerHTML) {
            inlineTitle.innerHTML = displayTitle || articleTitle;
        }
        DOMUtils.deleteNode(htmlTemplateDoc.getElementById('titleHeading'));

        /* Subpage */
        if (this.constructor.name === 'ActionParseRenderer') {
            const mwContentSubtitle = htmlTemplateDoc.getElementById('mw-content-subtitle');
            mwContentSubtitle.innerHTML = articleSubtitle || '';
        } else if (isSubpage(articleId) && !isMainPage(articleId)) {
            const headingNode = htmlTemplateDoc.getElementById('mw-content-text');
            const subpagesNode = htmlTemplateDoc.createElement('span');
            const parents = articleId.split('/');
            parents.pop();
            const upToRoot = '../'.repeat(parents.length);
            // Existence checks run in parallel; collecting the results in an array keeps
            // the breadcrumbs in path order whatever the order the checks resolve in.
            const breadcrumbs = await Promise.all(parents.map(async (parent, index) => {
                const label = parent.replace(/_/g, ' ');
                // Full id of this ancestor, built from the path components up to it
                const parentId = parents.slice(0, index + 1).join('/');
                const isParentMirrored = await articleDetailXId.exists(parentId);
                if (!isParentMirrored) {
                    return `&lt; ${label} `;
                }
                return `&lt; <a href="${upToRoot}${encodeArticleIdForZimHtmlUrl(parentId)}" title="${label}">${label}</a> `;
            }));
            subpagesNode.innerHTML = breadcrumbs.join('');
            subpagesNode.setAttribute('class', 'subpages');
            headingNode.parentNode.insertBefore(subpagesNode, headingNode);
        }

        /* Set footer */
        const div = htmlTemplateDoc.createElement('div');
        /* Revision date: skipped when the timestamp cannot be parsed (toISOString would throw) */
        const date = new Date(articleDetail.timestamp);
        const lastEditedOnString = Number.isNaN(date.getTime())
            ? null
            : interpolateTranslationString(dump.strings.LAST_EDITED_ON, {
                date: date.toISOString().substring(0, 10),
            });
        const creatorLink = '<a class="external text" ' +
            `${lastEditedOnString ? `title="${lastEditedOnString}"` : ''} ` +
            `href="${MediaWiki.webUrl.href}?title=${encodeURIComponent(articleId)}&oldid=${articleDetail.revisionId}">` +
            `${dump.mwMetaData.title || dump.mwMetaData.creator}</a>`;
        const licenseLink = (dump.mwMetaData.licenseUrl ? `<a class="external text" href="${dump.mwMetaData.licenseUrl}">` : '') +
            (dump.mwMetaData.licenseName || dump.mwMetaData.licenseUrl || dump.strings.LICENSE_UNKNOWN).replace(/</g, '&lt;') +
            (dump.mwMetaData.licenseUrl ? `</a>` : '');
        div.innerHTML = footerTemplate({
            disclaimer: interpolateTranslationString(dump.strings.DISCLAIMER, {
                creator: creatorLink,
                license: licenseLink,
            }),
            strings: dump.strings,
        });
        htmlTemplateDoc.getElementById('mw-content-text').appendChild(div);
        this.addNoIndexCommentToElement(div);

        /* Geo-coordinates */
        if (articleDetail.coordinates) {
            const metaNode = htmlTemplateDoc.createElement('meta');
            metaNode.name = 'geo.position';
            metaNode.content = articleDetail.coordinates;
            htmlTemplateDoc.getElementsByTagName('head')[0].appendChild(metaNode);
        }
        return htmlTemplateDoc;
    }

    /**
     * Surround `element` with `<!--htdig_noindex-->` / `<!--/htdig_noindex-->`
     * comments inside its parent.
     *
     * Comment nodes are inserted directly next to the element, so the parent's
     * content is neither serialised nor re-parsed.
     *
     * @param {Element} element - element attached to a parent node.
     */
    addNoIndexCommentToElement(element) {
        const parent = element.parentNode;
        const doc = element.ownerDocument;
        parent.insertBefore(doc.createComment('htdig_noindex'), element);
        // `insertBefore(node, null)` appends, which covers the "last child" case
        parent.insertBefore(doc.createComment('/htdig_noindex'), element.nextSibling);
    }

    /**
     * Remove all `<iframe>` tags.
     *
     * @param {Document} parsoidDoc - article document (mutated).
     */
    removeIframeTags(parsoidDoc) {
        for (const iframe of Array.from(parsoidDoc.getElementsByTagName('iframe'))) {
            DU.deleteNode(iframe);
        }
    }

    /**
     * Remove all citation-related material: `<sup>` whose class contains
     * `reference`, `.mw-ext-cite-error` and `.mw-references-wrap` elements.
     *
     * @param {Document} parsoidDoc - article document (mutated).
     */
    removeCitations(parsoidDoc) {
        const referenceSups = Array.from(parsoidDoc.getElementsByTagName('sup')).filter((sup) => (sup.getAttribute('class') || '').includes('reference'));
        const citeErrors = Array.from(parsoidDoc.getElementsByClassName('mw-ext-cite-error'));
        const references = Array.from(parsoidDoc.getElementsByClassName('mw-references-wrap'));
        for (const node of [...referenceSups, ...citeErrors, ...references]) {
            DU.deleteNode(node);
        }
    }

    /**
     * Delete unwanted elements: `<link>`, `<input>`, `<map>` (when
     * `dump.nopic`), gallery boxes without media, galleries without boxes,
     * elements with blacklisted CSS classes and, when `dump.nodet`, every
     * `<section>` which is not the lead section.
     *
     * @param {Document} parsoidDoc - article document (mutated).
     * @param {Object} filtersConfig - reads `cssClassBlackList`,
     *   `nodetCssClassBlackList`, `leadSectionId`, `cssClassBlackListIfNoLink`.
     * @param {Object} dump - dump settings (reads `nopic`, `nodet`).
     */
    clearLinkAndInputTags(parsoidDoc, filtersConfig, dump) {
        /* Don't need <link> and <input> tags */
        const nodesToDelete = [{ tag: 'link' }, { tag: 'input' }];
        /* Remove "map" tags if necessary */
        if (dump.nopic) {
            nodesToDelete.push({ tag: 'map' });
        }
        /* Remove gallery boxes if pics need stripping or if they don't have thumbs */
        nodesToDelete.push({
            class: 'gallerybox',
            filter(n) {
                return !n.getElementsByTagName('img').length && !n.getElementsByTagName('audio').length && !n.getElementsByTagName('video').length;
            },
        });
        nodesToDelete.push({
            class: 'gallery',
            filter(n) {
                return !n.getElementsByClassName('gallerybox').length;
            },
        });
        /* Remove element with black listed CSS classes */
        for (const classname of filtersConfig.cssClassBlackList) {
            nodesToDelete.push({ class: classname });
        }
        if (dump.nodet) {
            for (const classname of filtersConfig.nodetCssClassBlackList) {
                nodesToDelete.push({ class: classname });
            }
            nodesToDelete.push({
                tag: 'section',
                filter(n) {
                    return n.getAttribute('data-mw-section-id') !== filtersConfig.leadSectionId;
                },
            });
        }
        /* Remove element with black listed CSS classes and no link */
        for (const classname of filtersConfig.cssClassBlackListIfNoLink) {
            nodesToDelete.push({
                class: classname,
                filter(n) {
                    return n.getElementsByTagName('a').length === 0;
                },
            });
        }
        /* Delete them all */
        for (const t of nodesToDelete) {
            let nodes;
            if (t.tag) {
                nodes = parsoidDoc.getElementsByTagName(t.tag);
            } else if (t.class) {
                nodes = parsoidDoc.getElementsByClassName(t.class);
            } else {
                // Unusable rule (e.g. empty class name): skip it, but still apply the remaining ones
                continue;
            }
            // Snapshot the live collection, nodes get removed while iterating
            for (const node of Array.from(nodes)) {
                if (!t.filter || t.filter(node)) {
                    DU.deleteNode(node);
                }
            }
        }
    }

    /**
     * Strip bulky attributes (`data-parsoid`, `about`, `data-mw`) from every
     * element, remove the configured class names from `class` attributes and
     * replace the text of non-empty Kartographer map links by a globe emoji.
     *
     * @param {Document} parsoidDoc - article document (mutated).
     * @param {Object} filtersConfig - reads `cssClassCallsBlackList`.
     */
    clearNodes(parsoidDoc, filtersConfig) {
        const allNodes = Array.from(parsoidDoc.getElementsByTagName('*'));
        for (const node of allNodes) {
            node.removeAttribute('data-parsoid');
            node.removeAttribute('about');
            /* Lots of bloat about templates wikitext */
            node.removeAttribute('data-mw');
            // NOTE(review): this tests an *attribute* named `img`, not the tag name, so it
            // probably never matches; left unchanged because `data-file-width` is read
            // afterwards when choosing video sources - confirm the intent before changing it.
            if (node.getAttribute('img')) {
                /* Remove a few images Parsoid attributes */
                node.removeAttribute('data-file-width');
                node.removeAttribute('data-file-height');
                node.removeAttribute('data-file-type');
            }
            /* Remove a few css class which are typically used to hide things when online, but we want to see them when offline */
            for (const classname of filtersConfig.cssClassCallsBlackList) {
                if (node.getAttribute('class')) {
                    node.setAttribute('class', node.getAttribute('class').replace(classname, ''));
                }
            }
        }
        const kartographerMaplinkNodes = Array.from(parsoidDoc.querySelectorAll('.mw-kartographer-maplink')).filter((n) => !!n.textContent);
        for (const node of kartographerMaplinkNodes) {
            node.textContent = '🌍';
        }
    }

    /**
     * Apply every non-media clean-up to the article document: iframes,
     * citations (when `dump.nodet`), unwanted tags/classes/ids, reference
     * spans, inline styles, scripts, forced display, empty sections and
     * attribute stripping.
     *
     * @param {Document} parsoidDoc - article document (mutated and returned).
     * @param {Object} dump - dump settings (reads `nodet`, `nopic`, `opts.keepEmptySections`).
     * @param {string} articleId - id of the article being rendered.
     * @returns {Document} `parsoidDoc`.
     */
    applyOtherTreatments(parsoidDoc, dump, articleId) {
        this.removeIframeTags(parsoidDoc);
        if (dump.nodet) {
            this.removeCitations(parsoidDoc);
        }
        const filtersConfig = config.filters;
        this.clearLinkAndInputTags(parsoidDoc, filtersConfig, dump);

        /* Go through all reference calls: non-empty ones become <sup>, empty ones are dropped */
        const spans = Array.from(parsoidDoc.getElementsByTagName('span'));
        for (const span of spans) {
            if (span.getAttribute('rel') !== 'dc:references') {
                continue;
            }
            if (!span.innerHTML) {
                DU.deleteNode(span);
                continue;
            }
            const sup = parsoidDoc.createElement('sup');
            sup.id = span.id;
            sup.innerHTML = span.innerHTML;
            span.parentNode.replaceChild(sup, span);
        }

        /* Go through all inline style and rewrite it */
        const styles = Array.from(parsoidDoc.getElementsByTagName('style'));
        for (const style of styles) {
            if (!style.textContent) {
                continue;
            }
            // We use MediaWiki.baseUrl which is an approximation but it is deemed sufficient because
            // all non-absolute URL found in inline CSS are expected to be relative to the root, not to
            // current web URL which is "moving" (could use something like /w/index.php?title=... or /wiki/...)
            style.textContent = processStylesheetContent(MediaWiki.baseUrl.toString(), '', style.textContent, articleId);
        }

        /* Remove element with id in the blacklist */
        for (const id of filtersConfig.idBlackList) {
            const node = parsoidDoc.getElementById(id);
            if (node) {
                DU.deleteNode(node);
            }
        }

        /*
         * Because of CSP, some ZIM reader environments do not allow inline JS. See issues/2096.
         */
        const scripts = Array.from(parsoidDoc.getElementsByTagName('script'));
        for (const script of scripts) {
            script.parentNode.removeChild(script);
        }

        /* Force display of element with that CSS class */
        for (const classname of filtersConfig.cssClassDisplayList) {
            for (const node of Array.from(parsoidDoc.getElementsByClassName(classname))) {
                node.style.removeProperty('display');
            }
        }

        /* Remove empty sections */
        if (!dump.opts.keepEmptySections) {
            // A child counts as content unless it is one of the headings found in all sections:
            // RestApi : h2 tag
            // WikimediaDesktop, VisualEditor, Action Parse: mw-heading class
            // WikimediaMobile: pcs-edit-section-header class
            const isContent = (child) => !child.matches('h2') && !child.classList.contains('mw-heading') && !child.classList.contains('pcs-edit-section-header');
            const sections = Array.from(parsoidDoc.querySelectorAll('section'));
            for (const section of sections) {
                if (!Array.from(section.children).some(isContent)) {
                    DU.deleteNode(section);
                }
            }
        }
        this.clearNodes(parsoidDoc, filtersConfig);
        return parsoidDoc;
    }

    /**
     * Add an H1 tag with page title on top of article except main page
     *
     * The heading is only inserted when `articleDetail.title` is set and the
     * document has a `body.mw-body-content` element.
     *
     * @param {string} content - article HTML.
     * @param {Object} articleDetail - reads `title`.
     * @returns {string} serialised `<html>` element of the resulting document.
     */
    injectH1TitleToHtml(content, articleDetail) {
        const doc = domino.createDocument(content);
        const header = doc.createElement('h1');
        if (articleDetail?.title) {
            header.appendChild(doc.createTextNode(articleDetail.title));
            header.classList.add('article-header');
            const target = doc.querySelector('body.mw-body-content');
            if (target) {
                target.insertAdjacentElement('afterbegin', header);
            }
        }
        return doc.documentElement.outerHTML;
    }
}
//# sourceMappingURL=abstract.renderer.js.map