parsoid-jsapi
Version:
Parsoid JSAPI
1,488 lines (1,436 loc) • 40.2 kB
JavaScript
/*
* Handy JavaScript API for Parsoid DOM, inspired by the
* python `mwparserfromhell` package.
*/
'use strict';
require('parsoid/core-upgrade.js');
// TO DO:
// extension
// PExtLink#url PWikiLink#title should handle mw:ExpandedAttrs
// make separate package?
var util = require('util');
var DOMImpl = require('domino').impl;
var Node = DOMImpl.Node;
var NodeFilter = DOMImpl.NodeFilter;
var DU = require('parsoid/lib/utils/DOMUtils.js').DOMUtils;
var Promise = require('parsoid/lib/utils/promise.js');
// Note that the JSAPI exposes data-mw directly as a DOM attribute to
// allow clients to easily edit it.
// WTS helper: serialize a list of DOM nodes through the environment's
// content handler.  An empty list yields the empty string; a lone <body>
// is passed through as-is; anything else is deep-cloned into a fresh <body>
// so the original nodes are left where they are.
var wts = function(env, nodes) {
  if (nodes.length === 0) {
    return '';
  }
  var body;
  var isWholeBody = (nodes.length === 1) && DU.isBody(nodes[0]);
  if (isWholeBody) {
    body = nodes[0];
  } else {
    body = nodes[0].ownerDocument.createElement('body');
    var idx = 0;
    while (idx < nodes.length) {
      body.appendChild(nodes[idx].cloneNode(true));
      idx += 1;
    }
  }
  var handler = env.getContentHandler();
  return handler.fromHTML(env, body, false);
};
// toString helper: render `nodes` as a (possibly abbreviated) HTML string
// for debugging output.
//
// - `nodes` is an array of DOM nodes.
// - `sizeLimit` is the approximate maximum length of the result, in
//   characters; it defaults to 80.
//
// A single node is rendered in full if it fits (non-element nodes are never
// abbreviated); otherwise its contents are replaced by `...`, and if even
// that is too long only the tag name is kept.  For several nodes the
// remaining budget is shared equally between the nodes not yet rendered.
var toStringHelper = function(nodes, sizeLimit) {
  var out;
  if (sizeLimit === undefined) { sizeLimit = 80; /* characters */ }
  if (nodes.length === 0) {
    return '';
  } else if (nodes.length === 1) {
    // Work on a clone so that eliding the contents below does not touch
    // the caller's DOM.
    var body = nodes[0].ownerDocument.createElement('body');
    body.appendChild(nodes[0].cloneNode(true));
    out = DU.normalizeOut(body, 'parsoidOnly');
    if (out.length <= sizeLimit || !DU.isElt(nodes[0])) { return out; }
    body.firstChild.innerHTML = '...';
    out = DU.normalizeOut(body, 'parsoidOnly');
    if (out.length <= sizeLimit) { return out; }
    var name = nodes[0].nodeName.toLowerCase();
    var children = nodes[0].childNodes;
    if (children.length === 0) {
      return '<' + name + ' .../>';
    } else {
      return '<' + name + ' ...>...</' + name + '>';
    }
  } else {
    // The accumulator must start as a string: it is both appended to and
    // measured (`out.length`) while computing each node's share.
    out = '';
    for (var i = 0; i < nodes.length; i++) {
      out += toStringHelper(
        [nodes[i]],
        (sizeLimit - out.length) / (nodes.length - i)
      );
    }
    return out;
  }
};
// Forward declarations of Wrapper classes.
var PNode, PNodeList, PComment, PExtLink, PHeading, PHtmlEntity, PMedia, PTag, PTemplate, PText, PWikiLink;
// HTML escape helper: turn `v` into an HTML string.
// - a String is escaped by round-tripping it through the `textContent` of
//   a scratch <div> created in `node`'s document;
// - a PNodeList contributes the innerHTML of its container;
// - anything else is assumed to be a DOM node and contributes its outerHTML.
var toHtmlStr = function(node, v) {
  if (typeof v !== 'string') {
    return (v instanceof PNodeList) ? v.container.innerHTML : v.outerHTML;
  }
  var scratch = node.ownerDocument.createElement('div');
  scratch.textContent = v;
  return scratch.innerHTML;
};
/**
* The PNodeList class wraps a collection of DOM {@link Node}s.
* It provides methods that can be used to extract data from or
* modify the nodes. The `filter()` series of functions is very
* useful for extracting and iterating over, for example, all
* of the templates in the project (via {@link #filterTemplates}).
* @class PNodeList
* @alternateClassName Parsoid.PNodeList
*/
/**
* @method constructor
* @private
* @param {PDoc} pdoc The parent document for this {@link PNodeList}.
* @param {PNode|null} parent A {@link PNode} which will receive updates
* when this {@link PNodeList} is mutated.
* @param {Node} container A DOM {@link Node} which is the parent of all
* of the DOM {@link Node}s in this {@link PNodeList}. The container
* element itself is *not* considered part of the list.
* @param {Object} [opts]
* @param {Function} [opts.update]
* A function which will be invoked when {@link #update} is called.
*/
PNodeList = function PNodeList(pdoc, parent, container, opts) {
  // Optional hook run by update(); stays falsy when no opts are given.
  var updateHook = (opts && opts.update);
  this.pdoc = pdoc;
  this.parent = parent;
  this.container = container;
  this._update = updateHook;
  // Wrapper objects are built lazily by the `pnodes` getter.
  this._cachedPNodes = null;
};
Object.defineProperties(PNodeList.prototype, {
/**
* Returns an {@link Array} of the DOM {@link Node}s represented
* by this {@link PNodeList}.
* @property {Node[]}
*/
nodes: {
get: function() { return Array.from(this.container.childNodes); },
},
/**
* Call {@link #update} after manually mutating any of the DOM
* {@link Node}s represented by this {@link PNodeList} in order to
* ensure that any containing templates are refreshed with their
* updated contents.
*
* The mutation methods in the {@link PDoc}/{@link PNodeList} API
* automatically call {@link #update} for you when required.
* @method
*/
update: {
value: function() {
this._cachedPNodes = null;
if (this._update) { this._update(); }
if (this.parent) { this.parent.update(); }
},
},
// Internal helper: return an Array of the nodes below `this.container` that
// match `selector`.  In addition to ordinary CSS selectors, the non-standard
// items `COMMENT` and `TEXT` are recognized when they appear as complete
// comma-separated alternatives of the selector.
_querySelectorAll: {
value: function(selector) {
// Surround with commas so each alternative can be matched as `,ITEM,`.
var tweakedSelector = ',' + selector + ',';
if (!(/,(COMMENT|TEXT),/.test(tweakedSelector))) {
// Use fast native querySelectorAll
return Array.from(this.container.querySelectorAll(selector));
}
// Implement comment/text node selector the hard way
// Note that on this path the only elements ever accepted are those
// matching PTemplate._selector; any other CSS alternatives in `selector`
// are not evaluated.
/* jshint bitwise: false */
var whatToShow = NodeFilter.SHOW_ELEMENT; // always show templates
if (/,COMMENT,/.test(tweakedSelector)) {
whatToShow = whatToShow | NodeFilter.SHOW_COMMENT;
}
if (/,TEXT,/.test(tweakedSelector)) {
whatToShow = whatToShow | NodeFilter.SHOW_TEXT;
}
// Accept every non-element node that passed `whatToShow`, accept template
// elements, and skip (but still descend into) all other elements.
var nodeFilter = function(node) {
if (node.nodeType !== Node.ELEMENT_NODE) {
return NodeFilter.FILTER_ACCEPT;
}
if (node.matches(PTemplate._selector)) {
return NodeFilter.FILTER_ACCEPT;
}
return NodeFilter.FILTER_SKIP;
};
var result = [];
// Template elements are only reported when the caller's selector
// explicitly lists the transclusion selector as an alternative.
var includeTemplates =
/,\[typeof~="mw:Transclusion"\],/.test(tweakedSelector);
var treeWalker = this.pdoc.document.createTreeWalker(
this.container, whatToShow, nodeFilter, false
);
while (treeWalker.nextNode()) {
var node = treeWalker.currentNode;
// We don't need the extra test for ELEMENT_NODEs yet, since
// non-template element nodes will be skipped by the nodeFilter
// above. But if we ever extend filter() to be fully generic,
// we might need the commented-out portion of this test.
if (node.nodeType === Node.ELEMENT_NODE /* &&
node.matches(PTemplate._selector) */
) {
// Reposition the walker inside the template so the next nextNode()
// call resumes from there rather than from the template's first child.
// NOTE(review): descendants of that last child may still be visited
// by later nextNode() calls -- confirm against TreeWalker semantics.
treeWalker.lastChild(); // always skip over all children
if (!includeTemplates) {
continue; // skip template itself
}
}
result.push(node);
}
return result;
},
},
_templatesForNode: {
value: function(node) {
// each Transclusion node could represent multiple templates.
var parent = this;
var result = [];
var parts = DU.getJSONAttribute(node, 'data-mw', {}).parts || [];
parts.forEach(function(part, i) {
if (part.template) {
result.push(new PTemplate(parent.pdoc, parent, node, i));
}
});
return result;
},
},
/**
* @method
* @private
* @param {Array} result
* A result array to append new items to as they are found
* @param {string} selector
* CSS-style selector for the nodes of interest
* @param {Function} func
* Function to apply to every non-template match
* @param {Object} [opts]
* @param {boolean} [opts.recursive]
* Set to `false` to avoid recursing into templates.
*/
_filter: {
value: function(result, selector, func, opts) {
var self = this;
var recursive = (opts && opts.recursive) !== false;
var tSelector = PTemplate._selector;
if (selector) {
tSelector += ',' + selector;
}
this._querySelectorAll(tSelector).forEach(function(node) {
var isTemplate = node.nodeType === Node.ELEMENT_NODE &&
node.matches(PTemplate._selector);
if (isTemplate) {
self._templatesForNode(node).forEach(function(t) {
if (!selector) {
result.push(t);
}
if (recursive) {
t.params.forEach(function(k) {
var td = t.get(k);
['key', 'value'].forEach(function(prop) {
if (td[prop]) {
td[prop]._filter(result, selector, func, opts);
}
});
});
}
});
} else {
func(result, self, node, opts);
}
});
return result;
},
},
/**
* Return an array of {@link PComment} representing comments
* found in this {@link PNodeList}.
* @inheritdoc #_filter
* @return {PComment[]}
*/
filterComments: {
value: function(opts) {
return this._filter([], PComment._selector, function(r, parent, node) {
r.push(new PComment(parent.pdoc, parent, node));
}, opts);
},
},
/**
* Return an array of {@link PExtLink} representing external links
* found in this {@link PNodeList}.
* @inheritdoc #_filter
* @return {PExtLink[]}
*/
filterExtLinks: {
value: function(opts) {
return this._filter([], PExtLink._selector, function(r, parent, node) {
r.push(new PExtLink(parent.pdoc, parent, node));
}, opts);
},
},
/**
* Return an array of {@link PHeading} representing headings
* found in this {@link PNodeList}.
* @inheritdoc #_filter
* @return {PHeading[]}
*/
filterHeadings: {
value: function(opts) {
return this._filter([], PHeading._selector, function(r, parent, node) {
r.push(new PHeading(parent.pdoc, parent, node));
}, opts);
},
},
/**
* Return an array of {@link PHtmlEntity} representing HTML entities
* found in this {@link PNodeList}.
* @inheritdoc #_filter
* @return {PHtmlEntity[]}
*/
filterHtmlEntities: {
value: function(opts) {
return this._filter([], PHtmlEntity._selector, function(r, parent, node) {
r.push(new PHtmlEntity(parent.pdoc, parent, node));
}, opts);
},
},
/**
* Return an array of {@link PMedia} representing images or other
* media content found in this {@link PNodeList}.
* @inheritdoc #_filter
* @return {PMedia[]}
*/
filterMedia: {
value: function(opts) {
return this._filter([], PMedia._selector, function(r, parent, node) {
r.push(new PMedia(parent.pdoc, parent, node));
}, opts);
},
},
/**
* Return an array of {@link PTemplate} representing templates
* found in this {@link PNodeList}.
* @inheritdoc #_filter
* @return {PTemplate[]}
*/
filterTemplates: {
value: function(opts) {
return this._filter([], null, null, opts);
},
},
/**
* Return an array of {@link PText} representing plain text
* found in this {@link PNodeList}.
* @inheritdoc #_filter
* @return {PText[]}
*/
filterText: {
value: function(opts) {
return this._filter([], PText._selector, function(r, parent, node) {
r.push(new PText(parent.pdoc, parent, node));
}, opts);
},
},
/**
* Return an array of {@link PWikiLink} representing wiki links
* found in this {@link PNodeList}.
* @inheritdoc #_filter
* @return {PWikiLink[]}
*/
filterWikiLinks: {
value: function(opts) {
return this._filter([], PWikiLink._selector, function(r, parent, node) {
r.push(new PWikiLink(parent.pdoc, parent, node));
}, opts);
},
},
/**
* Internal list of PNodes in this list.
* @property {PNode[]}
* @private
*/
pnodes: {
get: function() {
if (this._cachedPNodes !== null) {
return this._cachedPNodes;
}
var templates = new Set();
var result = [];
OUTER: for (var i = 0; i < this.container.childNodes.length; i++) {
var node = this.container.childNodes.item(i);
if (node.nodeType === Node.TEXT_NODE) {
result.push(new PText(this.pdoc, this, node));
continue;
}
if (node.nodeType === Node.COMMENT_NODE) {
result.push(new PComment(this.pdoc, this, node));
continue;
}
if (node.nodeType === Node.ELEMENT_NODE) {
// Note: multiple PTemplates per Node, and possibly
// multiple Nodes per PTemplate.
if (node.matches(PTemplate._selector)) {
templates.add(node.getAttribute('about'));
this._templatesForNode(node).forEach(function(t) {
result.push(t);
});
continue;
} else if (templates.has(node.getAttribute('about'))) {
continue;
}
// PTag is the catch-all; it should always be last.
var which = [
PExtLink, PHeading, PHtmlEntity, PMedia, PWikiLink,
PTag,
];
for (var j = 0; j < which.length; j++) {
var Ty = which[j];
if (node.matches(Ty._selector)) {
result.push(new Ty(this.pdoc, this, node));
continue OUTER;
}
}
}
// Unknown type.
result.push(new PNode(this.pdoc, this, node));
}
return (this._cachedPNodes = result);
},
},
/**
* The number of nodes within the node list.
* @property {Number}
*/
length: { get: function() { return this.pnodes.length; }, },
/**
* Return the `index`th node within the node list.
* @param {Number} index
* @return {PNode}
*/
get: { value: function(index) { return this.pnodes[index]; }, },
/**
* Return the index of `target` in the list of nodes, or `-1` if
* the target was not found.
*
* If `recursive` is true, we will look in all nodes of ours and
* their descendants, and return the index of our direct descendant
* node which contains the target. Otherwise, the search is done
* only on direct descendants.
*
* If `fromIndex` is provided, it is the index to start the search
* at.
* @param {PNode|Node} target
* @param {Object} [options]
* @param {Boolean} [options.recursive=false]
* @param {Number} [options.fromIndex=0]
*/
indexOf: {
value: function(target, options) {
var recursive = Boolean(options && options.recursive);
var fromIndex = Number(options && options.fromIndex) || 0;
var child, children;
var i, j;
if (target instanceof PNode) {
target = target.node;
}
for (i = fromIndex; i < this.length; i++) {
child = this.get(i);
if (child.matches(target)) {
return i;
}
if (recursive) {
children = child._children();
for (j = 0; j < children.length; j++) {
if (children[j].indexOf(target, options) !== -1) {
return i;
}
}
}
}
return -1;
},
},
/**
* Return a string representing the contents of this object
* as HTML conforming to the
* [MediaWiki DOM specification](https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec).
* @return {String}
*/
toHtml: {
value: function() {
return this.container.innerHTML;
},
},
/**
* Return a promise for a string representing the contents of this
* object as wikitext.
* @return {Promise}
*/
toWikitext: {
value: Promise.method(function() {
return wts(this.pdoc.env, this.nodes);
}),
},
/**
* Return a string representing the contents of this object for
* debugging. Some contents may be elided.
* @return {String}
*/
toString: {
value: function() {
return toStringHelper(this.nodes);
},
},
});
/**
* Create a {@link PNodeList} from a string containing HTML.
* @return {PNodeList}
* @static
*/
PNodeList.fromHTML = function(pdoc, html) {
  // Parse the markup into a detached <div> created in the PDoc's document;
  // the resulting list has no parent PNode to notify.
  var holder = pdoc.document.createElement('div');
  holder.innerHTML = html;
  var list = new PNodeList(pdoc, null, holder);
  return list;
};
/**
* @class PNode
* A PNode represents a specific DOM {@link Node}. Its subclasses provide
* specific accessors and mutators for associated semantic information.
*
* Useful subclasses of {@link PNode} include:
*
* - {@link PComment}: comments, like `<!-- example -->`
* - {@link PExtLink}: external links, like `[http://example.com Example]`
* - {@link PHeading}: headings, like `== Section 1 ==`
* - {@link PHtmlEntity}: HTML entities, like `&nbsp;`
* - {@link PMedia}: images and media, like `[[File:Foo.jpg|caption]]`
* - {@link PTag}: other HTML tags, like `<span>`
* - {@link PTemplate}: templates, like `{{foo|bar}}`
* - {@link PText}: unformatted text, like `foo`
* - {@link PWikiLink}: wiki links, like `[[Foo|bar]]`
*/
/**
* @method constructor
* @private
* @param {PDoc} pdoc The parent document for this PNode.
* @param {PNodeList|null} parent A containing node list which will receive
* updates when this {@link PNode} is mutated.
* @param {Node} node The DOM node.
* @param {Object} [opts]
* @param {Function} [opts.update]
* A function which will be invoked when {@link #update} is called.
* @param {Function} [opts.wtsNodes]
* A function returning an array of {@link Node}s which can tweak the
* portion of the document serialized by {@link #toWikitext}.
*/
PNode = function PNode(pdoc, parent, node, opts) {
  // Both hooks are optional and stay falsy when `opts` is omitted.
  var updateHook = (opts && opts.update);
  var wtsNodesHook = (opts && opts.wtsNodes);
  /** @property {PDoc} pdoc The parent document for this {@link PNode}. */
  this.pdoc = pdoc;
  this.parent = parent;
  /** @property {Node} node The underlying DOM {@link Node}. */
  this.node = node;
  this._update = updateHook;
  this._wtsNodes = wtsNodesHook;
};
Object.defineProperties(PNode.prototype, {
  // The DOM document which owns the wrapped node.
  ownerDocument: {
    get: function() {
      var wrapped = this.node;
      return wrapped.ownerDocument;
    },
  },
  // The node's `data-mw` attribute, parsed from JSON (`{}` is the default
  // handed to the parser).  Assigning writes the attribute back and then
  // calls update().
  dataMw: {
    get: function() {
      var fallback = {};
      return DU.getJSONAttribute(this.node, 'data-mw', fallback);
    },
    set: function(v) {
      DU.setJSONAttribute(this.node, 'data-mw', v);
      this.update();
    },
  },
  /**
   * Internal helper: list every PNodeList contained within this node.
   * The base class contains none; subclasses override this.
   * @private
   * @return {PNodeList[]}
   */
  _children: {
    value: function() {
      var none = [];
      return none;
    },
  },
  /**
   * Call {@link #update} after changing the DOM {@link Node} behind this
   * {@link PNode} by hand, so that any enclosing templates are refreshed
   * from their updated contents.
   *
   * The mutators provided by this API already call {@link #update}
   * whenever it is needed.
   * @method
   */
  update: {
    value: function() {
      // Own hook first, then bubble up to the containing list.
      if (this._update) {
        this._update();
      }
      if (this.parent) {
        this.parent.update();
      }
    },
  },
  /**
   * Returns true if `target` matches this node.  By default that means
   * `target` is this wrapper, is its #node, or is another PNode wrapping
   * the same #node.  Subclasses may loosen this: for example see
   * {@link PText#matches}.
   * @param {Node|PNode} target
   * @return {Boolean} true if the target matches this node, false otherwise.
   */
  matches: {
    value: function(target) {
      if (target === this || target === this.node) {
        return true;
      }
      return (target instanceof PNode) && target.node === this.node;
    },
  },
  /**
   * @inheritdoc PNodeList#toHtml
   * @method
   */
  toHtml: {
    value: function() {
      // A `wtsNodes` hook, when present, decides which nodes represent us.
      var domNodes;
      if (this._wtsNodes) {
        domNodes = this._wtsNodes();
      } else {
        domNodes = [ this.node ];
      }
      var pieces = domNodes.map(function(n) { return n.outerHTML; });
      return pieces.join('');
    },
  },
  /**
   * @inheritdoc PNodeList#toWikitext
   * @method
   */
  toWikitext: {
    value: Promise.method(function() {
      var domNodes;
      if (this._wtsNodes) {
        domNodes = this._wtsNodes();
      } else {
        domNodes = [ this.node ];
      }
      return wts(this.pdoc.env, domNodes);
    }),
  },
  /**
   * @inheritdoc PNodeList#toString
   * @method
   */
  toString: {
    value: function() {
      var domNodes;
      if (this._wtsNodes) {
        domNodes = this._wtsNodes();
      } else {
        domNodes = [ this.node ];
      }
      return toStringHelper(domNodes);
    },
  },
});
// Helper: shared property descriptor exposing the inner contents of a
// wrapper's DOM node.  Reading yields a fresh PNodeList over the node's
// children (with the wrapper as its parent); writing replaces the node's
// innerHTML with the HTML form of the assigned value and calls update().
var innerAccessor = {
  get: function() {
    var owner = this;
    return new PNodeList(owner.pdoc, owner, owner.node);
  },
  set: function(v) {
    var target = this.node;
    target.innerHTML = toHtmlStr(target, v);
    this.update();
  },
};
/**
* PComment represents a hidden HTML comment, like `<!-- foobar -->`.
* @class PComment
* @extends PNode
*/
/**
* @method constructor
* @private
* @inheritdoc PNode#constructor
*/
PComment = function PComment(pdoc, parent, node, opts) {
  // No state of its own: everything lives in the generic PNode fields.
  PNode.call(this, pdoc, parent, node, opts);
};
util.inherits(PComment, PNode);
Object.defineProperties(PComment.prototype, {
  /**
   * The hidden text found between `<!--` and `-->`.  Reads pass the node's
   * data through `DU.decodeComment`; writes store the result of
   * `DU.encodeComment` and then call {@link #update}.
   * @property {String}
   */
  contents: {
    get: function() {
      var raw = this.node.data;
      return DU.decodeComment(raw);
    },
    set: function(v) {
      var encoded = DU.encodeComment(v);
      this.node.data = encoded;
      this.update();
    },
  },
});
/**
 * @ignore
 * @static
 * @private
 */
PComment._selector = 'COMMENT'; // non-standard selector
/**
* PExtLink represents an external link, like `[http://example.com Example]`.
* @class PExtLink
* @extends PNode
*/
/**
* @method constructor
* @private
* @inheritdoc PNode#constructor
*/
PExtLink = function PExtLink(pdoc, parent, node, opts) {
  PNode.call(this, pdoc, parent, node, opts);
};
util.inherits(PExtLink, PNode);
Object.defineProperties(PExtLink.prototype, {
  /**
   * The URL of the link target, stored in the node's `href` attribute.
   * Assigning a new URL calls {@link #update}, like every other mutator
   * in this API, so that a template argument containing this link is
   * refreshed with the new target.
   * @property {String}
   */
  url: {
    // XXX url should be a PNodeList, but that requires handling
    // typeof="mw:ExpandedAttrs"
    get: function() {
      return this.node.getAttribute('href');
    },
    set: function(v) {
      this.node.setAttribute('href', v);
      // Without this, edits made inside template arguments were never
      // written back to the enclosing template's data-mw.
      this.update();
    },
  },
  /**
   * The link title, as a {@link PNodeList}.
   * You can assign a String, Node, or PNodeList to mutate the title.
   * @property {PNodeList}
   */
  title: innerAccessor,
  // The only nested node list is the title.
  // XXX include this.url, once it is a PNodeList
  _children: { value: function() { return [this.title]; }, },
});
/**
 * @ignore
 * @static
 * @private
 */
PExtLink._selector = 'a[rel="mw:ExtLink"]';
/**
* PHeading represents a section heading in wikitext, like `== Foo ==`.
* @class PHeading
* @extends PNode
*/
/**
* @method constructor
* @private
* @inheritdoc PNode#constructor
*/
PHeading = function PHeading(pdoc, parent, node, opts) {
  PNode.call(this, pdoc, parent, node, opts);
};
util.inherits(PHeading, PNode);
Object.defineProperties(PHeading.prototype, {
  /**
   * The heading level, as an integer between 1 and 6 inclusive.  It is
   * read from the node name (`H1`..`H6`).
   *
   * Assigning a different level creates a new `<hN>` element, moves the
   * existing children into it, swaps it into the DOM in place of the old
   * element, and makes it this wrapper's #node.  Assigning the current
   * level is a no-op.
   * @property {Number}
   * @throws {Error} if the assigned value is not an integer in 1..6.
   */
  level: {
    get: function() {
      return +this.node.nodeName.slice(1);
    },
    set: function(v) {
      v = +v;
      if (v === this.level) {
        return;
      } else if (v >= 1 && v <= 6 && Math.floor(v) === v) {
        // The integer check keeps values such as 2.5 from producing a
        // bogus `<h2.5>` element.
        var nh = this.ownerDocument.createElement('h' + v);
        while (this.node.firstChild !== null) {
          nh.appendChild(this.node.firstChild);
        }
        this.node.parentNode.replaceChild(nh, this.node);
        this.node = nh;
        this.update();
      } else {
        throw new Error("Level must be between 1 and 6, inclusive.");
      }
    },
  },
  /**
   * The title of the heading, as a {@link PNodeList}.
   * You can assign a String, Node, or PNodeList to mutate the title.
   * @property {PNodeList}
   */
  title: innerAccessor,
  // The only nested node list is the title.
  _children: { value: function() { return [this.title]; }, },
});
/**
 * @ignore
 * @static
 * @private
 */
PHeading._selector = 'h1,h2,h3,h4,h5,h6';
/**
* PHtmlEntity represents an HTML entity, like `&nbsp;`.
* @class PHtmlEntity
* @extends PNode
*/
/**
* @method constructor
* @private
* @inheritdoc PNode#constructor
*/
PHtmlEntity = function PHtmlEntity(pdoc, parent, node, opts) {
  // No state of its own: everything lives in the generic PNode fields.
  PNode.call(this, pdoc, parent, node, opts);
};
util.inherits(PHtmlEntity, PNode);
Object.defineProperties(PHtmlEntity.prototype, {
  /**
   * The character represented by the HTML entity, i.e. the node's text
   * content.  Assigning replaces the text content, removes the node's
   * `data-parsoid` attribute, and calls {@link #update}.
   * @property {String}
   */
  normalized: {
    get: function() {
      var wrapped = this.node;
      return wrapped.textContent;
    },
    set: function(v) {
      var wrapped = this.node;
      wrapped.textContent = v;
      wrapped.removeAttribute('data-parsoid');
      this.update();
    },
  },
  /**
   * Extends {@link PNode#matches}: a target also matches when it is
   * strictly equal to this node's #normalized character.
   * @method
   * @inheritdoc PNode#matches
   * @param {Node|PNode|String} target
   */
  matches: {
    value: function(target) {
      if (PNode.prototype.matches.call(this, target)) {
        return true;
      }
      return this.normalized === target;
    },
  },
});
/**
 * @ignore
 * @static
 * @private
 */
PHtmlEntity._selector = '[typeof="mw:Entity"]';
/**
* PMedia represents an image or audio/video element in wikitext,
* like `[[File:Foobar.jpg|caption]]`.
* @class PMedia
* @extends PNode
*/
/**
* @method constructor
* @private
* @inheritdoc PNode#constructor
*/
PMedia = function PMedia(pdoc, parent, node, opts) {
  PNode.call(this, pdoc, parent, node, opts);
};
util.inherits(PMedia, PNode);
Object.defineProperties(PMedia.prototype, {
  // Internal helper: true when the outer element is a <figure>.
  _isBlock: {
    get: function() {
      var tag = this.node.tagName;
      return tag === 'FIGURE';
    },
  },
  // Internal helper: the `caption` field of data-mw, with a missing field
  // reported as `null`.  Assigning null/undefined deletes the field; any
  // other value is stored.  Either way data-mw is written back.
  _caption: {
    get: function() {
      var stored = this.dataMw.caption;
      if (stored === undefined) {
        return null;
      }
      return stored;
    },
    set: function(v) {
      var dataMw = this.dataMw;
      if (v === undefined || v === null) {
        delete dataMw.caption;
      } else {
        dataMw.caption = v;
      }
      this.dataMw = dataMw;
    },
  },
  /**
   * The caption of the image or media file, or `null` if not present.
   * You can assign `null`, a String, Node, or PNodeList to mutate the
   * contents.
   *
   * For a block (`<figure>`) the caption is the element following the
   * first child; otherwise it is parsed from the caption HTML kept in
   * data-mw, and edits to the returned list are written back there.
   * @property {PNodeList|null}
   */
  caption: {
    get: function() {
      // `undefined` means "not computed yet"; `null` means "no caption".
      if (this._cachedNodeList !== undefined) {
        return this._cachedNodeList;
      }
      var list = null;
      if (this._isBlock) {
        var figcaption = this.node.firstChild.nextSibling;
        if (figcaption) {
          list = new PNodeList(this.pdoc, this, figcaption);
        }
      } else {
        var html = this._caption;
        if (html !== null) {
          var holder = this.ownerDocument.createElement('div');
          holder.innerHTML = html;
          list = new PNodeList(this.pdoc, this, holder, {
            // Runs with the PNodeList as `this`.
            update: function() {
              this.parent._caption = this.container.innerHTML;
            },
          });
        }
      }
      this._cachedNodeList = list;
      return list;
    },
    set: function(v) {
      var clearing = (v === null || v === undefined);
      // Force the getter to rebuild its list next time.
      this._cachedNodeList = undefined;
      if (!this._isBlock) {
        this._caption = clearing ? v : toHtmlStr(this.node, v);
        this.update();
        return;
      }
      var figcaption = this.node.firstChild.nextSibling;
      if (clearing) {
        if (figcaption) {
          this.node.removeChild(figcaption);
          this.update();
        }
        return;
      }
      if (!figcaption) {
        figcaption = this.ownerDocument.createElement('figcaption');
        this.node.appendChild(figcaption);
      }
      figcaption.innerHTML = toHtmlStr(figcaption, v);
      this.update();
    },
  },
  // The caption, when present, is the only nested node list.
  _children: {
    value: function() {
      var list = this.caption;
      if (list) {
        return [ list ];
      }
      return [];
    },
  },
});
/**
 * @ignore
 * @static
 * @private
 */
PMedia._selector = 'figure,[typeof~="mw:Image"]';
/**
* PTag represents any otherwise-unmatched tag. This includes
* HTML-style tags in wikicode, like `<span>`, as well as some
* "invisible" tags like `<p>`.
* @class PTag
* @extends PNode
*/
/**
* @method constructor
* @private
* @inheritdoc PNode#constructor
*/
PTag = function PTag(pdoc, parent, node, opts) {
  // No state of its own: everything lives in the generic PNode fields.
  PNode.call(this, pdoc, parent, node, opts);
};
util.inherits(PTag, PNode);
Object.defineProperties(PTag.prototype, {
  /**
   * The name of the tag, in lowercase.
   * @property {String}
   */
  tagName: {
    get: function() {
      var upper = this.node.tagName;
      return upper.toLowerCase();
    },
  },
  /**
   * The contents of the tag, as a {@link PNodeList} object.
   * You can assign a String, Node, or PNodeList to mutate the contents.
   * @property {PNodeList}
   */
  contents: innerAccessor,
  // The only nested node list is the tag's contents.
  _children: {
    value: function() {
      var inner = this.contents;
      return [inner];
    },
  },
});
/**
 * @ignore
 * @static
 * @private
 */
PTag._selector = '*'; // any otherwise-unmatched element
/**
* PTemplate represents a wikitext template, like `{{foo}}`.
* @class PTemplate
* @extends PNode
*/
/**
* @method constructor
* @private
* @inheritdoc PNode#constructor
* @param {PDoc} pdoc The parent document for this PNode.
* @param {PNodeList|null} parent A containing node list which will receive
* updates when this {@link PNode} is mutated.
* @param {Node} node The DOM node.
* @param {Number} which A single {@link Node} can represent multiple
* templates; this parameter serves to distinguish them.
*/
PTemplate = function PTemplate(pdoc, parent, node, which) {
  PNode.call(this, pdoc, parent, node, {
    wtsNodes: function() {
      // Templates are actually a collection of nodes: everything in the
      // parent list sharing this node's `about` attribute.
      return this.parent._querySelectorAll
        ('[about="' + this.node.getAttribute('about') + '"]');
    },
  });
  // Index of this template within the node's data-mw `parts` array.
  this.which = which;
  // Parameter wrappers handed out by get(), keyed by parameter name.
  this._cachedParams = Object.create(null);
};
util.inherits(PTemplate, PNode);
Object.defineProperties(PTemplate.prototype, {
  // Internal helper: this template's entry in data-mw `parts`.  Assigning
  // stores the entry and writes data-mw back (which calls update()).
  _template: {
    get: function() {
      return this.dataMw.parts[this.which];
    },
    set: function(v) {
      var dmw = this.dataMw;
      dmw.parts[this.which] = v;
      this.dataMw = dmw;
    },
  },
  /**
   * The name of the template, as a String.  Assigning a name also
   * recomputes the template's target `href`.
   *
   * See: [T107194](https://phabricator.wikimedia.org/T107194)
   * @property {String}
   */
  name: {
    get: function() {
      // This should really be a PNodeList; see T107194
      return this._template.template.target.wt;
    },
    set: function(v) {
      var t = this._template;
      t.template.target.wt = v;
      t.template.target.href = './' +
        this.pdoc.env.normalizedTitleKey('Template:' + v);
      this._template = t;
    },
  },
  /**
   * Test whether the name of this template matches a given string, after
   * normalizing titles.
   * @param {String} name The template name to test against.
   * @return {Boolean}
   */
  nameMatches: {
    value: function(name) {
      var href = './' + this.pdoc.env.normalizedTitleKey('Template:' + name);
      return this._template.template.target.href === href;
    },
  },
  /**
   * The parameters supplied to this template, ordered by sorted
   * parameter name.
   * @property {PTemplate.Parameter[]}
   */
  params: {
    get: function() {
      return Object.keys(this._template.template.params).sort().map(function(k) {
        return this.get(k);
      }, this);
    },
  },
  /**
   * Return `true` if any parameter in the template is named `name`.
   * With `ignoreEmpty`, `false` will be returned even if the template
   * contains a parameter named `name`, if the parameter's value is empty
   * (ie, only contains whitespace). Note that a template may have
   * multiple parameters with the same name, but only the last one is
   * read by Parsoid (and the MediaWiki parser).
   * @param {String|PTemplate.Parameter} name
   * @param {Object} [opts]
   * @param {Boolean} [opts.ignoreEmpty=false]
   * @return {Boolean}
   */
  has: {
    value: function(name, opts) {
      if (name instanceof PTemplate.Parameter) {
        name = name.name;
      }
      var t = this._template.template;
      return Object.prototype.hasOwnProperty.call(t.params, name) && (
        (opts && opts.ignoreEmpty) ?
          !/^\s*$/.test(t.params[name].html) : true
      );
    },
  },
  /**
   * Add a parameter to the template with a given `name` and `value`.
   * If `name` is already a parameter in the template, we'll replace
   * its value.  Any {@link PTemplate.Parameter} previously returned by
   * {@link #get} for this name is discarded, so the next {@link #get}
   * reflects the new value.
   * @param {String|PTemplate.Parameter} name
   * @param {String|Node|PNodeList} value
   */
  add: {
    value: function(k, v) {
      if (k instanceof PTemplate.Parameter) {
        k = k.name;
      }
      var t = this._template;
      var html = toHtmlStr(this.node, v);
      t.template.params[k] = { html: html };
      // The cached wrapper still holds the old value's DOM; drop it.
      delete this._cachedParams[k];
      this._template = t;
    },
  },
  /**
   * Remove a parameter from the template with the given `name`.
   * If `keepField` is `true`, we will keep the parameter's name but
   * blank its value. Otherwise we will remove the parameter completely
   * *unless* other parameters are dependent on it (e.g. removing
   * `bar` from `{{foo|bar|baz}}` is unsafe because `{{foo|baz}}` is
   * not what we expected, so `{{foo||baz}}` will be produced instead).
   * Any {@link PTemplate.Parameter} previously returned by {@link #get}
   * for this name is discarded.
   * @param {String|PTemplate.Parameter} name
   * @param {Object} [opts]
   * @param {Boolean} [opts.keepField=false]
   */
  remove: {
    value: function(k, opts) {
      if (k instanceof PTemplate.Parameter) {
        k = k.name;
      }
      var t = this._template;
      var keepField = opts && opts.keepField;
      // if this is a numbered template, force keepField if there
      // are subsequent numbered templates.
      var isNumeric = (String(+k) === String(k));
      if (isNumeric && this.has(1 + (+k))) {
        keepField = true;
      }
      if (keepField) {
        t.template.params[k] = { html: '' };
      } else {
        delete t.template.params[k];
      }
      // The cached wrapper describes the parameter as it was; drop it so
      // get() does not hand back a stale (or removed) parameter.
      delete this._cachedParams[k];
      this._template = t;
    },
  },
  /**
   * Get the parameter whose name is `name`.  The same
   * {@link PTemplate.Parameter} object is returned on repeated calls
   * until the parameter is replaced by {@link #add} or {@link #remove}.
   * @param {String|PTemplate.Parameter} name
   * @return {PTemplate.Parameter} The parameter record.
   */
  get: {
    value: function(k) {
      if (k instanceof PTemplate.Parameter) {
        k = k.name;
      }
      if (!this._cachedParams[k]) {
        this._cachedParams[k] = new PTemplate.Parameter(this, k);
      }
      return this._cachedParams[k];
    },
  },
  // The nested node lists are each parameter's key (when it has one)
  // followed by its value.
  _children: {
    value: function() {
      var result = [];
      this.params.forEach(function(k) {
        var p = this.get(k);
        if (p.key) { result.push(p.key); }
        result.push(p.value);
      }, this);
      return result;
    },
  },
});
/**
 * @ignore
 * @static
 * @private
 */
PTemplate._selector = '[typeof~="mw:Transclusion"]';
/**
* @class PTemplate.Parameter
*
* Represents a parameter of a template.
*
* For example, the template `{{foo|bar|spam=eggs}}` contains two
* {@link PTemplate.Parameter}s: one whose #name is `"1"` and
* whose #value is a {@link PNodeList} corresponding to `"bar"`, and one
* whose #name is `"spam"` and whose #value is a {@link PNodeList}
* corresponding to `"eggs"`.
*
* See: {@link PTemplate}
*/
/**
* @method constructor
* @private
* @param {PTemplate} parent The parent template for this parameter.
* @param {String} k The parameter name.
*/
PTemplate.Parameter = function Parameter(parent, k) {
  var doc = parent.ownerDocument;
  var param = parent._template.template.params[k];
  // The value's HTML is parsed into a detached <div>; edits to the
  // resulting list are written back to the template by the update hook.
  var valDiv = doc.createElement('div');
  valDiv.innerHTML = param.html;
  this._name = k;
  this._value = new PNodeList(parent.pdoc, parent, valDiv, {
    // Runs with the PNodeList as `this`.
    update: function() {
      var t = this.parent._template;
      // Drop the `wt` form and store the edited HTML.
      delete t.template.params[k].wt;
      t.template.params[k].html = this.container.innerHTML;
      this.parent._template = t;
    },
  });
  var keyDiv = doc.createElement('div');
  this._key = new PNodeList(parent.pdoc, parent, keyDiv, {
    // Runs with the PNodeList as `this`; `_hasKey` records whether the
    // parameter currently has an explicit key.
    update: function() {
      var t = this.parent._template;
      if (this._hasKey) {
        if (!t.template.params[k].key) {
          t.template.params[k].key = {};
        }
        delete t.template.params[k].key.wt;
        t.template.params[k].key.html = this.container.innerHTML;
      } else {
        delete t.template.params[k].key;
      }
      this.parent._template = t;
    },
  });
  if (param.key && param.key.html) {
    // T106852 means this doesn't always work.
    keyDiv.innerHTML = param.key.html;
    this._key._hasKey = true;
  }
};
Object.defineProperties(PTemplate.Parameter.prototype, {
  /**
   * @property {String} name
   * The expanded parameter name.
   * Unnamed parameters are given numeric indexes.
   * @readonly
   */
  name: { get: function() { return this._name; }, },
  /**
   * @property {PNodeList|null} key
   * Source nodes corresponding to the parameter name.
   * For example, in `{{echo|{{echo|1}}=hello}}` the parameter name
   * is `"1"`, but the `key` field would contain the `{{echo|1}}`
   * template invocation, as a {@link PNodeList}.
   *
   * Assign `null` to remove the key, or a String, Node, or PNodeList to
   * set it (also when the parameter had no key before).
   */
  key: {
    get: function() { return this._key._hasKey ? this._key : null; },
    set: function(v) {
      if (v === null || v === undefined) {
        this._key.container.innerHTML = '';
        this._key._hasKey = false;
      } else {
        this._key.container.innerHTML =
          toHtmlStr(this._key.container, v);
        // Mark the key as present; otherwise the update hook would
        // delete the key we just assigned whenever the parameter
        // started out without one.
        this._key._hasKey = true;
      }
      this._key.update();
    },
  },
  /**
   * @property {PNodeList} value
   * The parameter value.
   * You can assign a String, Node, or PNodeList to replace it.
   */
  value: {
    get: function() { return this._value; },
    set: function(v) {
      this._value.container.innerHTML =
        toHtmlStr(this._value.container, v);
      this._value.update();
    },
  },
  // Promise for `key=value` wikitext; the plain #name stands in for the
  // key when the parameter has no explicit key.
  toWikitext: {
    value: Promise.method(function() {
      var k = this.key;
      return Promise.join(
        k ? k.toWikitext() : this.name,
        this.value.toWikitext()
      ).spread(function(keyWikitext, valueWikitext) {
        return keyWikitext + '=' + valueWikitext;
      });
    }),
  },
  // Debugging form: `key=value`, built from the lists' toString().
  toString: {
    value: function() {
      var k = this.key;
      return (k ? String(k) : this.name) + '=' + String(this.value);
    },
  },
});
/**
* PText represents ordinary unformatted text with no special properties.
* @class PText
* @extends PNode
*/
/**
* @method constructor
* @private
* @inheritdoc PNode#constructor
*/
PText = function PText(pdoc, parent, node, opts) {
	// All set-up is delegated to the PNode base constructor.
	PNode.call(this, pdoc, parent, node, opts);
};
util.inherits(PText, PNode);
Object.defineProperties(PText.prototype, {
	/**
	 * The text content, stored in the wrapped node's `data`.
	 * Assigning a new value writes it to the node and then calls
	 * `update()`.
	 * @property {String}
	 */
	value: {
		get: function() {
			var textNode = this.node;
			return textNode.data;
		},
		set: function(v) {
			var textNode = this.node;
			textNode.data = v;
			this.update();
		},
	},
	/**
	 * Extends {@link PNode#matches}: in addition to whatever the base
	 * implementation accepts, a target that is strictly equal to this
	 * node's #value also counts as a match.
	 * @method
	 * @inheritdoc PNode#matches
	 * @param {Node|PNode|String} target
	 */
	matches: {
		value: function(target) {
			var inherited = PNode.prototype.matches.call(this, target);
			if (inherited) {
				return inherited;
			}
			return this.value === target;
		},
	},
});
/**
 * @ignore
 * @static
 * @private
 */
PText._selector = 'TEXT'; // non-standard selector
/**
* PWikiLink represents an internal wikilink, like `[[Foo|Bar]]`.
* @class PWikiLink
* @extends PNode
*/
/**
* @method constructor
* @private
* @inheritdoc PNode#constructor
*/
PWikiLink = function PWikiLink(pdoc, parent, node, opts) {
	PNode.call(this, pdoc, parent, node, opts);
};
util.inherits(PWikiLink, PNode);
Object.defineProperties(PWikiLink.prototype, {
	/**
	 * The title of the linked page.
	 * Reading returns the wrapped node's `href` attribute with a leading
	 * `./` removed.  Writing stores `./` followed by the title key
	 * produced by `env.normalizedTitleKey`, then calls `update()`.
	 * @property {String}
	 */
	title: {
		// XXX title should be a PNodeList, but that requires handling
		// typeof="mw:ExpandedAttrs"
		get: function() {
			// The dot is escaped so that only a literal `./` prefix is
			// stripped; an unescaped `.` would also eat the first
			// character of an href such as `a/b`.
			return this.node.getAttribute('href').replace(/^\.\//, '');
		},
		set: function(v) {
			var href = './' + this.pdoc.env.normalizedTitleKey(v);
			this.node.setAttribute('href', href);
			this.update();
		},
	},
	/**
	 * The text to display, as a {@link PNodeList}.
	 * You can assign a String, Node, or PNodeList to mutate the text.
	 * @property {PNodeList}
	 */
	text: innerAccessor,
	// The link text is this node's only child list.
	_children: { value: function() { return [this.text]; }, },
});
/**
 * @ignore
 * @static
 * @private
 */
PWikiLink._selector = 'a[rel="mw:WikiLink"]';
/**
* A PDoc object wraps an entire Parsoid document. Since it is an
* instance of {@link PNodeList}, you can filter it, mutate it, etc.
* But it also provides means to serialize the document as either
* HTML (via {@link #document} or {@link #toHtml}) or wikitext
* (via {@link #toWikitext}).
* @class
* @extends PNodeList
* @alternateClassName Parsoid.PDoc
*/
var PDoc = function PDoc(env, doc) {
	// The document wraps its own <body>; it is its own `pdoc` and has
	// no parent.
	var body = doc.body;
	PNodeList.call(this, this, null, body);
	this.env = env;
};
util.inherits(PDoc, PNodeList);
Object.defineProperties(PDoc.prototype, {
	/**
	 * The HTML {@link Document} holding the article content, conforming to the
	 * [MediaWiki DOM specification](https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec).
	 * Reading yields the owner document of the wrapped container;
	 * assigning a document makes its `body` the new container.
	 * @property {Document}
	 */
	document: {
		get: function() {
			var body = this.container;
			return body.ownerDocument;
		},
		set: function(v) {
			var newBody = v.body;
			this.container = newBody;
		},
	},
	/**
	 * Serialize the document to an HTML string conforming to the
	 * [MediaWiki DOM specification](https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec).
	 * @inheritdoc PNodeList#toHtml
	 * @method
	 */
	toHtml: {
		value: function() {
			// document.outerHTML is a Parsoid-ism; real browsers don't
			// define it, so fall back to the body's outerHTML when it is
			// missing or empty.
			return this.document.outerHTML || this.document.body.outerHTML;
		},
	},
});
// Promise-using REPL, for easier debugging.
// We also handle `yield`, at least in common cases.
//
// Starts a node REPL with `Parsoid` and `Promise` preloaded in its
// context, and patches the REPL's `eval` so that when a command
// evaluates to a thenable, the REPL waits for it and reports the
// settled value (or the rejection) instead of the promise object.
var repl = function() {
	/* jshint evil:true */
	// The older version of jshint on jenkins is confused.
	var Parsoid = require('../');
	console.log('Parsoid REPL', Parsoid.version);
	var r = require('repl').start({ ignoreUndefined: true });
	// `var Parsoid = require('parsoid');` by default.
	r.context.Parsoid = Parsoid;
	// `var Promise = require('prfun');` by default.
	r.context.Promise = Promise;
	// Patch the `eval` method to wait for Promises to be resolved.
	var oldEval = r.eval;
	r.eval = function(cmd, context, filename, callback) {
		// If `cmd` mentions `yield`, wrap it in a `function*`
		if (/\byield\b/.test(cmd)) {
			// Hack to support `var xyz = yield pdq...;`, convert it
			// to `var xyz; ...{ xyz = yield pdq...; }...`
			var m = /^(var\s+)(\w+)\s*=/.exec(cmd);
			if (m) { cmd = cmd.slice(m[1].length); }
			cmd = 'Promise.async(function*(){' + cmd + '})();';
			if (m) { cmd = m[1] + m[2] + ';' + cmd; }
		}
		oldEval.call(r, cmd, context, filename, function(e, v) {
			// `typeof null` is 'object', so rule out null before probing
			// for `.then`; otherwise evaluating `null` at the prompt
			// would throw a TypeError here.
			var isThenable = v !== null && typeof v === 'object' &&
				typeof v.then === 'function';
			if (e || !isThenable) {
				return callback(e, v);
			}
			// OK, this is a promise!  Wait for the result.
			v.then(function(_v) {
				callback(null, _v);
			}, function(_e) {
				callback(_e);
			});
		});
	};
};
module.exports = {
PDoc: PDoc,
PNodeList: PNodeList,
PNode: PNode,
PComment: PComment,
PExtLink: PExtLink,
PHeading: PHeading,
PHtmlEntity: PHtmlEntity,
PMedia: PMedia,
PTag: PTag,
PTemplate: PTemplate,
PText: PText,
PWikiLink: PWikiLink,
// Helper function for `Promise.map`
toWikitext: Promise.method(function(n) { return n.toWikitext(); }),
// Useful REPL that handles promises and `yield` well.
repl: repl,
};