foremark
Version:
A technology for writing semi-plain text documents that extends the concept of Markdeep.
437 lines • 15.1 kB
JavaScript
// Flag the CommonJS `exports` object with `__esModule` (marker emitted for
// transpiled ES modules).
Object.defineProperty(exports, "__esModule", { value: true });
// Monotonic counter; `transformHtmlWith` post-increments it to give every
// placeholder tag a distinct `ph-id` value.
var nextPlaceholderId = 1;
/**
 * Builds the markup of a self-closing placeholder tag named `name` whose
 * `ph-id` attribute is `i`.
 */
function placeholderHtmlWithId(name, i) {
    var opening = "<".concat(name);
    var idAttribute = " ph-id=\"".concat(i, "\"");
    return opening + idAttribute + " />";
}
// Matches a placeholder tag in the form produced by `placeholderHtmlWithId`;
// capture group 1 is the numeric placeholder id.
var PLACEHOLDER_REGEX = /<[-_a-zA-Z0-9]+ ph-id="([0-9]+)" \/>/g;
// Name of the attribute that carries the placeholder id.
var PLACEHOLDER_ATTR = 'ph-id';
// Scratch `<i>` element created by `setWorkingDomUnchecked`. It is used for
// the XML sanity check, for deserializing markup in `transformTextNodeWith`
// and for validating attribute names in `isValidXmlNameSlow`. It stays
// `undefined` until a working DOM has been set.
var testElement;
/**
 * Registers the DOM global object that the library uses internally.
 *
 * In a Node.js environment this must be called before anything else in this
 * module is used.
 *
 * The supplied DOM is probed first: an element created as `'i'` must report
 * the tag name `'i'` unchanged. If it does not, an `Error` is thrown and the
 * working DOM is left untouched.
 *
 * # Example
 *
 *     import {JSDOM} from 'jsdom';
 *     const dom = new JSDOM('<html />', {
 *         contentType: 'application/xml',
 *     });
 *     setWorkingDom(dom.window);
 *
 */
function setWorkingDom(window) {
    var probe = window.document.createElement('i');
    var keepsTagNameCase = probe.tagName === 'i';
    if (!keepsTagNameCase) {
        throw new Error("Sanity check failed - maybe the document is not XML?");
    }
    setWorkingDomUnchecked(window);
}
exports.setWorkingDom = setWorkingDom;
function setWorkingDomUnchecked(window) {
testElement = window.document.createElement('i');
exports.DomTypes = window;
}
/**
 * Runs the same sanity check as `setWorkingDom` against the scratch element
 * that `setWorkingDomUnchecked` created earlier.
 *
 * Throws an `Error` when the element's tag name is not exactly `'i'`.
 */
function checkXhtml() {
    var keepsTagNameCase = testElement.tagName === 'i';
    if (keepsTagNameCase) {
        return;
    }
    throw new Error("Sanity check failed - maybe the document is not XML?");
}
exports.checkXhtml = checkXhtml;
// When a global `window` exists, adopt it as the working DOM at load time.
// The unchecked variant is used here, so the XML sanity check is deferred to
// an explicit `checkXhtml()` call.
if (typeof window !== 'undefined') {
setWorkingDomUnchecked(window);
}
/**
 * Tells whether `node` is a non-null node whose `nodeType` is
 * `ELEMENT_NODE` (1).
 */
function isElement(node) {
    if (node == null) {
        return false;
    }
    return node.nodeType === 1 /* ELEMENT_NODE */;
}
exports.isElement = isElement;
/**
 * Tells whether `node` is a non-null node whose `nodeType` is `TEXT_NODE` (3)
 * or `CDATA_SECTION_NODE` (4).
 */
function isText(node) {
    if (node == null) {
        return false;
    }
    var kind = node.nodeType;
    return kind === 3 /* TEXT_NODE */ || kind === 4 /* CDATA_SECTION_NODE */;
}
exports.isText = isText;
/**
 * Transforms the HTML markup of a given node's contents using a supplied
 * function.
 *
 * Before the markup is handed to `tx`, text is XML-escaped (`&`, `<`, `>`)
 * and every non-text child is protected by replacing it with a placeholder.
 * A placeholder is a self-closing tag that looks like
 * `<tagname ph-id="12345" />`. The tag name is identical to the original tag
 * name (if the original node was an element), or `mf-ph` (otherwise).
 *
 * `tx` is called as `tx(html, ctx)` and returns the new markup. `ctx.expand`
 * replaces the placeholders of protected *elements* in a string with their
 * `outerHTML`. If `tx` returns its input unchanged, the node is left alone.
 * Otherwise the node's `innerHTML` is replaced and the original child nodes
 * are moved back to wherever their placeholders ended up.
 *
 * If `recursionFilter` is specified, the contents of a child element are
 * transformed as well (once, before the parent) if the element matches the
 * predicate. If `reverse` is truthy, matching children are visited from last
 * to first at every nesting level.
 */
function transformHtmlWith(node, tx, recursionFilter, reverse) {
    // Transform the contents of matching child elements first, exactly once
    // each, so that `tx` is never applied twice to the same content.
    if (recursionFilter) {
        for (var child = reverse ? node.lastChild : node.firstChild; child;) {
            // Grab the sibling up front so the walk does not depend on what
            // the recursive call does to `child`.
            var sibling = reverse ? child.previousSibling : child.nextSibling;
            if (isElement(child) && recursionFilter(child)) {
                transformHtmlWith(child, tx, recursionFilter, reverse);
            }
            child = sibling;
        }
    }
    // Serialize the contents: escaped text plus one placeholder per other node
    var placeholders = new Map();
    var html = '';
    for (var n = node.firstChild; n; n = n.nextSibling) {
        if (isText(n)) {
            html += n.textContent
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;');
        }
        else {
            var placeholderId = String(nextPlaceholderId++);
            placeholders.set(placeholderId, n);
            html += placeholderHtmlWithId(isElement(n) ? n.tagName : "mf-ph" /* Placeholder */, placeholderId);
        }
    }
    var ctx = {
        expand: function (html) {
            return html.replace(PLACEHOLDER_REGEX, function (match, id) {
                var original = placeholders.get(id);
                if (original && isElement(original)) {
                    return original.outerHTML;
                }
                else {
                    return match;
                }
            });
        },
    };
    var orig = html;
    html = tx(html, ctx);
    if (orig === html) {
        // Nothing changed - avoid touching the DOM at all
        return;
    }
    // Replace the old contents. Placeholders are normalized to explicit
    // `<mf-ph ph-id="..."></mf-ph>` pairs so they can be located afterwards
    // regardless of the original tag name.
    html = html.replace(PLACEHOLDER_REGEX, "<" + "mf-ph" /* Placeholder */ + " " + PLACEHOLDER_ATTR + "=\"$1\"></" + "mf-ph" /* Placeholder */ + ">");
    node.innerHTML = html;
    if (placeholders.size === 0) {
        return;
    }
    // Put the original nodes back in place of their placeholders
    function fillPlaceholders(e) {
        if (e.tagName === "mf-ph" /* Placeholder */) {
            var id = e.getAttribute(PLACEHOLDER_ATTR);
            var original = id && placeholders.get(id);
            if (original) {
                var parent = e.parentElement;
                parent.insertBefore(original, e);
                parent.removeChild(e);
                return;
            }
        }
        for (var c = e.firstChild; c;) {
            // The current child may be replaced, so remember its successor
            var next = c.nextSibling;
            if (isElement(c)) {
                fillPlaceholders(c);
            }
            c = next;
        }
    }
    fillPlaceholders(node);
}
exports.transformHtmlWith = transformHtmlWith;
/**
 * Transforms the HTML markup of text nodes in a given node using a supplied
 * function.
 *
 * The XML-escaped markup (`&`, `<`, `>`) of each text node is passed to `tx`.
 * `tx` returns the transformed markup, which may include other kinds of
 * nodes. A text node whose markup comes back unchanged is left untouched;
 * otherwise it is replaced by the nodes parsed from the returned markup.
 *
 * If `recursionFilter` is specified, it is consulted for every element
 * encountered (including `node` itself) and the element's contents are
 * visited only if it does not return `false`. If it is omitted, only the text
 * nodes directly inside `node` are transformed.
 *
 * If `reverse` is truthy, nodes are visited with `forEachNodeReversePreorder`
 * instead of `forEachNodePreorder`.
 *
 * This function is theoretically faster than `transformHtmlWith`. This
 * function can be used in place of `transformHtmlWith` if:
 *
 *  - Replaced substrings never include a XML element.
 *  - Replaced substrings are not sensitive to context such as line breaks and
 *    the start and end of an input string.
 *  - In addition, there are no sequences of two or more consecutive text
 *    nodes.
 *
 */
function transformTextNodeWith(node, tx, recursionFilter, reverse) {
    var root = node;
    var walk = reverse ? forEachNodeReversePreorder : forEachNodePreorder;
    walk(root, function (current) {
        if (isElement(current)) {
            if (recursionFilter) {
                return recursionFilter(current);
            }
            // No filter: descend into the root only, never into child elements
            return current === root;
        }
        if (!isText(current)) {
            return;
        }
        var orig = current.textContent
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
        var html = tx(orig);
        if (orig === html) {
            return;
        }
        // Deserialize the transformed XML (Yep, `Text` does not have
        // `outerHTML` or `insertAdjacentHTML`.)
        testElement.innerHTML = html;
        // `insertBefore` moves each node out of the scratch element, so this
        // loop drains it.
        while (testElement.firstChild != null) {
            current.parentElement.insertBefore(testElement.firstChild, current);
        }
        current.parentElement.removeChild(current);
    });
}
exports.transformTextNodeWith = transformTextNodeWith;
/**
 * Visits `node` and then, if it is an element, each of its descendants in
 * document order, calling `f` on every visited node.
 *
 * When `f` returns `false` for a node, that node's children are skipped.
 */
function forEachNodePreorder(node, f) {
    var shouldDescend = f(node) !== false;
    if (!shouldDescend || !isElement(node)) {
        return;
    }
    var child = node.firstChild;
    while (child) {
        // Remember the successor first; `f` may detach the current child.
        var following = child.nextSibling;
        forEachNodePreorder(child, f);
        child = following;
    }
}
exports.forEachNodePreorder = forEachNodePreorder;
/**
 * Iterate all nodes in pre-order using a callback function. The iteration
 * order of child nodes is reversed (last child first) at every level of the
 * tree, not just directly below `node`.
 *
 * Child nodes are not traversed if the callback function returns `false`.
 */
function forEachNodeReversePreorder(node, f) {
    if (f(node) === false) {
        return;
    }
    if (isElement(node)) {
        for (var n = node.lastChild; n;) {
            // Remember the predecessor first; `f` may detach the current node.
            var next = n.previousSibling;
            // Recurse into this function (not the forward variant) so that
            // nested levels are reversed as well.
            forEachNodeReversePreorder(n, f);
            n = next;
        }
    }
}
exports.forEachNodeReversePreorder = forEachNodeReversePreorder;
/**
 * Attempts to fix malformed XML attributes.
 *
 * `xml` is the attribute portion of a tag. The result contains every accepted
 * attribute in the form `name="value"` with the value XML-escaped. Leading
 * whitespace of `xml` is preserved.
 *
 * `attributeNames` specifies the list of other attributes used to reject
 * duplicate attributes. Found attributes are added to `attributeNames`
 * (the array is mutated). It defaults to a fresh empty array.
 *
 * `onwarning` is called with a message for every problem that is repaired or
 * dropped. It defaults to a no-op.
 *
 * Example: `legalizeAttributes(' a b="<>"', [])` returns
 * `' a="a" b="&lt;&gt;"'`.
 *
 * # Positional attributes
 *
 * This function can automatically add a name to an attribute without one. The
 * attribute names are chosen, in order, from `positionalAttributes`. Nameless
 * values are dropped (with a warning) when `positionalAttributes` is not
 * given or is exhausted.
 */
function legalizeAttributes(xml, attributeNames, onwarning, positionalAttributes) {
    if (attributeNames === void 0) { attributeNames = []; }
    if (onwarning === void 0) { onwarning = function () { }; }
    if (xml === '') {
        return xml;
    }
    // Removes the quotation marks around a raw attribute value. A value that
    // opens with a quote but is not closed by a second one (including a value
    // that is just the lone quote character) triggers a warning.
    function unquote(value, name) {
        var quote = value.charAt(0);
        if (quote !== '"' && quote !== "'") {
            // Quotation mark elision - probably valid in HTML
            return value;
        }
        if (value.length >= 2 && value.charAt(value.length - 1) === quote) {
            return value.slice(1, -1);
        }
        onwarning("Value for attribute '" + name + "' has no closing quotation mark.");
        return value.slice(1);
    }
    var parts = xml.match(/^(\s*)([^]*)$/);
    var spaces = parts[1];
    var inner = parts[2];
    var nextIndex = 0;
    // A space is prepended so that the first attribute also has a separator;
    // it is removed again by the final `slice(1)`.
    return spaces + (' ' + inner).replace(
    // This regex matches a XML attribute with rather forgiving syntax.
    // The union of all matches must cover entire the input except for
    // trailing whitespace characters (if any).
    /(\s*)(?:([^"'\s=][^\s=]*)(?:(\s*=\s*)("[^"]*"?|'[^']*'?|[^"'\s]+)?)?|("[^"]*"?|'[^']*'?))/ig,
    //^^^     ^^^^^^^^^^^^^^^      ^^^^^^^^   ^^^^^^^^ ^^^^^^^^ ^^^^^^^^     ^^^^^^^^^^^^^^^^^
    // |           name                        "value" 'value'   value       positional attr.
    // +-- separating space
    function (_, space, name, equal, value, posAttr) {
        if (space === '') {
            onwarning("A separator between attributes is missing.");
            space = ' ';
        }
        if (posAttr) {
            if (!positionalAttributes) {
                onwarning("Attribute name is missing.");
                return '';
            }
            if (nextIndex >= positionalAttributes.length) {
                onwarning("No more positional attributes.");
                return '';
            }
            name = positionalAttributes[nextIndex++];
            equal = '=';
            value = posAttr;
        }
        else {
            if (!isValidXmlName(name)) {
                onwarning("Invalid attribute name: '" + name + "'");
                return '';
            }
        }
        if (attributeNames.indexOf(name) >= 0) {
            onwarning("Duplicate attribute: '" + name + "'");
            return '';
        }
        attributeNames.push(name);
        if (equal == null) {
            // Value elision - valid in HTML, so do not issue a warning
            equal = '=';
            value = "\"" + escapeXmlText(name) + "\"";
        }
        else if (value == null) {
            onwarning("Value for attribute '" + name + "' is missing.");
            value = '""';
        }
        else {
            // Expand and re-escape the value in either case
            value = "\"" + escapeXmlText(unescapeXmlText(unquote(value, name))) + "\"";
        }
        return space + name + equal + value;
    }).slice(1);
}
exports.legalizeAttributes = legalizeAttributes;
/**
 * Checks whether `name` is acceptable as an
 * [XML NCName](http://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCName).
 *
 * Names made only of an ASCII letter followed by word characters or hyphens
 * are accepted immediately; everything else is delegated to
 * `isValidXmlNameSlow`.
 */
function isValidXmlName(name) {
    // Fast path - This also prevents collision with the attributes that have
    // a meaning predefined by the HTML specification.
    var isSimpleAsciiName = name.match(/^[a-zA-Z][-\w]*$/) !== null;
    return isSimpleAsciiName || isValidXmlNameSlow(name);
}
/**
 * Validates an attribute name by trying to set it on the shared scratch
 * element: the name is valid if `setAttribute` does not throw. The attribute
 * is removed again afterwards in either case.
 */
function isValidXmlNameSlow(name) {
    // I could've used <https://www.npmjs.com/package/ncname>, but that would
    // increase the bundle size and the number of dependencies.
    var accepted = false;
    try {
        testElement.setAttribute(name, "1");
        accepted = true;
    }
    catch (_) {
        accepted = false;
    }
    finally {
        testElement.removeAttribute(name);
    }
    return accepted;
}
/**
 * Escapes `text` for use in XML text content or inside a double-quoted
 * attribute value by replacing `&`, `"`, `<` and `>` with the corresponding
 * predefined entity references.
 */
function escapeXmlText(text) {
    // `&` must be handled first so the ampersands introduced by the other
    // replacements are not escaped a second time.
    return text
        .replace(/&/g, '&amp;')
        .replace(/"/g, '&quot;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
}
exports.escapeXmlText = escapeXmlText;
/**
 * Expands character references in `xml`.
 *
 * Recognized are decimal (`&#65;`) and hexadecimal (`&#x41;`) numeric
 * references up to U+10FFFF and the named entities `&amp;`, `&apos;`,
 * `&quot;`, `&lt;`, `&gt;` and `&nbsp;`. Anything else, including a stray
 * `&` and unknown entity names, is left untouched.
 */
function unescapeXmlText(xml) {
    // I could've used <https://www.npmjs.com/package/entities>, but that would
    // increase the bundle size and the number of dependencies.
    //
    // The pattern only matches well-formed references, so a stray `&` cannot
    // swallow a real reference that follows it.
    return xml.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, function (match, body) {
        // Numeric character reference
        if (body.charAt(0) === '#') {
            var isHex = body.charAt(1) === 'x' || body.charAt(1) === 'X';
            var codePoint = isHex
                ? parseInt(body.substring(2), 16)
                : parseInt(body.substring(1), 10);
            if (codePoint <= 0x10FFFF) {
                return String.fromCodePoint(codePoint);
            }
            // Out of the Unicode range - keep the reference as written
            return match;
        }
        // Character entity reference
        switch (match) {
            case '&amp;': return '&';
            case '&apos;': return '\'';
            case '&quot;': return '"';
            case '&lt;': return '<';
            case '&gt;': return '>';
            case '&nbsp;': return '\u00a0';
        }
        return match;
    });
}
exports.unescapeXmlText = unescapeXmlText;
/**
 * Attempts to fix malformed XML markup.
 *
 * This function is designed to only fix particular classes of errors found in
 * wild, specifically:
 *
 *  - SoundCloud's oEmbed response includes unescaped ampersands in
 *    attributes.
 *  - Vimeo's oEmbed response uses attribute value elision.
 *  - Facebook's oEmbed response includes unescaped ampersands in text
 *    contents.
 *
 * The attribute portion of every tag is passed through `legalizeAttributes`
 * (warnings go to `onwarning`), and text between tags is unescaped and
 * re-escaped. The `/` that ends a self-closing tag is kept as is.
 */
function legalizeXML(xml, onwarning) {
    // Splitting with three capture groups yields a repeating pattern of
    // [text, '<name', attributes, '>'], hence the stride of 4 below.
    var parts = xml.split(/(<[^\s>]+)([^>]*)(>)/);
    for (var i = 2; i < parts.length; i += 4) {
        var attrs = parts[i];
        if (attrs === '') {
            continue;
        }
        // Hold back the `/` of a self-closing tag (`<br />`, `<a b="c"/>`);
        // it would otherwise be rejected as an invalid attribute name and
        // dropped. A slash directly following an unquoted value (`a=b/`) is
        // ambiguous and is left for `legalizeAttributes` to deal with.
        var selfClosing = '';
        if (/(?:^|[\s"'])\/$/.test(attrs)) {
            selfClosing = '/';
            attrs = attrs.slice(0, -1);
        }
        if (attrs !== '') {
            attrs = legalizeAttributes(attrs, [], onwarning);
        }
        parts[i] = attrs + selfClosing;
    }
    for (var j = 0; j < parts.length; j += 4) {
        if (/[&<>]/.test(parts[j])) {
            parts[j] = escapeXmlText(unescapeXmlText(parts[j]));
        }
    }
    return parts.join('');
}
exports.legalizeXML = legalizeXML;
//# sourceMappingURL=dom.js.map
;