node-dom
Version:
Javascript fast W3C DOM generation.
239 lines (207 loc) • 5.9 kB
JavaScript
/* Modified from tmpvar/jsdom (MIT license) */
// When true, void elements are closed XHTML-style with a "/" before ">".
var isXHTML = false;

// Tags that are serialized without a closing tag.
// List from node-htmlparser
var singleTags = {
  area: 1,
  base: 1,
  basefont: 1,
  br: 1,
  col: 1,
  frame: 1,
  hr: 1,
  img: 1,
  input: 1,
  isindex: 1,
  link: 1,
  meta: 1,
  param: 1,
  embed: 1
};

// Regular expressions shared by the serializer and the formatter.
var expr = {
  // Any ASCII upper-case letter (captured).
  upperCaseChars: /([A-Z])/g,
  // A tag that is immediately followed by another "<" which does not start
  // "</" + the captured name; formatHTML inserts a line break after it.
  breakBetweenTags: /(<(\/?\w+).*?>)(?=<(?!\/\2))/gi,
  // A line with at least one character before a trailing end tag.
  endsWithEndTag: /.+<\/\w[^>]*>$/,
  // A line that begins with an end tag.
  startsWithEndTag: /^<\/\w/,
  // A line that begins with a start tag that is not self-closed ("/>").
  startsWithStartTag: /^<\w[^>]*[^\/]>.*$/,
  // An opening "<" followed by exactly one of the singleTags names.
  // The negative lookahead stops a name from matching as a mere prefix of a
  // longer tag name: without it "<col" matched "<colgroup>" and "<frame"
  // matched "<frameset>", so those container tags were treated as void.
  singleTag: (function() {
    var names = Object.keys(singleTags);
    return new RegExp('<(?:' + names.join('|') + ')(?![\\w-])', 'i');
  })()
};
/**
 * String.prototype.replace callback: returns "-" followed by the lower-cased
 * captured letter. The full match (`str`) is ignored.
 * Presumably paired with expr.upperCaseChars to turn camelCase names into
 * hyphenated ones; no call site is visible in this part of the file.
 *
 * @param {string} str Full match (unused).
 * @param {string} letter Captured character.
 * @returns {string} "-" + lower-cased `letter`.
 */
var uncanon = function(str, letter) {
  var lowered = letter.toLowerCase();
  return '-'.concat(lowered);
};
var HTMLEncode = require('./htmlencoding').HTMLEncode;
// Lookup table keyed by style property name.
// NOTE(review): nothing in the visible code reads this table; presumably these
// are style properties to skip during serialization — confirm against callers.
var styleIgnore = { 'top': 1, 'left': 1 };
exports.stringifyElement = function stringifyElement(element) {
//Modifications
//All attributes are retrieved in html
//Even the ones set in js not present initially
//Then not only the ones defined initially or in HTMLElement
var tagName = element.tagName.toLowerCase(),
ret = {
start: "<" + tagName,
end:''
}, attributes = [],
attribute = null;
var prop=Object.keys(element);
var l=prop.length;
ret.start += " ";
for (var i=0;i<l;i++) {
var p=prop[i];
if (p!='style') {
var value=element[p];
if ((typeof(value)=='string')&&(value)) {
if (p=='_class') {p='class';};
if (p=='__href') {p='href';};
if (p=='__src') {p='src';};
if (p=='___onload') {p='onload';};
if (p=='_id') {p='id';};
if (p=='_name') {p='name';};
attributes.push(p + '="' + HTMLEncode(value) + '"');
}
}
};
ret.start += attributes.join(" ");
if (element.style) {
var tmp=element.style.cssText;
if (tmp!='') {
ret.start += ' style="' + HTMLEncode(tmp) + '"';
}
}
if (singleTags[tagName]) {
if (isXHTML) {
ret.start += "/";
}
ret.start += ">";
ret.end = '';
} else {
ret.start += ">";
ret.end = "</" + tagName + ">";
}
return ret;
};
exports.formatHTML = function formatHTML(html) {
var formatted = '',
pad = 0;
html = html.replace(expr.breakBetweenTags, '$1\r\n');
html.split('\r\n').forEach(function(node, index) {
var indent = 0, padding = '', i;
if (node.match(expr.endsWithEndTag)) {
indent = 0;
} else if (node.match(expr.startsWithEndTag)) {
if (pad != 0) {
pad -= 1;
}
} else if (node.match(expr.startsWithStartTag)) {
if (!expr.singleTag.exec(node)) {
indent = 1;
}
} else {
indent = 0;
}
for (i = 0; i < pad; i++) {
padding += ' ';
}
formatted += padding + node + '\r\n';
pad += indent;
});
return formatted;
};
// Case-insensitive test for node names containing "SCRIPT" or "STYLE";
// generateHtmlRecursive applies it to a node's parent to derive its rawText flag.
// NOTE(review): the pattern is unanchored, so names such as "NOSCRIPT" match
// as well — confirm that this is intended.
var rawTextElements = /SCRIPT|STYLE/i;
/**
 * Serializes a doctype node to its "<!DOCTYPE ...>" declaration.
 *
 * If the owning document carries a `_fullDT` string, that string is returned
 * verbatim. Otherwise the declaration is assembled from `name`, `publicId`
 * and `systemId`; each part is preceded by a single space, so no stray space
 * is left before the closing ">" when only a public id is present.
 *
 * @param {Object} doctype Node exposing `name` and optionally `publicId`,
 *   `systemId` and `ownerDocument`.
 * @returns {string} The doctype declaration.
 */
function stringifyDoctype (doctype) {
  if (doctype.ownerDocument && doctype.ownerDocument._fullDT) {
    return doctype.ownerDocument._fullDT;
  }

  var dt = '<!DOCTYPE ' + doctype.name;

  if (doctype.publicId) {
    // Public ID may never contain double quotes, so this is always safe.
    dt += ' PUBLIC "' + doctype.publicId + '"';
  } else if (doctype.systemId) {
    // The SYSTEM keyword is only needed when there is no public id.
    dt += ' SYSTEM';
  }

  if (doctype.systemId) {
    // System ID may contain double quotes OR single quotes, but never both,
    // so pick whichever quote character the value does not use.
    var quote = doctype.systemId.indexOf('"') > -1 ? "'" : '"';
    dt += ' ' + quote + doctype.systemId + quote;
  }

  return dt + '>';
}
exports.generateHtmlRecursive = function generateHtmlRecursive(node, rawText, removeScript) {
var ret = "", parent, current, i;
if (node) {
if (node.nodeType &&
node.nodeType === node.ENTITY_REFERENCE_NODE)
{
node = node._entity;
}
if (!rawText && node._parentNode) {
rawText = rawTextElements.test(node._parentNode.nodeName);
}
switch (node.nodeType) {
case node.ELEMENT_NODE:
if ((removeScript)&&(node.__name=='script')) {break;};
current = exports.stringifyElement(node);
ret += current.start;
if (node._childNodes.length > 0) {
for (i=0; i<node._childNodes.length; i++) {
ret += exports.generateHtmlRecursive(node._childNodes[i], rawText, removeScript);
}
} else {
//ret += rawText ? node.nodeValue : HTMLEncode(node.nodeValue);
ret += node.nodeValue || '';
}
ret += current.end;
break;
case node.TEXT_NODE:
//ret += rawText ? node.nodeValue : HTMLEncode(node.nodeValue);
ret += node.nodeValue;
break;
case node.COMMENT_NODE:
ret += '<!--' + node.nodeValue + '-->';
break;
case node.DOCUMENT_NODE:
for (i=0; i<node._childNodes.length; i++) {
ret += exports.generateHtmlRecursive(node._childNodes[i], rawText, removeScript);
}
break;
case node.DOCUMENT_TYPE_NODE:
ret += stringifyDoctype(node);
break;
}
}
//require('sys').puts(require('sys').inspect(ret));
return ret;
};
exports.domToHtml = function(dom, removeScript, raw) {
var ret = "";
if (dom.toArray) {
// node list
dom = dom.toArray();
}
if (Array.isArray(dom)) {
for (var i=0,len=dom.length; i<len; i++) {
ret += exports.generateHtmlRecursive(dom[i], raw, removeScript);
}
} else {
// single node
ret = exports.generateHtmlRecursive(dom, raw, removeScript);
}
//if (noformat) {
return ret;
//} else {
//Dangerous, can break the page if html inside scripts for example (yahoo.com for example)
//return exports.formatHTML(ret);
//}
};