UNPKG

node-dom

Version:

Fast W3C DOM generation in JavaScript.

239 lines (207 loc) 5.9 kB
/* Modified from tmpvar/jsdom (MIT license) */ var isXHTML = false; //List from node-htmlparser var singleTags = { area: 1, base: 1, basefont: 1, br: 1, col: 1, frame: 1, hr: 1, img: 1, input: 1, isindex: 1, link: 1, meta: 1, param: 1, embed: 1 }; var expr = { upperCaseChars: /([A-Z])/g, breakBetweenTags: /(<(\/?\w+).*?>)(?=<(?!\/\2))/gi, endsWithEndTag: /.+<\/\w[^>]*>$/, startsWithEndTag: /^<\/\w/, startsWithStartTag: /^<\w[^>]*[^\/]>.*$/, singleTag: (function() { var tags = []; for (var i in singleTags) { tags.push(i); } return new RegExp('<' + tags.join('|<'), 'i'); })() }; var uncanon = function(str, letter) { return '-' + letter.toLowerCase(); }; var HTMLEncode = require('./htmlencoding').HTMLEncode; var styleIgnore = { top: 1, left: 1 }; exports.stringifyElement = function stringifyElement(element) { //Modifications //All attributes are retrieved in html //Even the ones set in js not present initially //Then not only the ones defined initially or in HTMLElement var tagName = element.tagName.toLowerCase(), ret = { start: "<" + tagName, end:'' }, attributes = [], attribute = null; var prop=Object.keys(element); var l=prop.length; ret.start += " "; for (var i=0;i<l;i++) { var p=prop[i]; if (p!='style') { var value=element[p]; if ((typeof(value)=='string')&&(value)) { if (p=='_class') {p='class';}; if (p=='__href') {p='href';}; if (p=='__src') {p='src';}; if (p=='___onload') {p='onload';}; if (p=='_id') {p='id';}; if (p=='_name') {p='name';}; attributes.push(p + '="' + HTMLEncode(value) + '"'); } } }; ret.start += attributes.join(" "); if (element.style) { var tmp=element.style.cssText; if (tmp!='') { ret.start += ' style="' + HTMLEncode(tmp) + '"'; } } if (singleTags[tagName]) { if (isXHTML) { ret.start += "/"; } ret.start += ">"; ret.end = ''; } else { ret.start += ">"; ret.end = "</" + tagName + ">"; } return ret; }; exports.formatHTML = function formatHTML(html) { var formatted = '', pad = 0; html = html.replace(expr.breakBetweenTags, '$1\r\n'); 
html.split('\r\n').forEach(function(node, index) { var indent = 0, padding = '', i; if (node.match(expr.endsWithEndTag)) { indent = 0; } else if (node.match(expr.startsWithEndTag)) { if (pad != 0) { pad -= 1; } } else if (node.match(expr.startsWithStartTag)) { if (!expr.singleTag.exec(node)) { indent = 1; } } else { indent = 0; } for (i = 0; i < pad; i++) { padding += ' '; } formatted += padding + node + '\r\n'; pad += indent; }); return formatted; }; var rawTextElements = /SCRIPT|STYLE/i; function stringifyDoctype (doctype) { if (doctype.ownerDocument && doctype.ownerDocument._fullDT) { return doctype.ownerDocument._fullDT; } var dt = '<!DOCTYPE ' + doctype.name; if (doctype.publicId) { // Public ID may never contain double quotes, so this is always safe. dt += ' PUBLIC "' + doctype.publicId + '" '; } if (!doctype.publicId && doctype.systemId) { dt += ' SYSTEM ' } if (doctype.systemId) { // System ID may contain double quotes OR single quotes, not never both. if (doctype.systemId.indexOf('"') > -1) { dt += "'" + doctype.systemId + "'"; } else { dt += '"' + doctype.systemId + '"'; } } dt += '>'; return dt; } exports.generateHtmlRecursive = function generateHtmlRecursive(node, rawText, removeScript) { var ret = "", parent, current, i; if (node) { if (node.nodeType && node.nodeType === node.ENTITY_REFERENCE_NODE) { node = node._entity; } if (!rawText && node._parentNode) { rawText = rawTextElements.test(node._parentNode.nodeName); } switch (node.nodeType) { case node.ELEMENT_NODE: if ((removeScript)&&(node.__name=='script')) {break;}; current = exports.stringifyElement(node); ret += current.start; if (node._childNodes.length > 0) { for (i=0; i<node._childNodes.length; i++) { ret += exports.generateHtmlRecursive(node._childNodes[i], rawText, removeScript); } } else { //ret += rawText ? node.nodeValue : HTMLEncode(node.nodeValue); ret += node.nodeValue || ''; } ret += current.end; break; case node.TEXT_NODE: //ret += rawText ? 
node.nodeValue : HTMLEncode(node.nodeValue); ret += node.nodeValue; break; case node.COMMENT_NODE: ret += '<!--' + node.nodeValue + '-->'; break; case node.DOCUMENT_NODE: for (i=0; i<node._childNodes.length; i++) { ret += exports.generateHtmlRecursive(node._childNodes[i], rawText, removeScript); } break; case node.DOCUMENT_TYPE_NODE: ret += stringifyDoctype(node); break; } } //require('sys').puts(require('sys').inspect(ret)); return ret; }; exports.domToHtml = function(dom, removeScript, raw) { var ret = ""; if (dom.toArray) { // node list dom = dom.toArray(); } if (Array.isArray(dom)) { for (var i=0,len=dom.length; i<len; i++) { ret += exports.generateHtmlRecursive(dom[i], raw, removeScript); } } else { // single node ret = exports.generateHtmlRecursive(dom, raw, removeScript); } //if (noformat) { return ret; //} else { //Dangerous, can break the page if html inside scripts for example (yahoo.com for example) //return exports.formatHTML(ret); //} };