UNPKG

mark-js

Version:

Mark, a unified notation for all

github.com/henry-luo/mark

213 lines (195 loc) • 6.76 kB

JavaScript

/**
 * Factory that attaches HTML/XML conversion helpers to MarkConvert.
 *
 * Calling MarkConvert(Mark) defines two static functions on MarkConvert itself:
 *   - MarkConvert.parse(source, options): parse HTML/XML text into Mark objects;
 *   - MarkConvert.toSgml(object, options): stringify a Mark object as HTML/XML.
 *
 * @param {Function} Mark - the Mark interface; called as Mark(name, ...) to
 *   construct objects, and Mark.pragma(text, parent) to construct pragmas.
 * @returns {Function} MarkConvert itself, with parse/toSgml attached.
 */
var MarkConvert = function(Mark) {
  const m = Mark; // holding reference to the Mark interface

  /**
   * Convert a DOM element (and its subtree) into a Mark object.
   *
   * @param {Element} elmt - DOM element to convert; a falsy value yields null.
   * @param {Object} options - options.format === 'xml' keeps the tag name case,
   *   otherwise the tag name is lower-cased; options.ignoreSpace skips
   *   whitespace-only text nodes.
   * @returns {Object|null} the Mark object, or null when elmt is falsy.
   */
  function toMark(elmt, options) {
    if (!elmt) { return null; }
    const obj = m(options.format === 'xml' ? elmt.tagName : elmt.tagName.toLowerCase());
    if (elmt.hasAttributes()) {
      const attrs = elmt.attributes;
      for (let i = 0; i < attrs.length; i++) {
        const attr = attrs[i];
        if (attr.specified) { obj[attr.name] = attr.value; }
      }
    }
    if (elmt.hasChildNodes()) {
      for (let i = 0; i < elmt.childNodes.length; i++) {
        const child = elmt.childNodes[i];
        if (child.nodeType === 3) { // text node
          // whitespace-only text is dropped when options.ignoreSpace is set
          if (!(options.ignoreSpace && /^\s*$/.test(child.textContent))) {
            obj.push(child.textContent);
          }
        } else if (child.nodeType === 1) { // element
          obj.push(toMark(child, options));
        } else if (child.nodeType === 8) { // comment
          // '--' is kept to differentiate with PI
          obj.push(m.pragma('--' + child.nodeValue, obj));
        }
        // todo: other node types are ignored
      }
    }
    return obj;
  }

  /**
   * Parse HTML or XML source text into Mark objects.
   *
   * When a global `document` exists, the DOM (DOMParser / innerHTML) is used;
   * otherwise the 'htmlparser2' package is loaded through require().
   *
   * @param {string} source - HTML/XML text.
   * @param {Object} [options] - options.format ('xml' for XML mode) and
   *   options.ignoreSpace (skip whitespace-only text).
   * @returns {Object|Array|null} a single Mark object, or an array when the
   *   source yields more than one top-level node.
   */
  MarkConvert.parse = function(source, options) {
    if (!options) {
      // the rest of the code reads `ignoreSpace`, so the default uses that key
      options = { ignoreSpace: false };
    }
    if (typeof document !== 'undefined') { // in browser environment
      if (source.match(/^\s*(<!doctype|<html)/i)) { // treat as whole doc
        // innerHTML is insufficient, as global attributes on root html element will be ignored
        const parser = new DOMParser();
        const doc = parser.parseFromString(source, "text/html");
        // todo: error handling
        return toMark(doc.documentElement, options);
      } else if (source.match(/^\s*(<\?xml)/i)) {
        const parser = new DOMParser();
        const doc = parser.parseFromString(source, "text/xml");
        // todo: error handling - check for a "parsererror" document element
        return toMark(doc.documentElement, options);
      } else { // treat as html fragment
        const doc = document.implementation.createHTMLDocument(null);
        doc.body.innerHTML = source;
        const children = doc.body.children;
        if (!children) { return null; }
        if (children.length > 1) {
          const result = [];
          for (let i = 0; i < children.length; i++) {
            result.push(toMark(children[i], options));
          }
          return result;
        }
        // zero children gives toMark(undefined), i.e. null
        return toMark(children[0], options);
      }
    } else { // use htmlparser2 to parse
      // setup the parser
      const htmlparser = require("htmlparser2");
      // `parent` is the container currently being filled (starts as the result
      // array); `stack` remembers the enclosing containers.
      let parent = [];
      const stack = [];
      const opt = { decodeEntities: true };
      if (options.format === 'xml') { opt.xmlMode = true; }
      const parser = new htmlparser.Parser({
        onopentag: function(name, attribs) {
          const obj = m(name, attribs, null, parent);
          if (parent) {
            parent.push(obj);
            stack.push(parent);
            parent = obj;
          } else {
            parent = obj;
          }
        },
        ontext: function(text) {
          // whitespace-only text is dropped when options.ignoreSpace is set
          if (!(options.ignoreSpace && /^\s*$/.test(text))) {
            parent.push(text);
          }
        },
        oncomment: function(comment) {
          parent.push(m.pragma('--' + comment, parent));
        },
        onclosetag: function(tagname) {
          parent = stack.pop();
        }
        // todo: onprocessinginstruction
      }, opt);
      // start parsing
      parser.write(source.trim());
      parser.end();
      // result might be an array
      return (parent.length === 1) ? parent[0] : parent;
    }
  };

  // characters that are unsafe in HTML/XML text and attribute values
  const sgmlEscapes = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;', // &apos; is not defined in HTML4
  };
  const sgmlEscaper = /[&<>"']/g;

  // escape unsafe chars in the string into HTML/XML entities
  function escapeStr(str) {
    return ('' + str).replace(sgmlEscaper, function(match) {
      return sgmlEscapes[match];
    });
  }

  // HTML elements that are printed without a closing tag
  // ref: https://github.com/mixu/htmlparser-to-html
  const emptyTags = {
    "area": 1, "base": 1, "basefont": 1, "br": 1, "col": 1, "frame": 1,
    "hr": 1, "img": 1, "input": 1, "isindex": 1, "link": 1, "meta": 1,
    "param": 1, "embed": 1,
  };

  /**
   * Stringify a Mark object as HTML or XML.
   * Note: stringify with indentation space may distort the HTML/XML content.
   *
   * @param {Object} object - Mark object to print; non-objects and null yield null.
   * @param {Object} [options] - options.format === 'xml' selects XML output
   *   (XML declaration, self-closing empty elements); a truthy options.space
   *   enables indentation through MarkConvert.indent(level).
   * @returns {string|null} the markup text, or null for an invalid object.
   */
  MarkConvert.toSgml = function(object, options) {
    const opts = options || {}; // tolerate a missing options argument
    const hasSpace = opts.space;
    const isXml = opts.format === 'xml';
    let buffer = '';

    // skip invalid object; typeof null is 'object', so null is checked explicitly
    if (object === null || typeof object !== 'object') { return null; }

    // opening signature
    if (isXml) {
      buffer += '<?xml version="1.0" encoding="UTF-8"?>';
    } else if (object.constructor.name === 'html') {
      buffer += "<!DOCTYPE html>"; // full html
    } // else treat as html fragment

    // append the markup of obj (and its content, recursively) to buffer
    function markup(obj, level = 0) {
      if (!obj.constructor) { // XML/HTML comment or PI
        const pragma = obj.pragma();
        buffer += pragma.indexOf('--', 0) === 0
          ? ('<!' + pragma + '-->')  // comment
          : ('<?' + pragma + '?>');  // processing instruction
        return;
      }
      const name = obj.constructor.name;
      // MarkConvert.indent() is set by Mark
      // print opening tag
      buffer += (hasSpace ? MarkConvert.indent(level) : '') + "<" + name;
      // print object attributes
      for (const prop in obj) {
        const value = obj[prop];
        // exclude null, undefined and function
        if (value != null && typeof value !== 'function') {
          // todo: ensure 'prop' is proper html name
          buffer += ' ' + prop + '="' + escapeStr(value) + '"';
        }
      }
      // print object content
      if (isXml && !obj[0]) {
        buffer += "/>";
        return;
      }
      buffer += ">";
      let hasElmt = false;
      for (const item of obj) {
        if (typeof item === "string") {
          buffer += escapeStr(item);
        } else if (typeof item === "object") {
          markup(item, level + 1);
          hasElmt = true;
        } else {
          console.trace("unknown object", item);
        }
      }
      // print closing tag for XML and non-empty HTML element
      if (isXml || !emptyTags[name]) {
        buffer += (hasElmt && hasSpace ? MarkConvert.indent(level) : '') + "</" + name + ">";
      }
    }

    markup(object);
    return buffer;
  };

  return MarkConvert;
};

if (typeof module === 'object') module.exports = MarkConvert;