UNPKG

compromise

Version:
55 lines (51 loc) 1.55 kB
'use strict';

// Characters rewritten by escapeHtml, mapped to the entity that replaces them.
// Note that plain spaces become non-breaking spaces.
const HTML_CHAR_MAP = {
  '<': '&lt;',
  '>': '&gt;',
  '&': '&amp;',
  '"': '&quot;',
  '\'': '&#39;',
  ' ': '&nbsp;'
};

// Matches every character that has an entry in HTML_CHAR_MAP.
const HTML_CHAR_PATTERN = /[<>&"' ]/g;

// Body of a tag: anything up to the closing '>', skipping over quoted attribute values.
const TAG_BODY = '(?:[^"\'>]|"[^"]*"|\'[^\']*\')*';

// Matches one HTML comment, one <script>/<style> element (including its content),
// or one ordinary opening/closing tag.
const TAG_OR_COMMENT = new RegExp(
  '<(?:'
  // Comment body.
  + '!--(?:(?:-*[^->])*--+|-?)'
  // Special "raw text" elements whose content should be elided.
  + '|script\\b' + TAG_BODY + '>[\\s\\S]*?</script\\s*'
  + '|style\\b' + TAG_BODY + '>[\\s\\S]*?</style\\s*'
  // Regular name
  + '|/?[a-z]'
  + TAG_BODY
  + ')>',
  'gi'
);

/**
 * Turn XML/HTML special characters (and spaces) into ampersand-encoded entities.
 * The original author was not sure this is perfectly safe; treat it as
 * best-effort escaping rather than a security boundary.
 *
 * @param {string} s - raw text
 * @returns {string} the text with `< > & " '` and spaces replaced by entities
 */
const escapeHtml = (s) => s.replace(HTML_CHAR_PATTERN, (ch) => HTML_CHAR_MAP[ch]);

/**
 * Remove HTML comments, <script>/<style> elements and tags already present in the text.
 * Not tested upstream. Approach taken from
 * http://stackoverflow.com/questions/295566/sanitize-rewrite-html-on-the-client-side
 *
 * This only strips markup; it does NOT entity-encode what is left. Any remaining
 * '<' must be escaped by the caller (renderHtml does so via escapeHtml). Encoding
 * here as well would make the '&' of '&lt;' get escaped a second time.
 *
 * @param {string} html - text that may contain markup
 * @returns {string} the text with all recognised markup removed
 */
const sanitize = (html) => {
  let previous;
  // Repeat until stable: removing one tag can splice its neighbours into a new tag.
  do {
    previous = html;
    html = html.replace(TAG_OR_COMMENT, '');
  } while (html !== previous);
  return html;
};

/**
 * Turn the term into ~properly~ formatted html: a <span> whose classes are the
 * term's tag names (prefixed with 'nlp', the 'Term' tag omitted), surrounded by
 * the term's escaped leading and trailing whitespace.
 *
 * @param {object} t - term; reads `t.tag` (object keyed by tag name), `t.text`,
 *   `t.whitespace.before` and `t.whitespace.after`
 * @returns {string} html for the term
 */
const renderHtml = function(t) {
  const classes = Object.keys(t.tag)
    .filter((tag) => tag !== 'Term')
    .map((tag) => 'nlp' + tag)
    .join(' ');
  // Strip existing markup first, then escape exactly once.
  const text = escapeHtml(sanitize(t.text));
  const el = `<span class="${classes}">${text}</span>`;
  return escapeHtml(t.whitespace.before) + el + escapeHtml(t.whitespace.after);
};

// CommonJS export. Under CommonJS the guard is always true; it only matters when
// this file is evaluated somewhere `module` does not exist (e.g. as an ES module).
if (typeof module !== 'undefined' && module.exports) {
  module.exports = renderHtml;
}