UNPKG

compromise

Version:

natural language processing in the browser

github.com/nlp-compromise/nlp_compromise

nlp-compromise/nlp_compromise

55 lines (51 loc) • 1.55 kB

JavaScript

'use strict';

// Characters replaced by escapeHtml, mapped to their HTML character references.
// Note that a plain space is encoded as a non-breaking space.
const HTML_CHAR_MAP = Object.freeze({
  '<': '&lt;',
  '>': '&gt;',
  '&': '&amp;',
  '"': '&quot;',
  '\'': '&#39;',
  ' ': '&nbsp;'
});

/**
 * Turn xml special characters into ampersand-encoding.
 * NOTE: the original author was not sure this is perfectly safe.
 *
 * @param {string} s - raw text
 * @returns {string} `s` with each of < > & " ' and space replaced by its entity
 */
const escapeHtml = (s) => s.replace(/[<>&"' ]/g, (ch) => HTML_CHAR_MAP[ch]);

// Attribute section of a tag: unquoted characters or complete quoted strings.
const TAG_BODY = '(?:[^"\'>]|"[^"]*"|\'[^\']*\')*';

// Matches one html comment, one script/style element including its content,
// or one regular open/close tag.
// Only ever used with String.prototype.replace, which resets lastIndex itself,
// so sharing this global regex between calls is safe.
const TAG_OR_COMMENT = new RegExp(
  '<(?:'
  // Comment body.
  + '!--(?:(?:-*[^->])*--+|-?)'
  // Special "raw text" elements whose content should be elided.
  + '|script\\b' + TAG_BODY + '>[\\s\\S]*?</script\\s*'
  + '|style\\b' + TAG_BODY + '>[\\s\\S]*?</style\\s*'
  // Regular name
  + '|/?[a-z]' + TAG_BODY
  + ')>',
  'gi');

/**
 * Remove html elements already in the text.
 * Not tested! Adapted from
 * http://stackoverflow.com/questions/295566/sanitize-rewrite-html-on-the-client-side
 *
 * This only strips markup. Any leftover `<` is deliberately NOT encoded here:
 * the caller passes the result through escapeHtml, and encoding it in both
 * places would turn `<` into `&amp;lt;`.
 *
 * @param {string} html - text that may contain markup
 * @returns {string} the text with comments, script/style elements and tags removed
 */
const sanitize = (html) => {
  let stripped = html;
  let previous;
  // Removing a tag can splice the surrounding text into a new tag
  // (e.g. "<scr<b>ipt>"), so repeat until nothing changes.
  do {
    previous = stripped;
    stripped = stripped.replace(TAG_OR_COMMENT, '');
  } while (stripped !== previous);
  return stripped;
};

/**
 * Turn the term into ~properly~ formatted html.
 *
 * @param {object} t - term; this function reads `t.tag` (object whose keys are
 *   tag names), `t.text`, `t.whitespace.before` and `t.whitespace.after`
 * @returns {string} escaped leading whitespace, a `<span>` whose classes are the
 *   term's tags (except 'Term') prefixed with 'nlp', and escaped trailing whitespace
 */
const renderHtml = function(t) {
  const classes = Object.keys(t.tag)
    .filter((tag) => tag !== 'Term')
    .map((tag) => 'nlp' + tag)
    .join(' ');
  // Strip markup first, then encode whatever special characters remain.
  const text = escapeHtml(sanitize(t.text));
  const el = '<span class="' + classes + '">' + text + '</span>';
  return escapeHtml(t.whitespace.before) + el + escapeHtml(t.whitespace.after);
};

module.exports = renderHtml;