UNPKG

striptags

Version:

PHP strip_tags in Node.js

github.com/ericnorris/striptags

ericnorris/striptags

257 lines (211 loc) • 7.84 kB

JavaScript

'use strict';

/*
 * UMD wrapper: registers `striptags` as an AMD module, a CommonJS-like
 * export, or a property on the global object, whichever is available.
 */
(function (root, factory) {
    if (typeof define === 'function' && define.amd) {
        // AMD. Register as an anonymous module.
        define([], factory);
    } else if (typeof module === 'object' && module.exports) {
        // Node. Does not work with strict CommonJS, but
        // only CommonJS-like environments that support module.exports,
        // like Node.
        module.exports = factory();
    } else {
        // Browser globals (root is window)
        root.striptags = factory();
    }
}(
    // Top-level `this` is undefined inside ES modules (and in code that
    // bundlers treat as modules), so prefer an explicit global object.
    typeof globalThis !== 'undefined' ? globalThis
        : typeof self !== 'undefined' ? self
        : this,
    function () {
        // Parser states for the character-by-character scan.
        const STATE_OUTPUT = 0;       // outside of any tag; characters are emitted
        const STATE_HTML = 1;         // inside '<' ... '>'
        const STATE_PRE_COMMENT = 2;  // saw '<!' (comment or DOCTYPE may follow)
        const STATE_COMMENT = 3;      // inside '<!--' ... '-->'

        const WHITESPACE = /\s/;
        const ALLOWED_TAGS_REGEX = /<(\w*)>/g;

        /**
         * Strip HTML tags and comments from a string, in the spirit of PHP's
         * strip_tags.
         *
         * @param {string} [html=''] The markup to strip. `null` is treated as
         *     an empty string.
         * @param {string|string[]} [allowableTags=[]] Tags to let through,
         *     either as a string such as "<b><strong>" or as an array of tag
         *     names such as ['b', 'strong']. Matching is case-insensitive.
         *     Any other value means "allow nothing".
         * @param {string} [tagReplacement=''] Text emitted in place of every
         *     stripped tag (comments and DOCTYPEs are removed without
         *     replacement).
         * @return {string} The input with disallowed tags removed.
         */
        function striptags(html = '', allowableTags = [], tagReplacement = '') {
            const source = html === null ? '' : html;
            const allowedTags = normalizeAllowableTags(allowableTags);

            let state = STATE_OUTPUT;
            let depth = 0;          // count of nested '<' seen while inside a tag
            let output = '';
            let tagBuffer = '';     // raw text of the tag currently being read
            let inQuote = false;    // false, or the quote character that opened

            // Route a character to the output or to the pending tag buffer,
            // depending on the current state; dropped inside comments.
            function consumeCharacter(ch) {
                if (state === STATE_OUTPUT) {
                    output += ch;
                } else if (state === STATE_HTML) {
                    tagBuffer += ch;
                }
            }

            // Decide what to do with a completed tag: emit it verbatim when
            // its name is allowed, otherwise emit the replacement (if any).
            function flushTagBuffer() {
                let normalized = '';
                let nonWhitespaceSeen = false;

                // Extract the lowercase tag name: skip '<' and '/', skip
                // leading whitespace, stop at the first whitespace after the
                // name or at '>'.
                normalizeTagBuffer:
                for (let j = 0; j < tagBuffer.length; j++) {
                    const ch = tagBuffer[j].toLowerCase();

                    switch (ch) {
                        case '<': {
                            break;
                        }

                        case '>': {
                            break normalizeTagBuffer;
                        }

                        case '/': {
                            nonWhitespaceSeen = true;
                            break;
                        }

                        default: {
                            if (!WHITESPACE.test(ch)) {
                                nonWhitespaceSeen = true;
                                normalized += ch;
                            } else if (nonWhitespaceSeen) {
                                break normalizeTagBuffer;
                            }
                        }
                    }
                }

                if (allowedTags.indexOf(normalized) !== -1) {
                    output += tagBuffer;
                } else if (tagReplacement) {
                    output += tagReplacement;
                }

                tagBuffer = '';
            }

            for (let i = 0, length = source.length; i < length; i++) {
                const c = source[i];

                switch (c) {
                    case '<': {
                        // ignore '<' if inside a quote
                        if (inQuote) {
                            break;
                        }

                        // '<' followed by a space is not a valid tag, continue
                        if (source[i + 1] === ' ') {
                            consumeCharacter(c);
                            break;
                        }

                        // change to STATE_HTML; the '<' lands in the tag buffer
                        if (state === STATE_OUTPUT) {
                            state = STATE_HTML;
                            consumeCharacter(c);
                            break;
                        }

                        // ignore additional '<' characters when inside a tag
                        if (state === STATE_HTML) {
                            depth++;
                            break;
                        }

                        consumeCharacter(c);
                        break;
                    }

                    case '>': {
                        // something like this is happening: '<<>>'
                        if (depth) {
                            depth--;
                            break;
                        }

                        // ignore '>' if inside a quote
                        if (inQuote) {
                            break;
                        }

                        // an HTML tag was closed
                        if (state === STATE_HTML) {
                            inQuote = false;
                            state = STATE_OUTPUT;
                            tagBuffer += '>';
                            flushTagBuffer();
                            break;
                        }

                        // '<!' met its ending '>'
                        if (state === STATE_PRE_COMMENT) {
                            inQuote = false;
                            state = STATE_OUTPUT;
                            tagBuffer = '';
                            break;
                        }

                        // if last two characters were '--', then end comment
                        if (state === STATE_COMMENT &&
                            source[i - 1] === '-' &&
                            source[i - 2] === '-') {
                            inQuote = false;
                            state = STATE_OUTPUT;
                            tagBuffer = '';
                            break;
                        }

                        consumeCharacter(c);
                        break;
                    }

                    // catch both single and double quotes
                    case '"':
                    case '\'': {
                        if (state === STATE_HTML) {
                            if (inQuote === c) {
                                // end quote found
                                inQuote = false;
                            } else if (!inQuote) {
                                // start quote only if not already in one
                                inQuote = c;
                            }
                        }

                        consumeCharacter(c);
                        break;
                    }

                    case '!': {
                        if (state === STATE_HTML && source[i - 1] === '<') {
                            // looks like we might be starting a comment
                            state = STATE_PRE_COMMENT;
                            break;
                        }

                        consumeCharacter(c);
                        break;
                    }

                    case '-': {
                        // if the previous two characters were '!-', this is a comment
                        if (state === STATE_PRE_COMMENT &&
                            source[i - 1] === '-' &&
                            source[i - 2] === '!') {
                            state = STATE_COMMENT;
                            break;
                        }

                        consumeCharacter(c);
                        break;
                    }

                    case 'E':
                    case 'e': {
                        // check for DOCTYPE, because it looks like a comment and isn't
                        if (state === STATE_PRE_COMMENT &&
                            i >= 6 &&
                            source.slice(i - 6, i + 1).toLowerCase() === 'doctype') {
                            state = STATE_HTML;
                            break;
                        }

                        consumeCharacter(c);
                        break;
                    }

                    default: {
                        consumeCharacter(c);
                    }
                }
            }

            return output;
        }

        /**
         * Turn the caller-supplied allow-list into an array of lowercase tag
         * names without mutating the caller's value.
         *
         * @param {*} allowableTags A "<b><i>"-style string, an array of tag
         *     names, or anything else (treated as "allow nothing").
         * @return {Array} Lowercase tag names; empty when nothing is allowed.
         */
        function normalizeAllowableTags(allowableTags) {
            if (typeof allowableTags === 'string') {
                return parseAllowableTags(allowableTags) || [];
            }

            if (Array.isArray(allowableTags)) {
                // Tag names read from the markup are lowercased before lookup,
                // so the allow-list has to be lowercased to ever match.
                return allowableTags.map(
                    (tag) => (typeof tag === 'string' ? tag.toLowerCase() : tag)
                );
            }

            return [];
        }

        /**
         * Return an array containing tags that are allowed to pass through the
         * algorithm.
         *
         * @param {string} allowableTags A string of tags to allow (e.g. "<b><strong>").
         * @return {string[]|null} An array of lowercase allowed tags or null if none.
         */
        function parseAllowableTags(allowableTags) {
            const tagsArray = [];
            let match;

            // The regex is global and therefore stateful; always scan from the start.
            ALLOWED_TAGS_REGEX.lastIndex = 0;

            while ((match = ALLOWED_TAGS_REGEX.exec(allowableTags)) !== null) {
                tagsArray.push(match[1].toLowerCase());
            }

            return tagsArray.length !== 0 ? tagsArray : null;
        }

        return striptags;
    }
));