/*
 * voca
 * Version:
 * The ultimate JavaScript string library
 * 283 lines (231 loc) • 6.4 kB
 * JavaScript
 */
'use strict';
require('./internal/is_nil.js');
require('./is_string.js');
var coerce_to_string = require('./internal/coerce_to_string.js');
var _const = require('./internal/const.js');
/**
 * Checks whether `subject` contains `substring` at a specific `index`.
 *
 * The text taken from `subject` is lower-cased before it is compared, while
 * `substring` is compared as given, so `substring` must be passed in lower case.
 *
 * @ignore
 * @param {string} subject The subject to search in.
 * @param {string} substring The (lower-case) substring to search for.
 * @param {number} index The index at which to look for the substring.
 * @param {boolean} [lookBehind=true] Whether `index` is the position of the LAST
 *   character of the substring (true, look behind) or of its FIRST character
 *   (false, look ahead).
 * @return {boolean} Returns `true` if the substring is found at that position.
 */
function hasSubstringAtIndex(subject, substring, index, lookBehind) {
  var isLookBehind = lookBehind === undefined ? true : lookBehind;
  // When looking behind, the substring ends at `index`, so it starts
  // `substring.length - 1` characters earlier.
  var start = isLookBehind ? index - substring.length + 1 : index;
  if (start < 0) {
    // The substring would begin before the start of `subject`. A negative
    // offset must not wrap around to the end of the string.
    return false;
  }
  var candidate = subject.slice(start, start + substring.length);
  return candidate.toLowerCase() === substring;
}
/**
 * Extracts the tag names from a string of the form '<tag1><tag2>...<tagN>'.
 *
 * @ignore
 * @param {string} tags The string that contains the tags.
 * @return {string[]} Returns the tag names, in the order they were matched.
 */
function parseTagList(tags) {
  var tagPattern = _const.REGEXP_TAG_LIST;
  var names = [];
  // Keep calling exec() until it reports no further match; the first capture
  // group of every match is the tag name.
  for (var found = tagPattern.exec(tags); found !== null; found = tagPattern.exec(tags)) {
    names.push(found[1]);
  }
  return names;
}
// States of the tag-name parser below.
var STATE_START_TAG = 0; // No non-whitespace character has been read yet.
var STATE_NON_WHITESPACE = 1; // At least one non-whitespace character has been read.
var STATE_DONE = 2; // The end of the tag name has been reached.
/**
 * Parses the lower-cased tag name from the content of a single HTML tag.
 *
 * `<` and `/` characters are skipped, so `'<b>'` and `'</B>'` both yield `'b'`.
 * Parsing stops at the first `>`, at the first whitespace that follows a
 * non-whitespace character, or at the end of `tagContent`.
 *
 * @ignore
 * @param {string} tagContent The tag content, e.g. `'<a href="#">'`.
 * @return {string} Returns the lower-cased tag name, or `''` when there is none.
 */
function parseTagName(tagContent) {
  var state = STATE_START_TAG;
  var tagName = '';
  var length = tagContent.length;
  // Bound the scan by the input length so that content without a terminating
  // '>' (or an empty string) ends the loop instead of reading past the end.
  for (var index = 0; index < length && state !== STATE_DONE; index++) {
    var char = tagContent[index].toLowerCase();
    switch (char) {
      case '<':
        // The opening bracket is never part of the name.
        break;
      case '>':
        state = STATE_DONE;
        break;
      default:
        if (_const.REGEXP_WHITESPACE.test(char)) {
          // Whitespace before the name is skipped; whitespace after it ends the name.
          if (state === STATE_NON_WHITESPACE) {
            state = STATE_DONE;
          }
        } else {
          if (state === STATE_START_TAG) {
            state = STATE_NON_WHITESPACE;
          }
          // The slash of a closing or self-closing tag is not part of the name.
          if (char !== '/') {
            tagName += char;
          }
        }
        break;
    }
  }
  return tagName;
}
// States of the tag-stripping scanner below.
var STATE_OUTPUT = 0; // Outside of any tag: characters are copied to the output.
var STATE_HTML = 1; // Inside `<...>`: characters belong to a tag.
var STATE_EXCLAMATION = 2; // Right after `<!`: characters are discarded until `>`.
var STATE_COMMENT = 3; // Inside `<!-- ... -->`: characters are discarded until `-->`.
/**
 * Strips HTML tags from `subject`.
 *
 * Tags listed in `allowableTags` are kept as they appear in `subject`; every
 * other tag is replaced with `replacement`. Tag names are matched
 * case-insensitively, so `['B']` and `['b']` both keep `<b>` and `<B>`.
 * `<!...>` declarations and `<!-- ... -->` comments are removed without
 * inserting `replacement`.
 *
 * @function stripTags
 * @static
 * @since 1.1.0
 * @memberOf Strip
 * @param {string} [subject=''] The string to strip from.
 * @param {string|Array} [allowableTags] The string `'<tag1><tag2>'` or array `['tag1', 'tag2']` of tags that should not be stripped.
 * @param {string} [replacement=''] The string to replace the stripped tag.
 * @return {string} Returns the stripped string.
 * @example
 *
 * v.stripTags('<span><a href="#">Summer</a> is nice</span>');
 * // => 'Summer is nice'
 *
 * v.stripTags('<span><i>Winter</i> is <b>cold</b></span>', ['b', 'i']);
 * // => '<i>Winter</i> is <b>cold</b>'
 *
 * v.stripTags('<B>Bold</B> move', ['B']);
 * // => '<B>Bold</B> move'
 *
 * v.stripTags('Sun<br/>set', '', '-');
 * // => 'Sun-set'
 */
function trim(subject, allowableTags, replacement) {
  var subjectString = coerce_to_string.coerceToString(subject);
  if (subjectString === '') {
    return '';
  }
  var requestedTags;
  if (Array.isArray(allowableTags)) {
    requestedTags = allowableTags;
  } else {
    var allowableTagsString = coerce_to_string.coerceToString(allowableTags);
    requestedTags = allowableTagsString === '' ? [] : parseTagList(allowableTagsString);
  }
  // Parsed tag names are lower-cased, so the allow-list is lower-cased as well;
  // otherwise an allowed tag written in upper case would never match. `map`
  // builds a copy, leaving the caller's array untouched.
  var allowedTagNames = requestedTags.map(function (allowedTag) {
    return typeof allowedTag === 'string' ? allowedTag.toLowerCase() : allowedTag;
  });
  var replacementString = coerce_to_string.coerceToString(replacement);
  var length = subjectString.length;
  var hasAllowableTags = allowedTagNames.length > 0;
  var hasSubstring = hasSubstringAtIndex.bind(null, subjectString);
  var state = STATE_OUTPUT;
  var depth = 0; // Number of unmatched '<' seen while already inside a tag.
  var output = '';
  var tagContent = ''; // Text of the current tag; only collected when an allow-list exists.
  var quote = null; // The quote character currently open inside a tag, if any.
  for (var index = 0; index < length; index++) {
    var char = subjectString[index];
    // When true, `char` is handled as a regular character for the current
    // state (see the switch after this one); when false it is consumed.
    var advance = false;
    switch (char) {
      case '<':
        if (quote) {
          break;
        }
        if (hasSubstring('< ', index, false)) {
          // '<' followed by a space does not open a tag.
          advance = true;
          break;
        }
        if (state === STATE_OUTPUT) {
          // The state is switched before the character is handled, so the
          // '<' itself becomes the first character of `tagContent`.
          advance = true;
          state = STATE_HTML;
          break;
        }
        if (state === STATE_HTML) {
          // A '<' inside a tag: remember it so its '>' does not close the tag.
          depth++;
          break;
        }
        advance = true;
        break;
      case '!':
        if (state === STATE_HTML && hasSubstring('<!', index)) {
          state = STATE_EXCLAMATION;
          break;
        }
        advance = true;
        break;
      case '-':
        if (state === STATE_EXCLAMATION && hasSubstring('!--', index)) {
          state = STATE_COMMENT;
          break;
        }
        advance = true;
        break;
      case '"':
      case "'":
        if (state === STATE_HTML) {
          // Track quoted sections of a tag; only the same quote character closes one.
          if (quote === char) {
            quote = null;
          } else if (!quote) {
            quote = char;
          }
        }
        advance = true;
        break;
      case 'E':
      case 'e':
        // `<!doctype` (any letter case) is handled like a regular tag from here on.
        if (state === STATE_EXCLAMATION && hasSubstring('doctype', index)) {
          state = STATE_HTML;
          break;
        }
        advance = true;
        break;
      case '>':
        if (depth > 0) {
          depth--;
          break;
        }
        if (quote) {
          break;
        }
        if (state === STATE_HTML) {
          // End of a tag: keep it verbatim if it is allowed, replace it otherwise.
          quote = null;
          state = STATE_OUTPUT;
          if (hasAllowableTags) {
            tagContent += '>';
            var tagName = parseTagName(tagContent);
            if (allowedTagNames.indexOf(tagName.toLowerCase()) !== -1) {
              output += tagContent;
            } else {
              output += replacementString;
            }
            tagContent = '';
          } else {
            output += replacementString;
          }
          break;
        }
        // A `<!...>` declaration ends at any '>', a comment only at '-->'.
        if (state === STATE_EXCLAMATION || (state === STATE_COMMENT && hasSubstring('-->', index))) {
          quote = null;
          state = STATE_OUTPUT;
          tagContent = '';
          break;
        }
        advance = true;
        break;
      default:
        advance = true;
    }
    if (advance) {
      switch (state) {
        case STATE_OUTPUT:
          output += char;
          break;
        case STATE_HTML:
          if (hasAllowableTags) {
            tagContent += char;
          }
          break;
        // In the exclamation and comment states the character is discarded.
      }
    }
  }
  return output;
}
// Expose the tag-stripping function (documented above as `stripTags`) as this module's export.
module.exports = trim;