// ineed
// Version:
// Web scraping and HTML-reprocessing. The easy way.
// 67 lines (53 loc) • 1.57 kB
// JavaScript
//Escaping regexes
var AMP_REGEX = /&/g,
NBSP_REGEX = /\u00a0/g,
DOUBLE_QUOTE_REGEX = /"/g,
LT_REGEX = /</g,
GT_REGEX = />/g;
exports.escapeHtml = function (str, attrMode) {
str = str
.replace(AMP_REGEX, '&')
.replace(NBSP_REGEX, ' ');
if (attrMode)
str = str.replace(DOUBLE_QUOTE_REGEX, '"');
else {
str = str
.replace(LT_REGEX, '<')
.replace(GT_REGEX, '>');
}
return str;
};
/**
 * Looks up the value of the first attribute whose lower-cased name equals
 * `attrName`.
 *
 * Only the stored attribute name is lower-cased; `attrName` is compared as
 * given.
 *
 * @param {Array} attrs - List of `{ name, value }` attribute records.
 * @param {string} attrName - Name to look for.
 * @returns {*} The matching attribute's value, or `null` when nothing matches.
 */
var getAttrValue = exports.getAttrValue = function (attrs, attrName) {
    var idx = 0,
        attr;

    while (idx < attrs.length) {
        attr = attrs[idx++];

        if (attr.name.toLowerCase() === attrName)
            return attr.value;
    }

    return null;
};
exports.setAttrValue = function (attrs, attrName, value) {
for (var i = 0; i < attrs.length; i++) {
if (attrs[i].name.toLowerCase() === attrName) {
attrs[i].value = value;
break;
}
}
};
exports.isStylesheet = function (startTag) {
if (startTag.tagName === 'link') {
var relAttr = getAttrValue(startTag.attrs, 'rel');
return relAttr && relAttr.toLowerCase() === 'stylesheet';
}
return false;
};
exports.isPlainText = function (inBody, leadingStartTag, text) {
return inBody &&
leadingStartTag !== 'script' &&
leadingStartTag !== 'style' &&
text.trim().length > 0;
};
exports.toLookupTable = function (arr) {
return arr.reduce(function (table, value) {
table[value] = true;
return table;
}, {});
};