UNPKG

sajari-website

Version:

Website extensions for the Sajari API. Automatically index site content, add user profiles, render search and recommendations, etc.

47 lines (43 loc) 1.71 kB
// Patterns matched against an element's class and id to recognise page chrome
// (navigation, ads, footers, widgets, ...) rather than content.
// Kept at module scope so the recursive walk does not rebuild them for every node.
var REGEXPS = {
  negative: /hidden|^hid$| hid$| hid |^hid |adblade|alert|banner|combx|comment|com-|contact|cookies-dialog|foot|footer|footnote|masthead|media|meta|navbar|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i
};

// Tag names whose whole subtree is skipped.
var badTags = ["head", "script", "style", "footer", "header", "nav", "select", "noscript"];

/**
 * Recursively collects the text found under a DOM node.
 *
 * An element (nodeType 1) contributes nothing, and its subtree is not visited,
 * when any of the following holds:
 *   - its tag name is listed in `badTags`;
 *   - it carries a `data-sj-ignore` attribute;
 *   - it is hidden, either through inline `display: none` or the `hidden` attribute;
 *   - it is not <body> and its class or id matches `REGEXPS.negative`.
 *
 * Text nodes (nodeType 3) contribute their `textContent`. Any other node type
 * contributes an empty string.
 *
 * @param {Node} node - Node to extract text from.
 * @returns {string} Extracted text with every whitespace run collapsed to one space.
 */
var Text = function(node) {
  var text = "";

  // Element nodes
  if (node.nodeType === 1) {
    var tagName = node.tagName.toLowerCase();

    if (badTags.indexOf(tagName) > -1) {
      return text; // Known bad tag names
    }
    if (node.hasAttribute("data-sj-ignore")) {
      return text; // Specific ignore elems
    }

    // Either signal alone makes the element invisible. `style` is checked for
    // presence first so element nodes without a style object do not throw.
    var hiddenByStyle = Boolean(node.style) && node.style.display === "none";
    if (hiddenByStyle || node.hasAttribute("hidden")) {
      return text; // invisible text
    }

    // <body> is exempt from the class/id filter.
    if (tagName !== "body") {
      // className is not always a string, hence the typeof checks.
      if (typeof node.className === "string" && node.className !== "" &&
          REGEXPS.negative.test(node.className)) {
        return text; // bad classes
      }
      if (typeof node.id === "string" && node.id !== "" &&
          REGEXPS.negative.test(node.id)) {
        return text; // bad ids
      }
    }

    // Recursively walk the DOM
    if (node.childNodes !== undefined) {
      for (var j = 0; j < node.childNodes.length; j++) {
        text += Text(node.childNodes[j]);
      }
    }
  }

  // Text nodes
  if (node.nodeType === 3) {
    text += node.textContent;
  }

  return text.replace(/\s+/g, " ");
};

// Guarded so that evaluating this file where no CommonJS `module` object exists
// does not throw; under CommonJS the export is unchanged.
if (typeof module !== "undefined" && module.exports) {
  module.exports = Text;
}