UNPKG

@4884org/jumble

Version:

🛡️ Advanced bot detection & content protection component with 100ms optimized timing. Automatically detects bots and scrambles text content while obfuscating media to protect websites. Universal implementation works with React, Vue, Angular, Svelte.

467 lines (463 loc) 20 kB
import { p as proxyCustomElement, H } from './index2.js';

/** Delay (ms) between starting detection and applying the verdict / revealing the page. */
const DETECTION_DELAY_MS = 100;

/** Suspicion score at or above which the visitor is treated as a bot. */
const BOT_SUSPICION_THRESHOLD = 3;

/** Class added to every media element whose sources have been swapped out. */
const OBFUSCATED_CLASS = 'jumble-obfuscated';

/** 1x1 SVG placeholder shown instead of the real image. */
const PLACEHOLDER_IMAGE_SRC = 'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMSIgaGVpZ2h0PSIxIiB2aWV3Qm94PSIwIDAgMSAxIiBmaWxsPSJub25lIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPjxyZWN0IHdpZHRoPSIxIiBoZWlnaHQ9IjEiIGZpbGw9IiNmM2Y0ZjYiLz48L3N2Zz4=';

/** Backup data-attribute used for each media attribute that gets obfuscated. */
const BACKUP_ATTRIBUTE = Object.freeze({
    src: 'data-jumble-original-src',
    srcset: 'data-jumble-original-srcset',
    poster: 'data-jumble-original-poster',
});

/** Elements whose text must never be scrambled. */
const NON_TEXT_TAGS = ['script', 'style', 'meta', 'title', 'noscript', 'head'];

/** User-agent fragments (lowercase) of crawlers that may see the real content. */
const DEFAULT_LEGITIMATE_BOTS = Object.freeze([
    // Google
    'googlebot',
    'google-inspectiontool',
    'google-site-verification',
    'googleother',
    // Bing/Microsoft
    'bingbot',
    'msnbot',
    'adidxbot',
    'bingpreview',
    // Other major search engines
    'duckduckbot', // DuckDuckGo
    'slurp', // Yahoo
    'yandexbot', // Yandex
    'baidubot', // Baidu (kept for backward compatibility)
    'baiduspider', // Baidu's crawler identifies itself as "Baiduspider"
    'facebookexternalhit', // Facebook
    'twitterbot', // Twitter
    'linkedinbot', // LinkedIn
    'whatsapp', // WhatsApp
    'telegrambot', // Telegram
    // SEO & Analytics tools
    'semrushbot',
    'ahrefsbot',
    'mj12bot', // Majestic
    'dotbot', // Moz
    'screaming frog', // SEO Spider
    // Content platforms
    'discordbot',
    'slackbot',
    'redditbot',
    'pinterest',
    // Monitoring & Security
    'uptimerobot',
    'pingdom',
    'gtmetrix',
]);

/** User-agent fragments (lowercase) that indicate automation or scraping. */
const DEFAULT_SUSPICIOUS_PATTERNS = Object.freeze([
    'headless', // Headless browsers (often scrapers)
    'phantom', // PhantomJS
    'selenium', // Selenium automation
    'webdriver', // WebDriver automation
    'puppeteer', // Puppeteer automation
    'playwright', // Playwright automation
    'scraper', // Explicit scraper identification
    'crawler', // Generic crawlers (not search engines)
    'harvester', // Content harvesting
    'extractor', // Data extraction tools
    'spider', // Generic spiders (legitimate ones are excluded via the allowlist)
    'grabber', // Content grabbers
    'leech', // Content leeching tools
    // Known AI training scrapers that collect data for LLM training
    'gptbot', // OpenAI primary training crawler
    'oai-searchbot', // OpenAI search indexing
    'chatgpt-user', // OpenAI user-triggered fetching
    'anthropic-ai', // Anthropic bulk training data collection
    'claudebot', // Anthropic real-time citation fetching
    'claude-web', // Anthropic web-focused crawling
    'google-extended', // Google Gemini AI training
    'meta-externalagent', // Meta AI training scraper (Aug 2024)
    'perplexitybot', // Perplexity AI training
    'ccbot', // Common Crawl (used by many open-source LLMs)
    'cohere-ai', // Cohere model training
    'bytespider', // ByteDance/TikTok AI training
    'duckassistbot', // DuckDuckGo AI features
    'ai2bot', // Allen Institute research
    'diffbot', // Structured data extraction for ML
    'omgili', // Forum/discussion scraping
    'youbot', // You.com AI search
    'timpibot', // Timpi decentralized search
]);

/**
 * Turn a user-supplied allow/deny list into clean lowercase patterns.
 *
 * The props are attribute-backed, so the value may arrive as a string
 * (e.g. `custom-allowlist="mybot, otherbot"`). Spreading such a string would
 * yield single characters that match almost every user agent, and an empty
 * entry would match every user agent, so both are filtered out here.
 *
 * @param {unknown} value - Array of patterns, comma-separated string, or anything else.
 * @returns {string[]} Lowercased, non-blank patterns (empty for unsupported input).
 */
const normalizePatternList = (value) => {
    let entries = [];
    if (typeof value === 'string') {
        entries = value.split(',').map((entry) => entry.trim());
    }
    else if (Array.isArray(value)) {
        entries = value;
    }
    return entries
        .filter((entry) => typeof entry === 'string' && entry.trim() !== '')
        .map((entry) => entry.toLowerCase());
};

/**
 * Report whether both Web Storage areas are reachable.
 * Merely reading `window.localStorage` throws a SecurityError when the user
 * blocks site data, so the access is guarded and treated as "no storage".
 *
 * @returns {boolean} True when localStorage and sessionStorage are both available.
 */
const hasWebStorage = () => {
    try {
        return Boolean(window.localStorage && window.sessionStorage);
    }
    catch {
        // Deliberate: blocked storage is a detection signal, not a failure.
        return false;
    }
};

/**
 * Invoke `visit` with the open shadow root of every descendant of `root` that has one.
 *
 * @param {ParentNode} root - Element, document or shadow root to search.
 * @param {(shadow: ShadowRoot) => void} visit - Callback run per shadow root.
 */
const forEachShadowRoot = (root, visit) => {
    root.querySelectorAll('*').forEach((el) => {
        if (el.shadowRoot) {
            visit(el.shadowRoot);
        }
    });
};

/**
 * Save the current value of attribute `name` in its backup data-attribute,
 * recording absence as "no backup attribute" so restore can remove it again.
 */
const stashAttribute = (el, name) => {
    const backup = BACKUP_ATTRIBUTE[name];
    if (el.hasAttribute(name)) {
        el.setAttribute(backup, el.getAttribute(name));
    }
    else {
        el.removeAttribute(backup);
    }
};

/**
 * Put attribute `name` back to the state recorded by `stashAttribute`
 * (restored value, or removed when it was originally absent).
 */
const unstashAttribute = (el, name) => {
    const backup = BACKUP_ATTRIBUTE[name];
    if (el.hasAttribute(backup)) {
        el.setAttribute(name, el.getAttribute(backup));
        el.removeAttribute(backup);
    }
    else {
        el.removeAttribute(name);
    }
};

/**
 * `<jumble-head>`: heuristic bot detection that scrambles text and blanks
 * media for visitors classified as bots, and reveals the page otherwise.
 */
const JumbleHead$1 = /*@__PURE__*/ proxyCustomElement(class JumbleHead extends H {
    constructor() {
        super();
        this.__registerHost();
    }
    trapLinkId = '__bot_trap_link__';
    botConfirmed = false;
    timeoutId = null;
    forceBot = false;
    isProcessed = false;
    detectionStartTime = 0;
    /**
     * Text nodes scrambled by this component, mapped to
     * `{ original, scrambled }` so the text can be restored later.
     */
    scrambledText = new WeakMap();
    /**
     * Allow search engine bots to see real content for SEO.
     * When true, legitimate search engines are allowed.
     * When false, all bots see scrambled content.
     */
    allowSEO = true;
    /**
     * Custom list of additional legitimate bot user agents to allow.
     * These will be added to the default search engine allowlist.
     */
    customAllowlist = [];
    /**
     * Custom list of additional suspicious bot patterns to block.
     * These will be added to the default malicious bot patterns.
     */
    customDenylist = [];

    /** Start timing, plant the trap link and schedule the first detection pass. */
    componentDidLoad() {
        this.detectionStartTime = this.safePerformanceNow();
        this.injectTrapLink();
        this.runDetection();
    }

    /**
     * @returns {number} `performance.now()` when available, otherwise `Date.now()`.
     */
    safePerformanceNow() {
        // `typeof` guard: optional chaining does not protect against an
        // undeclared `performance` global.
        const hasPerformanceNow = typeof performance !== 'undefined' && typeof performance.now === 'function';
        return hasPerformanceNow ? performance.now() : Date.now();
    }

    /**
     * @param {string} ua - Lowercased user-agent string.
     * @returns {boolean} True when `ua` contains a default or custom allowlisted pattern.
     */
    isLegitimateSearchEngine(ua) {
        const allLegitimate = [...DEFAULT_LEGITIMATE_BOTS, ...normalizePatternList(this.customAllowlist)];
        return allLegitimate.some((bot) => ua.includes(bot));
    }

    /**
     * @param {string} ua - Lowercased user-agent string.
     * @returns {boolean} True when `ua` matches a default or custom deny pattern
     *   and is not an allowlisted crawler.
     */
    isSuspiciousUserAgent(ua) {
        const allSuspicious = [...DEFAULT_SUSPICIOUS_PATTERNS, ...normalizePatternList(this.customDenylist)];
        const hasSuspiciousPattern = allSuspicious.some((pattern) => ua.includes(pattern));
        return hasSuspiciousPattern && !this.isLegitimateSearchEngine(ua);
    }

    /** Cancel any pending detection pass when the element leaves the DOM. */
    disconnectedCallback() {
        if (this.timeoutId !== null) {
            clearTimeout(this.timeoutId);
            this.timeoutId = null;
        }
    }

    /** Flip the forced-bot flag and re-run detection. */
    async toggleBotMode() {
        this.forceBot = !this.forceBot;
        this.runDetection();
    }

    /**
     * Force bot mode on or off and re-run detection.
     * @param {boolean} enabled
     */
    async setBotMode(enabled) {
        this.forceBot = enabled;
        this.runDetection();
    }

    /**
     * @returns {Promise<boolean>} True when bot mode is forced, the trap link
     *   fired, or the heuristics currently classify the visitor as a bot.
     */
    async getCurrentBotMode() {
        return this.forceBot || this.botConfirmed || this.isLikelyBot();
    }

    /**
     * Restore anything from a previous pass, then after DETECTION_DELAY_MS
     * either scramble/obfuscate the page (bot) or leave it intact (human),
     * and remove the `jumble-init` class from the document element.
     */
    runDetection() {
        if (this.timeoutId !== null) {
            clearTimeout(this.timeoutId);
        }
        // If we've already processed, restore content first
        if (this.isProcessed) {
            this.restoreContent(document.body);
        }
        this.timeoutId = setTimeout(() => {
            this.timeoutId = null;
            try {
                const treatAsBot = this.forceBot || this.botConfirmed || this.isLikelyBot();
                if (treatAsBot) {
                    this.scrambleAllText(document.body);
                    this.obfuscateMedia(document.body);
                    document.body.classList.add('jumble-bot-detected');
                }
                else {
                    document.body.classList.remove('jumble-bot-detected');
                }
            }
            finally {
                // Always drop `jumble-init`, even if detection threw, so an
                // error here cannot leave that class on the document element.
                document.documentElement.classList.remove('jumble-init');
                this.isProcessed = true;
            }
            // Log detection timing for analysis
            const detectionTime = this.safePerformanceNow() - this.detectionStartTime;
            console.debug(`Jumble: Bot detection completed in ${Math.round(detectionTime)}ms`);
        }, DETECTION_DELAY_MS);
    }

    /**
     * Undo `scrambleAllText` and `obfuscateMedia` under `root`, descending
     * into open shadow roots.
     *
     * @param {ParentNode} root - Element or shadow root to restore.
     */
    restoreContent(root) {
        this.restoreText(root);
        root.querySelectorAll(`img.${OBFUSCATED_CLASS}, video.${OBFUSCATED_CLASS}`).forEach((mediaEl) => {
            const tag = mediaEl.tagName.toLowerCase();
            if (tag === 'img') {
                unstashAttribute(mediaEl, 'src');
                unstashAttribute(mediaEl, 'srcset');
            }
            else if (tag === 'video') {
                unstashAttribute(mediaEl, 'src');
                unstashAttribute(mediaEl, 'poster');
                mediaEl.querySelectorAll('source').forEach((source) => unstashAttribute(source, 'src'));
                // Editing <source> children of an inserted video has no effect
                // until load() re-runs resource selection; setting `src`
                // already triggers a load on its own.
                if (!mediaEl.hasAttribute('src')) {
                    mediaEl.load();
                }
            }
            mediaEl.classList.remove(OBFUSCATED_CLASS);
        });
        forEachShadowRoot(root, (shadow) => this.restoreContent(shadow));
    }

    /**
     * Write the original text back into every text node under `root` that this
     * component scrambled. Nodes whose text changed since scrambling are left
     * alone so newer page content is not overwritten.
     *
     * @param {Node} root - Subtree to restore (shadow roots are handled by the caller).
     */
    restoreText(root) {
        const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
        while (walker.nextNode()) {
            const node = walker.currentNode;
            const record = this.scrambledText.get(node);
            if (!record) {
                continue;
            }
            if (node.textContent === record.scrambled) {
                node.textContent = record.original;
            }
            this.scrambledText.delete(node);
        }
    }

    /**
     * Ensure the off-screen honeypot link exists in `document.body` and flag
     * the visitor as a bot when it is focused, hovered or clicked.
     */
    injectTrapLink() {
        if (!document.body) {
            return;
        }
        let trap = document.getElementById(this.trapLinkId);
        if (!trap) {
            trap = document.createElement('a');
            trap.href = '/__bot-trap__';
            Object.assign(trap.style, {
                position: 'absolute',
                left: '-9999px',
                top: '-9999px',
                width: '1px',
                height: '1px',
                overflow: 'hidden',
                textDecoration: 'none',
                color: 'transparent',
            });
            trap.textContent = 'bot trap';
            trap.setAttribute('aria-hidden', 'true');
            trap.setAttribute('tabindex', '-1');
            trap.id = this.trapLinkId;
            document.body.appendChild(trap);
        }
        for (const eventName of ['focus', 'mouseenter', 'click']) {
            trap.addEventListener(eventName, () => this.flagAsBot());
        }
    }

    /** Record that the trap link was triggered. */
    flagAsBot() {
        this.botConfirmed = true;
        console.debug('Jumble: Bot activity detected via trap link');
    }

    /**
     * Score the current browser environment against a list of weighted bot
     * signals.
     *
     * @returns {boolean} False for allowlisted crawlers when `allowSEO` is set;
     *   otherwise true when the summed weights reach BOT_SUSPICION_THRESHOLD.
     */
    isLikelyBot() {
        const ua = navigator?.userAgent?.toLowerCase() || '';
        const w = window;
        if (this.allowSEO && this.isLegitimateSearchEngine(ua)) {
            console.debug('Jumble: Legitimate search engine detected - allowing content indexing for SEO', ua);
            return false; // Allow SEO bots to see real content
        }
        const seleniumGlobals = w._selenium || w.__selenium_unwrapped || w.__selenium_evaluate || w.__fxdriver_evaluate;
        // [signal present?, weight, reason] - order determines the order of logged reasons.
        const signals = [
            [!('ontouchstart' in w), 0.5, 'No touch support'],
            [!w.chrome, 0.5, 'No Chrome APIs'],
            [navigator?.webdriver || false, 3, 'WebDriver detected'],
            [!navigator?.languages || navigator.languages.length === 0, 2, 'No languages'],
            [!navigator?.plugins || navigator.plugins.length === 0, 0.5, 'No plugins'],
            [this.isSuspiciousUserAgent(ua), 3, 'Malicious bot user agent'],
            [(w.outerWidth || 0) === 0 && (w.outerHeight || 0) === 0, 2, 'Zero viewport'],
            [!navigator?.connection, 0.5, 'No connection API'],
            [(screen?.width || 0) === 0 || (screen?.height || 0) === 0, 2, 'Suspicious screen dimensions'],
            [!hasWebStorage(), 1, 'No storage APIs'],
            [this.safePerformanceNow() - this.detectionStartTime < 50, 0.5, 'Unusually fast execution'],
            [(w.history?.length || 0) <= 1, 1, 'Minimal browser history'],
            [ua.includes('phantomjs'), 3, 'PhantomJS detected'],
            [ua.includes('selenium') || seleniumGlobals, 3, 'Selenium detected'],
        ];
        let suspicion = 0;
        const detectionReasons = [];
        for (const [present, weight, reason] of signals) {
            if (present) {
                suspicion += weight;
                detectionReasons.push(reason);
            }
        }
        const isBot = suspicion >= BOT_SUSPICION_THRESHOLD;
        if (isBot) {
            console.debug(`Jumble: Malicious bot detected (suspicion: ${suspicion}/12) - scrambling content`, detectionReasons);
        }
        else {
            console.debug(`Jumble: Human detected (suspicion: ${suspicion}/12)`);
        }
        return isBot;
    }

    /**
     * Replace every image under `root` with a placeholder and strip the sources
     * of every video, backing up the original attributes so `restoreContent`
     * can undo it. Descends into open shadow roots.
     *
     * @param {ParentNode} root - Element or shadow root to process.
     */
    obfuscateMedia(root) {
        root.querySelectorAll('img, video').forEach((mediaEl) => {
            // Already obfuscated: the backups hold the true originals, and
            // re-stashing would overwrite them with the placeholder.
            if (mediaEl.classList.contains(OBFUSCATED_CLASS)) {
                return;
            }
            const tag = mediaEl.tagName.toLowerCase();
            if (tag === 'img') {
                stashAttribute(mediaEl, 'src');
                stashAttribute(mediaEl, 'srcset');
                mediaEl.setAttribute('src', PLACEHOLDER_IMAGE_SRC);
                mediaEl.removeAttribute('srcset');
            }
            else if (tag === 'video') {
                stashAttribute(mediaEl, 'src');
                stashAttribute(mediaEl, 'poster');
                mediaEl.pause();
                mediaEl.removeAttribute('src');
                mediaEl.removeAttribute('poster');
                mediaEl.querySelectorAll('source').forEach((source) => {
                    stashAttribute(source, 'src');
                    source.removeAttribute('src');
                });
                // Removing attributes alone keeps the loaded media playable;
                // load() re-runs resource selection and drops it.
                mediaEl.load();
            }
            mediaEl.classList.add(OBFUSCATED_CLASS);
        });
        forEachShadowRoot(root, (shadow) => this.obfuscateMedia(shadow));
    }

    /**
     * Scramble every text node under `root` (except inside script/style/meta/
     * title/noscript/head), remembering the original text for restoration.
     * Descends into open shadow roots.
     *
     * @param {Node} root - Element or shadow root to process.
     */
    scrambleAllText(root) {
        const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT, {
            acceptNode: (node) => {
                const parent = node.parentNode;
                if (!parent) {
                    return NodeFilter.FILTER_REJECT;
                }
                // Text directly under a ShadowRoot has no parent *element*;
                // it is still visible content and must be scrambled.
                if (parent.nodeType !== Node.ELEMENT_NODE) {
                    return NodeFilter.FILTER_ACCEPT;
                }
                return NON_TEXT_TAGS.includes(parent.tagName.toLowerCase())
                    ? NodeFilter.FILTER_REJECT
                    : NodeFilter.FILTER_ACCEPT;
            },
        });
        while (walker.nextNode()) {
            const node = walker.currentNode;
            const current = node.textContent || '';
            const scrambled = this.scramble(current);
            if (scrambled === current) {
                continue;
            }
            // When re-scrambling our own output, keep the first-seen original.
            const prior = this.scrambledText.get(node);
            const original = prior && prior.scrambled === current ? prior.original : current;
            node.textContent = scrambled;
            this.scrambledText.set(node, { original, scrambled });
        }
        forEachShadowRoot(root, (shadow) => this.scrambleAllText(shadow));
    }

    /**
     * Replace one character with a random character from the same script.
     *
     * @param {string} char - A single character (one code point).
     * @returns {string} Random Arabic / CJK / lowercase Latin character for
     *   input in those ranges; the input itself otherwise.
     */
    scrambleChar(char) {
        const code = char.charCodeAt(0);
        // Arabic block: 0x0600–0x06FF
        if (code >= 0x0600 && code <= 0x06ff) {
            return String.fromCharCode(0x0600 + Math.floor(Math.random() * 0x100));
        }
        // CJK Unified Ideographs (Chinese): 0x4E00–0x9FFF, both ends inclusive
        if (code >= 0x4e00 && code <= 0x9fff) {
            return String.fromCharCode(0x4e00 + Math.floor(Math.random() * (0x9fff - 0x4e00 + 1)));
        }
        // Basic Latin A–Z / a–z: always replaced with a lowercase letter
        if ((code >= 0x0041 && code <= 0x005a) || (code >= 0x0061 && code <= 0x007a)) {
            return String.fromCharCode(97 + Math.floor(Math.random() * 26));
        }
        return char; // leave numbers, punctuation, emojis unchanged
    }

    /**
     * @param {string} text
     * @returns {string} `text` with every character passed through `scrambleChar`.
     */
    scramble(text) {
        // Array.from splits by code point, so surrogate pairs stay intact.
        return Array.from(text)
            .map((char) => this.scrambleChar(char))
            .join('');
    }

    /** The component renders nothing of its own. */
    render() {
        return null;
    }
}, [0, "jumble-head", {
        "allowSEO": [1540, "allow-seo"],
        "customAllowlist": [1040, "custom-allowlist"],
        "customDenylist": [1040, "custom-denylist"],
        "toggleBotMode": [64],
        "setBotMode": [64],
        "getCurrentBotMode": [64]
    }]);

/**
 * Register `<jumble-head>` with the custom element registry, if one exists
 * and the tag is not already defined.
 */
function defineCustomElement$1() {
    if (typeof customElements === "undefined") {
        return;
    }
    const components = ["jumble-head"];
    components.forEach(tagName => {
        switch (tagName) {
            case "jumble-head":
                if (!customElements.get(tagName)) {
                    customElements.define(tagName, JumbleHead$1);
                }
                break;
        }
    });
}
defineCustomElement$1(JumbleHead$1);

const JumbleHead = JumbleHead$1;
const defineCustomElement = defineCustomElement$1;

export { JumbleHead, defineCustomElement };
//# sourceMappingURL=jumble-head.js.map