@naturalcycles/js-lib
Version:
Standard library for universal (browser + Node.js) javascript
121 lines (120 loc) • 4.43 kB
JavaScript
// Relevant material:
// https://deviceandbrowserinfo.com/learning_zone/articles/detecting-headless-chrome-puppeteer-2024
import { BotReason } from '../bot.model.js';
import { isServerSide } from '../env.js';
// Case-insensitive pattern tested against `navigator.userAgent`.
// Each alternative is a plain substring; a user agent containing any of them
// is reported as a bot. The regex deliberately has no `g`/`y` flag, so
// `.test()` keeps no `lastIndex` state between calls.
const botRegex = /bot|spider|crawl|headless|electron|phantom|slimer|proximic|cincraw|slurp|MicrosoftPreview|ahrefs|preview|lighthouse|facebookexternal|pinterest|screaming|apis-google|duplexweb-google|feedfetcher-google|google-read-aloud|googleweblight|mediapartners-google/i;
/**
 * Service to detect bots and CDP (Chrome DevTools Protocol).
 *
 * Config flags read by this class:
 * - `enabled`: when strictly `false`, every check short-circuits to "not a bot".
 * - `memoizeResults`: when truthy, the first computed result is reused.
 * - `treatCDPAsBotReason`: when truthy, a positive CDP check is reported as a bot reason.
 *
 * @experimental
 */
export class BotDetectionService {
  cfg;

  /**
   * @param {object} [cfg] - configuration flags, see the class description.
   */
  constructor(cfg = {}) {
    this.cfg = cfg;
  }

  // Memoized results.
  // `undefined` means "not computed yet"; `null` (botReason) and `false` (cdp)
  // are valid computed results and are memoized as such.
  botReason;
  cdp;

  /**
   * @returns {boolean} true if a bot reason was detected or the CDP check is positive.
   */
  isBotOrCDP() {
    return !!this.getBotReason() || this.isCDP();
  }

  /**
   * @returns {boolean} true if a bot reason was detected.
   */
  isBot() {
    return !!this.getBotReason();
  }

  /**
   * Returns null if it's not a Bot,
   * otherwise a truthy BotReason.
   *
   * Always returns null when `cfg.enabled === false`.
   * With `cfg.memoizeResults` the previously computed value is returned
   * instead of running the detection again.
   */
  getBotReason() {
    if (this.cfg.enabled === false) {
      return null;
    }
    if (this.cfg.memoizeResults && this.botReason !== undefined) {
      return this.botReason;
    }
    this.botReason = this.detectBotReason();
    return this.botReason;
  }

  /**
   * Runs the bot checks in order and returns the first matching BotReason,
   * or null when none of them matches.
   */
  detectBotReason() {
    // SSR - not a bot
    if (isServerSide()) {
      return null;
    }

    const { navigator } = globalThis;
    if (!navigator) {
      return BotReason.NoNavigator;
    }

    const { userAgent } = navigator;
    if (!userAgent) {
      return BotReason.NoUserAgent;
    }

    if (botRegex.test(userAgent)) {
      return BotReason.UserAgent;
    }

    if (navigator.webdriver) {
      return BotReason.WebDriver;
    }

    // Kirill: the "zero plugins" check (navigator.plugins being empty) was removed,
    // as it no longer seems reliable: it generates false positives
    // with recent Android clients (e.g. Chrome 129).

    // Headless Chrome: `navigator.languages` is empty.
    // `navigator.languages` is normally an array of strings, so a strict comparison
    // with '' can never match it. Checking `length === 0` covers both an empty array
    // and the legacy empty-string value. A missing `languages` property is NOT
    // treated as a bot signal.
    if (navigator.languages?.length === 0) {
      return BotReason.EmptyLanguages;
    }

    // Kirill: "the chrome test" from
    // https://intoli.com/blog/not-possible-to-block-chrome-headless/
    // (user agent claims Chrome, but `globalThis.chrome` is absent) was removed
    // due to false positives with Android.

    if (this.cfg.treatCDPAsBotReason && this.isCDP()) {
      return BotReason.CDP;
    }

    return null;
  }

  /**
   * CDP stands for Chrome DevTools Protocol.
   * This function tests if the current environment is a CDP environment.
   * If it's true - it's one of:
   *
   * 1. Bot, automated with CDP, e.g Puppeteer, Playwright or such.
   * 2. Developer with Chrome DevTools open.
   *
   * 2 is certainly not a bot, but unfortunately we can't distinguish between the two.
   * That's why this function is not part of `isBot()`, because it can give "false positive" with DevTools.
   *
   * Always returns false when `cfg.enabled === false`.
   * With `cfg.memoizeResults` the previously computed value is reused.
   *
   * Based on: https://deviceandbrowserinfo.com/learning_zone/articles/detecting-headless-chrome-puppeteer-2024
   */
  isCDP() {
    if (this.cfg.enabled === false) {
      return false;
    }
    if (this.cfg.memoizeResults && this.cdp !== undefined) {
      return this.cdp;
    }
    this.cdp = this.detectCDP();
    return this.cdp;
  }

  /**
   * Performs the actual CDP probe.
   *
   * An Error gets a `stack` getter that flips a flag, and is then passed to
   * `console.debug`. If anything reads `stack` while the error is being logged,
   * the flag is set. Per the article referenced on `isCDP()`, that is expected
   * to happen when a CDP client (or DevTools) is attached.
   *
   * @returns {boolean} true if the `stack` getter was invoked; false on the
   * server side or if the probe itself throws.
   */
  detectCDP() {
    if (isServerSide()) {
      return false;
    }

    let cdpCheck1 = false;
    try {
      /* eslint-disable */
      // biome-ignore lint/suspicious/useErrorMessage: ok
      const e = new window.Error();
      window.Object.defineProperty(e, 'stack', {
        configurable: false,
        enumerable: false,
        // biome-ignore lint/complexity/useArrowFunction: ok
        get: function () {
          cdpCheck1 = true;
          return '';
        },
      });
      // This is part of the detection and shouldn't be deleted
      window.console.debug(e);
      /* eslint-enable */
    } catch {
      // Deliberate best-effort: a failing probe means "CDP not detected".
    }
    return cdpCheck1;
  }
}