@naturalcycles/js-lib
Version:
Standard library for universal (browser + Node.js) javascript
139 lines (138 loc) • 5.18 kB
JavaScript
"use strict";
// Relevant material:
// https://deviceandbrowserinfo.com/learning_zone/articles/detecting-headless-chrome-puppeteer-2024
Object.defineProperty(exports, "__esModule", { value: true });
exports.BotReason = exports.BotDetectionService = void 0;
const env_1 = require("./env");
// Case-insensitive pattern of substrings that mark a User-Agent string as automated
// (crawlers, headless browsers, link-preview fetchers, auditing tools, Google fetchers).
// BotDetectionService tests `navigator.userAgent` against it.
// The pattern has no `g`/`y` flag, so `.test()` keeps no `lastIndex` state between calls.
const botRegex = /bot|spider|crawl|headless|electron|phantom|slimer|proximic|cincraw|slurp|MicrosoftPreview|ahrefs|preview|lighthouse|facebookexternal|pinterest|screaming|apis-google|duplexweb-google|feedfetcher-google|google-read-aloud|googleweblight|mediapartners-google/i;
/**
 * Service to detect bots and CDP (Chrome DevTools Protocol).
 *
 * Configuration keys read by this class (all optional):
 * - `enabled`: when strictly `false`, every check short-circuits to "not a bot" / "not CDP".
 * - `memoizeResults`: when truthy, the first computed result of each check is reused.
 * - `treatCDPAsBotReason`: when truthy, a positive CDP check is reported as `BotReason.CDP`.
 *
 * @experimental
 */
class BotDetectionService {
  cfg;
  constructor(cfg = {}) {
    this.cfg = cfg;
  }
  // Memoized results. `undefined` means "not computed yet";
  // `null` (botReason) and `false` (cdp) are valid computed values.
  botReason;
  cdp;

  /**
   * Returns true if a BotReason was detected or the CDP check is positive.
   */
  isBotOrCDP() {
    return !!this.getBotReason() || this.isCDP();
  }

  /**
   * Returns true if a BotReason was detected.
   */
  isBot() {
    return !!this.getBotReason();
  }

  /**
   * Returns null if it's not a Bot,
   * otherwise a truthy BotReason.
   *
   * Always returns null when `cfg.enabled === false`.
   * Reuses the previously computed value when `cfg.memoizeResults` is set.
   */
  getBotReason() {
    if (this.cfg.enabled === false) {
      return null;
    }
    if (this.cfg.memoizeResults && this.botReason !== undefined) {
      return this.botReason;
    }
    this.botReason = this.detectBotReason();
    return this.botReason;
  }

  /**
   * Runs the bot heuristics in order and returns the first matching BotReason,
   * or null when none of them match.
   */
  detectBotReason() {
    // SSR - not a bot
    if ((0, env_1.isServerSide)()) {
      return null;
    }
    const { navigator } = globalThis;
    if (!navigator) {
      return BotReason.NoNavigator;
    }
    const { userAgent } = navigator;
    if (!userAgent) {
      return BotReason.NoUserAgent;
    }
    if (botRegex.test(userAgent)) {
      return BotReason.UserAgent;
    }
    if (navigator.webdriver) {
      return BotReason.WebDriver;
    }
    // Kirill: the "zero plugins" check (`navigator.plugins.length === 0`, a Headless Chrome
    // signal) was removed, as it no longer seems reliable: it generates false positives
    // with the latest Android clients (e.g. Chrome 129).

    // Empty `languages` is treated as a Headless Chrome signal.
    // `languages` is normally an array, so a strict comparison with '' can never match an
    // empty list; checking `length` covers both an empty array and the legacy empty string.
    // A missing `languages` property is deliberately NOT treated as a bot signal.
    const { languages } = navigator;
    if (languages?.length === 0) {
      return BotReason.EmptyLanguages; // Headless Chrome
    }
    // Kirill: "the chrome test" from https://intoli.com/blog/not-possible-to-block-chrome-headless/
    // (User-Agent contains 'Chrome' but `globalThis.chrome` is absent) was removed as a
    // criterion due to false positives with Android.
    if (this.cfg.treatCDPAsBotReason && this.isCDP()) {
      return BotReason.CDP;
    }
    return null;
  }

  /**
   * CDP stands for Chrome DevTools Protocol.
   * This function tests if the current environment is a CDP environment.
   * If it's true - it's one of:
   *
   * 1. Bot, automated with CDP, e.g Puppeteer, Playwright or such.
   * 2. Developer with Chrome DevTools open.
   *
   * 2 is certainly not a bot, but unfortunately we can't distinguish between the two.
   * That's why this function is not part of `isBot()`, because it can give "false positive" with DevTools.
   *
   * Always returns false when `cfg.enabled === false`.
   * Reuses the previously computed value when `cfg.memoizeResults` is set.
   *
   * Based on: https://deviceandbrowserinfo.com/learning_zone/articles/detecting-headless-chrome-puppeteer-2024
   */
  isCDP() {
    if (this.cfg.enabled === false) {
      return false;
    }
    if (this.cfg.memoizeResults && this.cdp !== undefined) {
      return this.cdp;
    }
    this.cdp = this.detectCDP();
    return this.cdp;
  }

  /**
   * Performs the actual CDP probe: installs a `stack` getter on a fresh Error,
   * passes the error to `console.debug` and reports whether the getter was invoked.
   * Returns false on the server side, or when the probe itself throws.
   */
  detectCDP() {
    if ((0, env_1.isServerSide)()) {
      return false;
    }
    let cdpCheck1 = false;
    try {
      /* eslint-disable */
      // biome-ignore lint/suspicious/useErrorMessage: ok
      const e = new window.Error();
      window.Object.defineProperty(e, 'stack', {
        configurable: false,
        enumerable: false,
        // biome-ignore lint/complexity/useArrowFunction: ok
        get: function () {
          // Reached only if something reads `e.stack` while the error is being logged
          cdpCheck1 = true;
          return '';
        },
      });
      // This is part of the detection and shouldn't be deleted
      window.console.debug(e);
      /* eslint-enable */
    }
    catch {
      // Deliberate best-effort: a failing probe (e.g. no `window`) means "CDP not detected"
    }
    return cdpCheck1;
  }
}
exports.BotDetectionService = BotDetectionService;
var BotReason;
/**
 * Enum-like lookup of the reasons a client can be classified as a bot.
 * Every member is registered in both directions: name -> numeric code and code -> name.
 */
(function (reasons) {
  // Stores the forward (name -> code) mapping first, then the reverse (code -> name) one.
  const register = (name, code) => {
    reasons[name] = code;
    reasons[code] = name;
  };
  register("NoNavigator", 1);
  register("NoUserAgent", 2);
  register("UserAgent", 3);
  register("WebDriver", 4);
  // 5 was ZeroPlugins (no longer assigned)
  register("EmptyLanguages", 6);
  // 7 was ChromeWithoutChrome (no longer assigned)
  /**
   * This is when CDP is considered to be a reason to be a Bot.
   * By default it's not.
   */
  register("CDP", 8);
})(BotReason || (exports.BotReason = BotReason = {}));