UNPKG

puppeteer-extra-plugin-stealth

Version:

Stealth mode: Applies various techniques to make detection of headless puppeteer harder.

209 lines (181 loc) 6.84 kB
'use strict'

const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin')

/**
 * Return the first capture group of `pattern` within `text`.
 *
 * Used instead of `text.match(pattern)[1]` so that an unexpected user agent
 * format yields `fallback` rather than a `TypeError` on a `null` match.
 *
 * @param {string} text - The string to search
 * @param {RegExp} pattern - A pattern with at least one capture group
 * @param {string} [fallback] - Value returned when nothing matches (default: empty string)
 * @returns {string} The captured text or `fallback`
 */
const firstCapture = (text, pattern, fallback = '') => {
  const match = String(text).match(pattern)
  return match && match[1] !== undefined ? match[1] : fallback
}

/**
 * Fixes the UserAgent info (composed of UA string, Accept-Language, Platform, and UA hints).
 *
 * If you don't provide any values this plugin will default to using the regular UserAgent string (while stripping the headless part).
 * Default language is set to "en-US,en", the other settings match the UserAgent string.
 * If you are running on Linux, it will mask the settings to look like Windows. This behavior can be disabled with the `maskLinux` option.
 *
 * By default puppeteer will not set an `Accept-Language` header in headless:
 * It's (theoretically) possible to fix that using either `page.setExtraHTTPHeaders` or a `--lang` launch arg.
 * Unfortunately `page.setExtraHTTPHeaders` will lowercase everything and launch args are not always available. :)
 *
 * In addition, the `navigator.platform` property is always set to the host value, e.g. `Linux` which makes detection very easy.
 *
 * Note: You cannot use the regular `page.setUserAgent()` puppeteer call in your code,
 * as it will reset the language and platform values you set with this plugin.
 *
 * @example
 * const puppeteer = require("puppeteer-extra")
 *
 * const StealthPlugin = require("puppeteer-extra-plugin-stealth")
 * const stealth = StealthPlugin()
 * // Remove this specific stealth plugin from the default set
 * stealth.enabledEvasions.delete("user-agent-override")
 * puppeteer.use(stealth)
 *
 * // Stealth plugins are just regular `puppeteer-extra` plugins and can be added as such
 * const UserAgentOverride = require("puppeteer-extra-plugin-stealth/evasions/user-agent-override")
 * // Define custom UA and locale
 * const ua = UserAgentOverride({ userAgent: "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)", locale: "de-DE,de" })
 * puppeteer.use(ua)
 *
 * @param {Object} [opts] - Options
 * @param {string} [opts.userAgent] - The user agent to use (default: browser.userAgent())
 * @param {string} [opts.locale] - The locale to use in `Accept-Language` header and in `navigator.languages` (default: `en-US,en`)
 * @param {boolean} [opts.maskLinux] - Whether to hide Linux as platform in the user agent or not - true by default
 *
 */
class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)

    // Updated by beforeLaunch()/beforeConnect(); decides how the locale is applied in onPageCreated()
    this._headless = false
  }

  get name() {
    return 'stealth/evasions/user-agent-override'
  }

  get dependencies() {
    return new Set(['user-preferences'])
  }

  get defaults() {
    return {
      userAgent: null,
      locale: 'en-US,en',
      maskLinux: true
    }
  }

  /**
   * Build the user agent override for a new page and send it over CDP.
   *
   * @param {Object} page - The page that was just created
   * @returns {Promise<void>} Resolves once `Network.setUserAgentOverride` has been answered
   */
  async onPageCreated(page) {
    // Determine the full user agent string, strip the "Headless" part
    let ua =
      this.opts.userAgent ||
      (await page.browser().userAgent()).replace('HeadlessChrome/', 'Chrome/')

    if (
      this.opts.maskLinux &&
      ua.includes('Linux') &&
      !ua.includes('Android') // Skip Android user agents since they also contain Linux
    ) {
      ua = ua.replace(/\(([^)]+)\)/, '(Windows NT 10.0; Win64; x64)') // Replace the first part in parentheses with Windows data
    }

    // Full version number from Chrome. Prefer the one advertised in the UA string and
    // fall back to the browser's own version when the UA carries no parsable `Chrome/x.y`.
    let uaVersion = firstCapture(ua, /Chrome\/([\d.]+)/)
    if (!uaVersion) {
      uaVersion = firstCapture(await page.browser().version(), /\/([\d.]+)/)
    }

    const _getMobile = () => ua.includes('Android')

    // Get platform identifier (short or long version)
    const _getPlatform = (extended = false) => {
      if (ua.includes('Mac OS X')) {
        return extended ? 'Mac OS X' : 'MacIntel'
      } else if (ua.includes('Android')) {
        return 'Android'
      } else if (ua.includes('Linux')) {
        return 'Linux'
      } else {
        return extended ? 'Windows' : 'Win32'
      }
    }

    // Source in C++: https://source.chromium.org/chromium/chromium/src/+/master:components/embedder_support/user_agent_utils.cc;l=55-100
    const _getBrands = () => {
      const seed = uaVersion.split('.')[0] // the major version number of Chrome
      const permutations = [
        [0, 1, 2],
        [0, 2, 1],
        [1, 0, 2],
        [1, 2, 0],
        [2, 0, 1],
        [2, 1, 0]
      ]
      // Explicit numeric conversion; an unparsable seed selects the first permutation
      const order = permutations[(Number.parseInt(seed, 10) || 0) % 6]
      const escapedChars = [' ', ' ', ';']

      const greaseyBrand = `${escapedChars[order[0]]}Not${
        escapedChars[order[1]]
      }A${escapedChars[order[2]]}Brand`

      const greasedBrandVersionList = []
      greasedBrandVersionList[order[0]] = {
        brand: greaseyBrand,
        version: '99'
      }
      greasedBrandVersionList[order[1]] = {
        brand: 'Chromium',
        version: seed
      }
      greasedBrandVersionList[order[2]] = {
        brand: 'Google Chrome',
        version: seed
      }

      return greasedBrandVersionList
    }

    // Return OS version (empty string when it cannot be read from the UA)
    const _getPlatformVersion = () => {
      if (ua.includes('Mac OS X ')) {
        return firstCapture(ua, /Mac OS X ([^)]+)/)
      } else if (ua.includes('Android ')) {
        return firstCapture(ua, /Android ([^;]+)/)
      } else if (ua.includes('Windows ')) {
        return firstCapture(ua, /Windows .*?([\d.]+);?/)
      } else {
        return ''
      }
    }

    // Get architecture, this seems to be empty on mobile and x86 on desktop
    const _getPlatformArch = () => (_getMobile() ? '' : 'x86')

    // Return the Android model, empty on desktop or when the UA has no model segment
    const _getPlatformModel = () =>
      _getMobile() ? firstCapture(ua, /Android.*?;\s([^)]+)/) : ''

    const override = {
      userAgent: ua,
      platform: _getPlatform(),
      userAgentMetadata: {
        brands: _getBrands(),
        fullVersion: uaVersion,
        platform: _getPlatform(true),
        platformVersion: _getPlatformVersion(),
        architecture: _getPlatformArch(),
        model: _getPlatformModel(),
        mobile: _getMobile()
      }
    }

    // In case of headless, override the acceptLanguage in CDP.
    // This is not preferred, as it messes up the header order.
    // On headful, we set the user preference language setting instead.
    if (this._headless) {
      override.acceptLanguage = this.opts.locale || 'en-US,en'
    }

    this.debug('onPageCreated - Will set these user agent options', {
      override,
      opts: this.opts
    })

    // `page._client` is a property in some puppeteer versions and a function in others
    const client =
      typeof page._client === 'function' ? page._client() : page._client

    // Awaited so the override is acknowledged before this hook resolves and a CDP
    // failure rejects this hook instead of becoming an unhandled rejection.
    await client.send('Network.setUserAgentOverride', override)
  }

  /**
   * Record whether the browser is about to be launched headless.
   *
   * @param {Object} options - The launch options
   */
  async beforeLaunch(options) {
    const { headless, devtools } = options || {}
    // When `headless` is omitted puppeteer launches headless, unless `devtools` is
    // enabled (which forces a headful browser). An explicit value is stored as given.
    this._headless = headless === undefined ? !devtools : headless
  }

  async beforeConnect() {
    // Treat browsers using connect() as headless browsers
    this._headless = true
  }

  get data() {
    return [
      {
        name: 'userPreferences',
        value: {
          intl: { accept_languages: this.opts.locale || 'en-US,en' }
        }
      }
    ]
  }
}

const defaultExport = opts => new Plugin(opts)
module.exports = defaultExport