// puppeteer-extra-plugin-stealth
// Version:
// Stealth mode: Applies various techniques to make detection of headless puppeteer harder.
// 584 lines (538 loc) • 21.7 kB
// JavaScript
/**
* A set of shared utility functions specifically for the purpose of modifying native browser APIs without leaving traces.
*
* Meant to be passed down in puppeteer and used in the context of the page (everything in here runs in NodeJS as well as a browser).
*
* Note: If for whatever reason you need to use this outside of `puppeteer-extra`:
* Just remove the `module.exports` statement at the very bottom, the rest can be copy pasted into any browser context.
*
* Alternatively take a look at the `extract-stealth-evasions` package to create a finished bundle which includes these utilities.
*
*/
// Namespace object: every helper in this file is attached to it as a property.
const utils = {}
// Entry point: its only action is to call `utils.preloadCache()`.
utils.init = () => {
utils.preloadCache()
}
/**
* Wraps a JS Proxy Handler and strips its presence from error stacks, in case the traps throw.
*
* The presence of a JS Proxy can be revealed as it shows up in error stack traces.
*
* Every own property name of `handler` is re-exposed on a new handler object as a wrapper function
* that forwards to the original trap and, when the trap throws, rewrites `err.stack` before
* re-throwing the same error object.
* The returned handler also carries a default `setPrototypeOf` trap; it is replaced by a wrapper
* when `handler` defines its own `setPrototypeOf`.
*
* @param {object} handler - The JS Proxy handler to wrap
* @returns {object} A new handler object containing the wrapped traps
*/
utils.stripProxyFromErrors = (handler = {}) => {
// NOTE(review): the anchor/blacklist strings further down contain the identifier `newHandler`.
// They presumably mirror how chromium labels frames of this wrapper, so renaming this variable
// would likely require updating those strings as well - confirm before refactoring.
const newHandler = {
// Default trap: throws a TypeError for a `null` prototype, throws a TypeError when `target` and
// `proto` currently have the same prototype, otherwise delegates to `Reflect.setPrototypeOf`.
setPrototypeOf: function (target, proto) {
if (proto === null)
throw new TypeError('Cannot convert object to primitive value')
if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {
throw new TypeError('Cyclic __proto__ value')
}
return Reflect.setPrototypeOf(target, proto)
}
}
// We wrap each trap in the handler in a try/catch and modify the error stack if they throw
const traps = Object.getOwnPropertyNames(handler)
traps.forEach(trap => {
// A regular function (not an arrow) so that `this` and `arguments` of the trap call are forwarded
newHandler[trap] = function () {
try {
// Forward the call to the defined proxy handler
return handler[trap].apply(this, arguments || [])
} catch (err) {
// Stack traces differ per browser, we only support chromium based ones currently
// (anything without a stack containing `at ` is re-thrown untouched)
if (!err || !err.stack || !err.stack.includes(`at `)) {
throw err
}
// When something throws within one of our traps the Proxy will show up in error stacks
// An earlier implementation of this code would simply strip lines with a blacklist,
// but it makes sense to be more surgical here and only remove lines related to our Proxy.
// We try to use a known "anchor" line for that and strip it with everything above it.
// If the anchor line cannot be found for some reason we fall back to our blacklist approach.
//
// NOTE(review): the `stack` parameter is never read, the body uses `err.stack` directly.
// Both call sites pass `err.stack`, so the result is the same today.
const stripWithBlacklist = (stack, stripFirstLine = true) => {
const blacklist = [
`at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply
`at Object.${trap} `, // e.g. Object.get or Object.apply
`at Object.newHandler.<computed> [as ${trap}] ` // caused by this very wrapper :-)
]
return (
err.stack
.split('\n')
// Drop the line at index 1 (the first line below the error message) unless
// `stripFirstLine` is false; the original author considers it guaranteed to be our proxy
.filter((line, index) => !(index === 1 && stripFirstLine))
// Check if the line starts with one of our blacklisted strings
.filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))
.join('\n')
)
}
// Returns the sanitized stack as a string, or `false` when no line starts with the anchor
const stripWithAnchor = (stack, anchor) => {
const stackArr = stack.split('\n')
anchor = anchor || `at Object.newHandler.<computed> [as ${trap}] ` // Known first Proxy line in chromium
const anchorIndex = stackArr.findIndex(line =>
line.trim().startsWith(anchor)
)
if (anchorIndex === -1) {
return false // 404, anchor not found
}
// Strip everything from the top until we reach the anchor line
// (lines 1..anchorIndex are removed, so the anchor line itself is removed too)
// Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)
stackArr.splice(1, anchorIndex)
return stackArr.join('\n')
}
// Special cases due to our nested toString proxies
// (a string pattern is used, so only the first occurrence is rewritten)
err.stack = err.stack.replace(
'at Object.toString (',
'at Function.toString ('
)
// For stacks that involve `Function.toString` only the blacklist is applied and
// the line at index 1 is kept
if ((err.stack || '').includes('at Function.toString (')) {
err.stack = stripWithBlacklist(err.stack, false)
throw err
}
// Try using the anchor method, fallback to blacklist if necessary
// (`stripWithAnchor` yields `false` when the anchor is missing, which triggers the `||`)
err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)
throw err // Re-throw our now sanitized error
}
}
})
return newHandler
}
/**
 * Remove the top of an error's stack trace up to and including a known anchor line.
 *
 * The message line (index 0, e.g. `TypeError: ...`) is always kept. When no line starts
 * with `anchor` (after trimming) the error is returned untouched.
 *
 * @param {object} err - The error to sanitize (its `stack` is rewritten in place)
 * @param {string} anchor - The string the anchor line starts with
 * @returns {object} The same `err` object that was passed in
 */
utils.stripErrorWithAnchor = (err, anchor) => {
  const lines = err.stack.split('\n')

  // Locate the first line whose trimmed text begins with the anchor
  let anchorPos = -1
  for (let i = 0; i < lines.length; i += 1) {
    if (lines[i].trim().startsWith(anchor)) {
      anchorPos = i
      break
    }
  }

  if (anchorPos !== -1) {
    // Keep the message line plus everything below the anchor line
    const [messageLine] = lines
    err.stack = [messageLine, ...lines.slice(anchorPos + 1)].join('\n')
  }
  return err
}
/**
 * Replace the property of an object in a stealthy way.
 *
 * The existing own property descriptor (writable, enumerable, configurable, ...) is used as the
 * basis and the given overrides are applied on top of it.
 *
 * A descriptor may be either a data descriptor (`value`/`writable`) or an accessor descriptor
 * (`get`/`set`), never both. When the overrides switch the kind of the property, the fields of
 * the other kind are dropped from the copied descriptor so `Object.defineProperty` does not
 * throw on a mixed descriptor.
 *
 * Note: You also want to work on the prototype of an object most often,
 * as you'd otherwise leave traces (e.g. showing up in Object.getOwnPropertyNames(obj)).
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty
 *
 * @example
 * replaceProperty(WebGLRenderingContext.prototype, 'getParameter', { value: "alice" })
 * // or
 * replaceProperty(Object.getPrototypeOf(navigator), 'languages', { get: () => ['en-US', 'en'] })
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The property name to replace
 * @param {object} descriptorOverrides - e.g. { value: "alice" }
 * @returns {object} The object that was passed in as `obj`
 */
utils.replaceProperty = (obj, propName, descriptorOverrides = {}) => {
  const overrides = { ...descriptorOverrides }
  const overrides_has = key => Object.prototype.hasOwnProperty.call(overrides, key)

  // Copy over the existing descriptor fields (writable, enumerable, configurable, etc)
  const descriptor = {
    ...(Object.getOwnPropertyDescriptor(obj, propName) || {})
  }

  // Overrides turn the property into an accessor: forget the old data fields
  if (overrides_has('get') || overrides_has('set')) {
    delete descriptor.value
    delete descriptor.writable
  }
  // Overrides turn the property into a data property: forget the old accessor fields
  if (overrides_has('value') || overrides_has('writable')) {
    delete descriptor.get
    delete descriptor.set
  }

  // Add our overrides (e.g. value, get())
  return Object.defineProperty(obj, propName, { ...descriptor, ...overrides })
}
/**
 * Preload a cache of function copies and data.
 *
 * For a determined enough observer it would be possible to overwrite and sniff usage of functions
 * we use in our internal Proxies, to combat that we keep bound copies of those functions.
 *
 * Note: Whenever we add a `Function.prototype.toString` proxy we should preload the cache before,
 * by executing `utils.preloadCache()` before the proxy is applied (so we don't cause recursive lookups).
 *
 * The cache is built at most once: when `utils.cache` is already set this is a no-op.
 */
utils.preloadCache = () => {
  if (!utils.cache) {
    // Bound copies used in our proxies
    const reflectCopies = {
      get: Reflect.get.bind(Reflect),
      apply: Reflect.apply.bind(Reflect)
    }
    // Used in `makeNativeString`, e.g. `function toString() { [native code] }`
    const nativeToStringStr = Function.toString + ''

    utils.cache = { Reflect: reflectCopies, nativeToStringStr }
  }
}
/**
 * Utility function to generate a cross-browser `toString` result representing native code.
 *
 * There's small differences: Chromium uses a single line, whereas FF & Webkit uses multiline strings.
 * To future-proof this we use an existing native toString result as the basis.
 *
 * The only advantage we have over the other team is that our JS runs first, hence we cache the result
 * of the native toString result once, so they cannot spoof it afterwards and reveal that we're using it.
 *
 * Requires `utils.cache` to be populated (see `utils.preloadCache`).
 *
 * @example
 * makeNativeString('foobar') // => `function foobar() { [native code] }`
 *
 * @param {string} [name] - Optional function name
 * @returns {string} The cached native string with `toString` swapped for `name`
 */
utils.makeNativeString = (name = '') => {
  // A replacer function is used on purpose: with a plain string replacement, `$` sequences
  // in the name (`$$`, `$&`, ...) would be interpreted as replacement patterns and the
  // resulting function name would be mangled (`$` is a valid identifier character).
  return utils.cache.nativeToStringStr.replace('toString', () => name || '')
}
/**
 * Helper function to modify the `toString()` result of the provided object.
 *
 * Note: Use `utils.redirectToString` instead when possible.
 *
 * There's a quirk in JS Proxies that will cause the `toString()` result to differ from the vanilla Object.
 * If no string is provided we will generate a `[native code]` thing based on the name of the property object.
 *
 * This installs a Proxy around the current `Function.prototype.toString` (via `utils.replaceProperty`),
 * so every call adds one more layer for the whole execution context.
 *
 * @example
 * patchToString(WebGLRenderingContext.prototype.getParameter, 'function getParameter() { [native code] }')
 *
 * @param {object} obj - The object for which to modify the `toString()` representation
 * @param {string} str - Optional string used as a return value
 */
utils.patchToString = (obj, str = '') => {
  const handler = {
    apply: function (target, ctx) {
      // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + ""`
      if (ctx === Function.prototype.toString) {
        return utils.makeNativeString('toString')
      }
      // `toString` targeted at our proxied Object detected
      if (ctx === obj) {
        // We either return the optional string verbatim or derive the most desired result automatically
        return str || utils.makeNativeString(obj.name)
      }
      // Same guard as in `utils.redirectToString`: with a nullish receiver
      // (e.g. `Function.prototype.toString.call(undefined)`) the property access below would
      // throw our own TypeError, so let the wrapped function raise its genuine error instead.
      if (typeof ctx === 'undefined' || ctx === null) {
        return target.call(ctx)
      }
      // Check if the toString prototype of the context is the same as the global prototype,
      // if not indicates that we are doing a check across different windows, e.g. the `iframeWithdirect` test case
      const hasSameProto = Object.getPrototypeOf(
        Function.prototype.toString
      ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins
      if (!hasSameProto) {
        // Pass the call on to the local Function.prototype.toString instead
        return ctx.toString()
      }
      return target.call(ctx)
    }
  }
  const toStringProxy = new Proxy(
    Function.prototype.toString,
    utils.stripProxyFromErrors(handler)
  )
  utils.replaceProperty(Function.prototype, 'toString', {
    value: toStringProxy
  })
}
/**
 * Make all nested functions of an object native.
 *
 * Delegates to `utils.execRecursively`, applying `utils.patchToString` to every value
 * of type `function` and handing back whatever `utils.execRecursively` returns.
 *
 * @param {object} obj - The object to traverse
 */
utils.patchToStringNested = (obj = {}) =>
  utils.execRecursively(obj, ['function'], utils.patchToString)
/**
 * Redirect toString requests from one object to another.
 *
 * This installs a Proxy around the current `Function.prototype.toString` (via `utils.replaceProperty`).
 * When `toString` is invoked on `proxyObj` the string representation of `originalObj` is returned.
 * If `originalObj` is missing (or stringifies to an empty string) a native-looking string based on
 * the name of `originalObj`/`proxyObj` is generated instead.
 *
 * @param {object} proxyObj - The object that toString will be called on
 * @param {object} originalObj - The object which toString result we want to return
 */
utils.redirectToString = (proxyObj, originalObj) => {
  const handler = {
    apply: function (target, ctx) {
      // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + ""`
      if (ctx === Function.prototype.toString) {
        return utils.makeNativeString('toString')
      }
      // `toString` targeted at our proxied Object detected
      if (ctx === proxyObj) {
        const fallback = () =>
          originalObj && originalObj.name
            ? utils.makeNativeString(originalObj.name)
            : utils.makeNativeString(proxyObj.name)
        // `undefined + ''` and `null + ''` are the truthy strings 'undefined'/'null',
        // which would be returned verbatim and never reach the fallback
        if (originalObj === undefined || originalObj === null) {
          return fallback()
        }
        // Return the toString representation of our original object if possible
        return originalObj + '' || fallback()
      }
      // Nullish receiver: let the wrapped function raise its genuine error
      if (typeof ctx === 'undefined' || ctx === null) {
        return target.call(ctx)
      }
      // Check if the toString prototype of the context is the same as the global prototype,
      // if not indicates that we are doing a check across different windows, e.g. the `iframeWithdirect` test case
      const hasSameProto = Object.getPrototypeOf(
        Function.prototype.toString
      ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins
      if (!hasSameProto) {
        // Pass the call on to the local Function.prototype.toString instead
        return ctx.toString()
      }
      return target.call(ctx)
    }
  }
  const toStringProxy = new Proxy(
    Function.prototype.toString,
    utils.stripProxyFromErrors(handler)
  )
  utils.replaceProperty(Function.prototype, 'toString', {
    value: toStringProxy
  })
}
/**
 * All-in-one method to replace a property with a JS Proxy using the provided Proxy handler with traps.
 *
 * Will stealthify these aspects (strip error stack traces, redirect toString, etc).
 * Note: This is meant to modify native Browser APIs and works best with prototype objects.
 *
 * @example
 * replaceWithProxy(WebGLRenderingContext.prototype, 'getParameter', proxyHandler)
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The name of the property to replace
 * @param {object} handler - The JS Proxy handler to use
 * @returns {boolean} Always `true`
 */
utils.replaceWithProxy = (obj, propName, handler) => {
  // Read the property exactly once: the Proxy target and the object used for the
  // toString redirect must be the very same value, and a getter must not fire twice.
  const originalObj = obj[propName]
  const proxyObj = new Proxy(originalObj, utils.stripProxyFromErrors(handler))

  utils.replaceProperty(obj, propName, { value: proxyObj })
  utils.redirectToString(proxyObj, originalObj)

  return true
}
/**
 * All-in-one method to replace a getter with a JS Proxy using the provided Proxy handler with traps.
 *
 * The own getter of `propName` is wrapped in a Proxy, installed through `utils.replaceProperty`,
 * and the proxy's `toString()` is patched to report the source string of the original getter.
 *
 * @example
 * replaceGetterWithProxy(Object.getPrototypeOf(navigator), 'vendor', proxyHandler)
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The name of the property to replace
 * @param {object} handler - The JS Proxy handler to use
 * @returns {boolean} Always `true`
 */
utils.replaceGetterWithProxy = (obj, propName, handler) => {
  const nativeGetter = Object.getOwnPropertyDescriptor(obj, propName).get
  // Capture the special getter function string before anything is replaced
  const nativeGetterSource = nativeGetter.toString()

  const stealthHandler = utils.stripProxyFromErrors(handler)
  const getterProxy = new Proxy(nativeGetter, stealthHandler)

  utils.replaceProperty(obj, propName, { get: getterProxy })
  utils.patchToString(getterProxy, nativeGetterSource)
  return true
}
/**
 * All-in-one method to replace a getter and/or setter.
 *
 * The custom `get` is called as `get(nativeGetter)` and the custom `set` as
 * `set(newValue, nativeSetter)`; the native function handed over is already bound to the
 * receiver of the access. `toString()` of each replacement is redirected to the native function.
 *
 * @example
 * replaceGetterSetter(HTMLIFrameElement.prototype, 'contentWindow', handler)
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The name of the property to replace
 * @param {object} handlerGetterSetter - The handler with get and/or set
 *     functions
 * @see https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty#description
 */
utils.replaceGetterSetter = (obj, propName, handlerGetterSetter) => {
  const nativeDescriptor = Object.getOwnPropertyDescriptor(obj, propName)
  // Start from a copy of the native descriptor so untouched fields survive
  const patchedDescriptor = { ...nativeDescriptor }

  if (handlerGetterSetter.get !== undefined) {
    const nativeGetter = nativeDescriptor.get
    // Regular function: `this` has to be the receiver of the property access
    patchedDescriptor.get = function () {
      return handlerGetterSetter.get.call(this, nativeGetter.bind(this))
    }
    utils.redirectToString(patchedDescriptor.get, nativeGetter)
  }

  if (handlerGetterSetter.set !== undefined) {
    const nativeSetter = nativeDescriptor.set
    patchedDescriptor.set = function (incoming) {
      handlerGetterSetter.set.call(this, incoming, nativeSetter.bind(this))
    }
    utils.redirectToString(patchedDescriptor.set, nativeSetter)
  }

  Object.defineProperty(obj, propName, patchedDescriptor)
}
/**
 * All-in-one method to mock a non-existing property with a JS Proxy using the provided Proxy handler with traps.
 *
 * Will stealthify these aspects (strip error stack traces, patch toString, etc).
 *
 * @example
 * mockWithProxy(chrome.runtime, 'sendMessage', function sendMessage() {}, proxyHandler)
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The name of the property to replace or create
 * @param {object} pseudoTarget - The JS Proxy target to use as a basis
 * @param {object} handler - The JS Proxy handler to use
 * @returns {boolean} Always `true`
 */
utils.mockWithProxy = (obj, propName, pseudoTarget, handler) => {
  const stealthHandler = utils.stripProxyFromErrors(handler)
  const mock = new Proxy(pseudoTarget, stealthHandler)

  // Install the mock first, then make its toString() look native
  utils.replaceProperty(obj, propName, { value: mock })
  utils.patchToString(mock)

  return true
}
/**
 * All-in-one method to create a new JS Proxy with stealth tweaks.
 *
 * This is meant to be used whenever we need a JS Proxy but don't want to replace or mock an existing known property.
 *
 * Will stealthify certain aspects of the Proxy (strip error stack traces, patch toString, etc).
 *
 * @example
 * createProxy(navigator.mimeTypes.__proto__.namedItem, proxyHandler) // => Proxy
 *
 * @param {object} pseudoTarget - The JS Proxy target to use as a basis
 * @param {object} handler - The JS Proxy handler to use
 * @returns {object} The newly created Proxy
 */
utils.createProxy = (pseudoTarget, handler) => {
  const stealthHandler = utils.stripProxyFromErrors(handler)
  const stealthProxy = new Proxy(pseudoTarget, stealthHandler)

  utils.patchToString(stealthProxy)

  return stealthProxy
}
/**
 * Helper function to split a full path to an Object into the first part and property.
 *
 * Everything before the last dot becomes `objName`, the last dot entry becomes `propName`.
 * A path without any dot yields an empty `objName`.
 *
 * @example
 * splitObjPath(`HTMLMediaElement.prototype.canPlayType`)
 * // => {objName: "HTMLMediaElement.prototype", propName: "canPlayType"}
 *
 * @param {string} objPath - The full path to an object as dot notation string
 * @returns {{objName: string, propName: string}} The two parts of the path
 */
utils.splitObjPath = objPath => {
  const segments = objPath.split('.')
  // Take the last dot entry off ==> `canPlayType`
  const propName = segments.pop()
  // What remains is the owner path ==> `HTMLMediaElement.prototype`
  const objName = segments.join('.')
  return { objName, propName }
}
/**
* Convenience method to replace a property with a JS Proxy using the provided objPath.
*
* Supports a full path (dot notation) to the object as string here, in case that makes it easier.
*
* The path is split with `utils.splitObjPath`, the owner part is resolved with `eval` and the
* actual replacement is delegated to `utils.replaceWithProxy`.
*
* @example
* replaceObjPathWithProxy('WebGLRenderingContext.prototype.getParameter', proxyHandler)
*
* @param {string} objPath - The full path to an object (dot notation string) to replace
* @param {object} handler - The JS Proxy handler to use
* @returns {boolean} The return value of `utils.replaceWithProxy`
*/
utils.replaceObjPathWithProxy = (objPath, handler) => {
const { objName, propName } = utils.splitObjPath(objPath)
// Direct `eval`: the owner path is evaluated as code in the scope of this function, so the
// local bindings of this function are visible to it as well.
// NOTE(review): only meant for trusted, hard-coded paths - never pass untrusted input here.
const obj = eval(objName) // eslint-disable-line no-eval
return utils.replaceWithProxy(obj, propName, handler)
}
/**
 * Traverse nested properties of an object recursively and apply the given function on a whitelist of value types.
 *
 * Enumerable properties (including inherited ones, as `for...in` is used) are visited.
 * Truthy values of type `object` are descended into, every other truthy value is handed to `fn`
 * when its `typeof` is contained in `typeFilter`. Falsy values (`undefined`, `null`, `0`, `''`,
 * `false`, ...) are skipped.
 *
 * Objects are descended into only once, so cyclic structures (e.g. `obj.self = obj`) terminate
 * instead of overflowing the call stack.
 *
 * @param {object} obj - The object to traverse
 * @param {array} typeFilter - e.g. `['function']`
 * @param {Function} fn - e.g. `utils.patchToString`, called with the matching value
 * @returns {object} The object that was passed in as `obj`
 */
utils.execRecursively = (obj = {}, typeFilter = [], fn) => {
  // Remembers the containers we already walked through (cycle protection)
  const visited = new WeakSet()

  function recurse(current) {
    // Only objects/functions can be tracked in a WeakSet (the root may be a primitive)
    const isTrackable =
      current !== null &&
      (typeof current === 'object' || typeof current === 'function')
    if (isTrackable) {
      if (visited.has(current)) {
        return
      }
      visited.add(current)
    }

    for (const key in current) {
      // Read the property a single time (it might be backed by a getter)
      const value = current[key]
      if (!value) {
        continue
      }
      if (typeof value === 'object') {
        recurse(value)
      } else if (typeFilter.includes(typeof value)) {
        fn.call(this, value)
      }
    }
  }

  recurse(obj)
  return obj
}
/**
 * Everything we run through e.g. `page.evaluate` runs in the browser context, not the NodeJS one.
 * That means we cannot just use reference variables and functions from outside code, we need to pass everything as a parameter.
 *
 * Unfortunately the data we can pass is only allowed to be of primitive types, regular functions don't survive the built-in serialization process.
 * This utility function will take an object with functions and stringify them, so we can pass them down unharmed as strings.
 *
 * We use this to pass down our utility functions as well as any other functions (to be able to split up code better).
 * Properties whose value is not a function are left out of the result.
 *
 * @see utils.materializeFns
 *
 * @param {object} fnObj - An object containing functions as properties
 * @returns {object} An object mapping each function property name to the function's source string
 */
utils.stringifyFns = (fnObj = { hello: () => 'world' }) => {
  // Object.fromEntries() ponyfill - supported only in Node v12+, modern browsers are fine
  // https://github.com/feross/fromentries
  function fromEntries(iterable) {
    const result = {}
    for (const [key, val] of iterable) {
      result[key] = val
    }
    return result
  }
  const toObject = Object.fromEntries || fromEntries

  const functionEntries = Object.entries(fnObj).filter(
    entry => typeof entry[1] === 'function'
  )
  const stringifiedEntries = functionEntries.map(([name, func]) => [
    name,
    func.toString()
  ])

  return toObject(stringifiedEntries)
}
/**
 * Utility function to reverse the process of `utils.stringifyFns`.
 * Will materialize an object with stringified functions (supports classic and fat arrow functions,
 * including their `async` variants).
 *
 * Note: The strings are evaluated with a direct `eval`, only feed it strings that were produced
 * from trusted functions (e.g. by `utils.stringifyFns`).
 *
 * @see utils.stringifyFns
 *
 * @param {object} fnStrObj - An object containing stringified functions as properties
 * @returns {object} An object with the same keys holding the materialized functions
 */
utils.materializeFns = (fnStrObj = { hello: "() => 'world'" }) => {
  return Object.fromEntries(
    Object.entries(fnStrObj).map(([key, value]) => {
      // A classic function string would be evaluated as a declaration (yielding `undefined`),
      // so it is wrapped in an arrow function that returns it as an expression.
      // `async function ...` needs the same treatment, a plain `startsWith('function')` misses it.
      if (/^(?:async\s+)?function\b/.test(value)) {
        // some trickery is needed to make oldschool functions work :-)
        return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval
      }
      // arrow functions just work
      return [key, eval(value)] // eslint-disable-line no-eval
    })
  )
}
/**
 * Proxy handler templates for re-usability.
 *
 * @returns {object} An object exposing the handler factories:
 *   - `getterValue(value)`: handler with an `apply` trap that first forwards the call through the
 *     cached `Reflect.apply` (so errors of the original function still surface) and then
 *     returns `value` instead of the original result.
 */
utils.makeHandler = () => {
  // Used by simple `navigator` getter evasions
  const getterValue = value => {
    return {
      apply(target, ctx, args) {
        // Let's fetch the value first, to trigger and escalate potential errors
        // Illegal invocations like `navigator.__proto__.vendor` will throw here
        utils.cache.Reflect.apply(target, ctx, args)
        return value
      }
    }
  }

  return { getterValue }
}
/**
 * Compare two arrays.
 *
 * The arrays are considered equal when they have the same length and every pair of
 * elements at the same index is strictly equal (`===`). The comparison is shallow.
 *
 * @param {array} array1 - First array
 * @param {array} array2 - Second array
 * @returns {boolean} `true` when both arrays hold the same elements in the same order
 */
utils.arrayEquals = (array1, array2) => {
  const size = array1.length
  if (size !== array2.length) {
    return false
  }

  // Advance as long as the elements match, a full walk means equality
  let position = 0
  while (position < size && array1[position] === array2[position]) {
    position += 1
  }
  return position === size
}
/**
 * Cache the method return according to its arguments.
 *
 * Argument lists are compared with `utils.arrayEquals`. On the first call with a given
 * argument list `fn` is invoked (with the caller's `this`) and its result is stored,
 * later calls with an equal argument list return the stored result.
 *
 * @param {Function} fn - A function that will be cached
 * @returns {Function} The memoized wrapper around `fn`
 */
utils.memoize = fn => {
  const entries = []
  // Finds the stored entry for an argument list (undefined when there is none)
  const lookup = args =>
    entries.find(entry => utils.arrayEquals(entry.key, args))

  return function (...args) {
    if (lookup(args) === undefined) {
      entries.push({ key: args, value: fn.apply(this, args) })
    }
    return lookup(args).value
  }
}
// --
// Stuff starting below this line is NodeJS specific.
// (Remove the export when pasting these utilities directly into a browser context.)
// --
module.exports = utils