UNPKG

import-in-the-middle

Version:
800 lines (698 loc) 27.8 kB
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2.0 License.
//
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021 Datadog, Inc.

import { URL, fileURLToPath } from 'url'
import { inspect } from 'util'
import { builtinModules } from 'module'
import { getExports, hasModuleExportsCJSDefault } from './lib/get-exports.mjs'
import { RESOLVE, driveSync, driveAsync } from './lib/io.mjs'

// Maps a resolved module URL to the specifier it was originally imported with.
// Filled in by the resolve hook and consumed (then deleted) by the load hook.
const specifiers = new Map()
const isWin = process.platform === 'win32'

// FIXME: Typescript extensions are added temporarily until we find a better
// way of supporting arbitrary extensions
const EXTENSION_RE = /\.(js|mjs|cjs|ts|mts|cts)$/
const HANDLED_FORMATS = new Set(['builtin', 'module', 'commonjs'])
const TRACE_WARNINGS = process.execArgv.includes('--trace-warnings')

// process.versions.node is always "major.minor.patch" (nightlies add a suffix
// the regex ignores).
const [, NODE_MAJOR, NODE_MINOR, NODE_PATCH] = process.versions.node.match(/^(\d+)\.(\d+)\.(\d+)/).map(Number)

/**
 * Whether the running Node.js can correctly run the synchronous loader via
 * `module.registerHooks`.
 *
 * `module.registerHooks` exists since v22.15, but its synchronous load hook
 * rejected the nullish CommonJS `source` the loader returns for `require()`s
 * pulled into the ESM graph (throwing `ERR_INVALID_RETURN_PROPERTY_VALUE`) until
 * https://github.com/nodejs/node/pull/59929, released in 22.22.3, 24.11.1,
 * 25.1.0 and 26.0.0. Earlier 24.x (<= 24.11.0) and 25.0.0 ship `registerHooks`
 * but predate the fix, so the synchronous loader must fall back to the
 * asynchronous one there.
 *
 * @returns {boolean}
 */
export function supportsSyncHooks () {
  if (NODE_MAJOR >= 26) return true
  if (NODE_MAJOR === 25) return NODE_MINOR >= 1
  if (NODE_MAJOR === 24) return NODE_MINOR > 11 || (NODE_MINOR === 11 && NODE_PATCH >= 1)
  if (NODE_MAJOR === 22) return NODE_MINOR > 22 || (NODE_MINOR === 22 && NODE_PATCH >= 3)
  return false
}

/**
 * Tells whether a URL carries the `iitm` search parameter.
 *
 * @param {*} url Candidate URL; anything that is not a parsable URL string
 *   yields `false`.
 * @returns {boolean}
 */
function hasIitm (url) {
  // Fast path: avoid URL parsing on the hot path when there's clearly no iitm.
  if (typeof url !== 'string' || !url.includes('iitm')) {
    return false
  }
  try {
    return new URL(url).searchParams.has('iitm')
  } catch {
    return false
  }
}

/**
 * Tells whether `url` is the hook module itself: either `meta.url`, or
 * `meta.url` with `hook.mjs` replaced by `create-hook.mjs`.
 *
 * @param {string} url
 * @param {{ url: string }} meta
 * @returns {boolean}
 */
function isIitm (url, meta) {
  return url === meta.url || url === meta.url.replace('hook.mjs', 'create-hook.mjs')
}

/**
 * Removes the `iitm` search parameter from a URL.
 *
 * Non-string input and strings that do not contain `iitm` are returned
 * untouched; strings that fail URL parsing are returned as-is. A result of the
 * form `file:///node:...` is reduced to `node:...`.
 *
 * @param {*} url
 * @returns {*} The URL without the `iitm` parameter.
 */
function deleteIitm (url) {
  // Fast path: avoid URL parsing / try-catch on bare specifiers and normal file URLs.
  if (typeof url !== 'string' || !url.includes('iitm')) {
    return url
  }
  let resultUrl
  // A failing `new URL()` would otherwise pay for capturing a stack trace that
  // is thrown away immediately.
  const stackTraceLimit = Error.stackTraceLimit
  try {
    Error.stackTraceLimit = 0
    const urlObj = new URL(url)
    if (urlObj.searchParams.has('iitm')) {
      urlObj.searchParams.delete('iitm')
      resultUrl = urlObj.href
      if (resultUrl.startsWith('file:///node:')) {
        resultUrl = resultUrl.replace('file:///', '')
      }
    } else {
      resultUrl = urlObj.href
    }
  } catch {
    resultUrl = url
  } finally {
    // Always restore the global limit, whatever happened above.
    Error.stackTraceLimit = stackTraceLimit
  }
  return resultUrl
}

/**
 * Determines if a specifier represents an export all ESM line.
 * Note that the expected `line` isn't 100% valid ESM. It is derived
 * from the `getExports` function wherein we have recognized the true
 * line and re-mapped it to one we expect.
 *
 * @param {string} line
 * @returns {boolean}
 */
function isStarExportLine (line) {
  return /^\* from /.test(line)
}

/**
 * Tells whether a specifier is bare, i.e. neither a relative/absolute path nor
 * a parsable URL.
 *
 * @param {string} specifier
 * @returns {boolean}
 */
function isBareSpecifier (specifier) {
  // Relative and absolute paths are not bare specifiers.
  if (specifier.startsWith('.') || specifier.startsWith('/')) {
    return false
  }

  // Valid URLs are not bare specifiers. (file:, http:, node:, etc.)
  // `URL.canParse` is not available on every supported runtime, so feature-test it.
  if (typeof URL.canParse === 'function') {
    return !URL.canParse(specifier)
  }

  const stackTraceLimit = Error.stackTraceLimit
  try {
    Error.stackTraceLimit = 0
    // eslint-disable-next-line no-new
    new URL(specifier)
    return false
  } catch (err) {
    return true
  } finally {
    Error.stackTraceLimit = stackTraceLimit
  }
}

/**
 * Determines whether the input is a bare specifier, file URL or a regular expression.
 *
 * - node: prefixed URL strings are considered bare specifiers in this context.
 *
 * @param {*} input
 * @returns {boolean} `false` for paths, for URLs with any other protocol and
 *   for values that are neither a string nor a RegExp.
 */
function isBareSpecifierFileUrlOrRegex (input) {
  if (input instanceof RegExp) {
    return true
  }

  // Anything else has to be a string. Rejecting other types here lets the
  // caller report them as invalid entries instead of crashing on `.startsWith`.
  if (typeof input !== 'string') {
    return false
  }

  // Relative and absolute paths
  if (input.startsWith('.') || input.startsWith('/')) {
    return false
  }

  const stackTraceLimit = Error.stackTraceLimit
  try {
    Error.stackTraceLimit = 0
    const url = new URL(input)
    // We consider node: URLs bare specifiers in this context
    return url.protocol === 'file:' || url.protocol === 'node:'
  } catch (err) {
    // Anything that fails parsing is a bare specifier
    return true
  } finally {
    Error.stackTraceLimit = stackTraceLimit
  }
}

/**
 * Ensure an array only contains bare specifiers, file URLs or regular expressions.
 *
 * - We consider node: prefixed URL string as bare specifiers in this context.
 * - For node built-in modules, we add additional node: prefixed modules to the
 * output array.
 *
 * @param {*} array The `include` / `exclude` option value.
 * @param {string} type Option name, used in the error message.
 * @returns {Array|undefined} The same array instance, extended in place with
 *   the `node:` aliases, or `undefined` when `array` is not an array.
 * @throws {Error} When an entry is not a bare specifier, file URL or RegExp.
 */
function ensureArrayWithBareSpecifiersFileUrlsAndRegex (array, type) {
  if (!Array.isArray(array)) {
    return undefined
  }

  const invalid = array.filter(s => !isBareSpecifierFileUrlOrRegex(s))

  if (invalid.length) {
    throw new Error(`'${type}' option only supports bare specifiers, file URLs or regular expressions. Invalid entries: ${inspect(invalid)}`)
  }

  // Rather than evaluate whether we have a node: scoped built-in-module for
  // every call to resolve, we just add them to include/exclude now.
  // The aliases are collected first so the array is not grown while it is
  // being iterated, and aliases that are already listed are not added again.
  const aliases = []
  for (const each of array) {
    if (typeof each !== 'string' || each.startsWith('node:') || !builtinModules.includes(each)) {
      continue
    }
    const alias = `node:${each}`
    if (!array.includes(alias) && !aliases.includes(alias)) {
      aliases.push(alias)
    }
  }
  array.push(...aliases)

  return array
}

/**
 * Emits a process warning for `err`.
 *
 * @param {Error} err
 */
function emitWarning (err) {
  // Unfortunately, process.emitWarning does not output the full error
  // with error.cause like console.warn does so we need to inspect it when
  // tracing warnings
  const warnMessage = TRACE_WARNINGS ? inspect(err) : err
  process.emitWarning(warnMessage)
}

/**
 * Builds the setter/getter/re-export block injected into the wrapper module for
 * a single named export. This is pure string generation, identical regardless
 * of how the loader is driven, so both the synchronous and asynchronous paths
 * share it.
 *
 * @param {string} n The exported name.
 * @param {string} srcUrl The URL of the module the export belongs to.
 * @returns {string}
 */
function buildSetter (n, srcUrl) {
  const variableName = `$${n.replace(/[^a-zA-Z0-9_$]/g, '_')}`
  // Export-derived variables always start with `$`, so a `__defer` prefix can
  // never clash with the variable of another export (a `Defer` suffix would:
  // exports `foo` and `fooDefer` would both declare `$fooDefer`).
  const deferName = `__defer${variableName}`
  const objectKey = JSON.stringify(n)
  const reExportedName = n === 'default' ? n : objectKey

  // For the module.exports synthetic export (Node 23+), fall back to $default
  // when namespace['module.exports'] is not exposed by the native ESM namespace
  // (builtins don't expose it). This ensures the IITM hook proxy returns the
  // actual CJS value (e.g. EventEmitter) when an instrumentor reads
  // capturedExports['module.exports'], rather than undefined.
  const moduleExportsFallback = n === 'module.exports' ? ' ?? $default' : ''
  const reExportLine = (n === 'module.exports' && (srcUrl.startsWith('node:') || builtinModules.includes(srcUrl)))
    ? ''
    : `export { ${variableName} as ${reExportedName} }`

  return `
    let ${variableName}
    __overridden[${objectKey}] = false
    let ${deferName} = false
    try {
      ${variableName} = _[${objectKey}] = namespace[${objectKey}]${moduleExportsFallback}
    } catch (err) {
      if (!(err instanceof ReferenceError)) throw err
      ${deferName} = true
    }
    if (${deferName} || ${variableName} === undefined) {
      __pending.push(__makeUpdater(
        ${objectKey},
        () => namespace[${objectKey}]${moduleExportsFallback},
        (v) => { ${variableName} = _[${objectKey}] = v }
      ))
    }
    ${reExportLine}
    set[${objectKey}] = (v) => {
      __overridden[${objectKey}] = true
      ${variableName} = v
      return true
    }
    get[${objectKey}] = () => ${variableName}
    `
}

/**
 * Processes a module's exports and builds a set of setter blocks.
 *
 * Written as a "sans-io" generator (see `lib/io.mjs`): instead of calling the
 * loader's resolve/load hooks directly it `yield`s `[RESOLVE, ...]` to resolve
 * star re-exports and `[LOAD, ...]` (via {@link getExports}) to read source,
 * and is driven by either {@link driveSync} (for
 * `module.registerHooks`) or {@link driveAsync} (for `module.register`). The
 * body is identical for both, so there is a single implementation to maintain.
 *
 * @param {object} params
 * @param {string} params.srcUrl The full URL to the module to process.
 * @param {object} params.context Provided by the loaders API.
 * @param {boolean} [params.excludeDefault = false] Exclude the default export.
 *
 * @returns {Generator<Array, Map<string, string>>} A generator that yields I/O
 * operations and ultimately returns the shimmed setters for all the exports
 * from the module and any transitive export all modules.
 */
function * processModule ({ srcUrl, context, excludeDefault = false }) {
  const exportNames = yield * getExports(srcUrl, context)
  const starExports = new Set()
  const setters = new Map()

  const addSetter = (name, setter, isStarExport = false) => {
    if (setters.has(name)) {
      if (isStarExport) {
        // If there's already a matching star export, delete it
        if (starExports.has(name)) {
          setters.delete(name)
        }
        // and return so this is excluded
        return
      }

      // if we already have this export but it is from a * export, overwrite it
      if (starExports.has(name)) {
        starExports.delete(name)
        setters.set(name, setter)
      }
    } else {
      // Store export * exports so we know they can be overridden by explicit
      // named exports
      if (isStarExport) {
        starExports.add(name)
      }

      setters.set(name, setter)
    }
  }

  for (const n of exportNames) {
    if (excludeDefault) {
      const isDefault = n === 'default' || (
        n === 'module.exports' &&
        context.format === 'commonjs' &&
        hasModuleExportsCJSDefault
      )
      if (isDefault) continue
    }

    if (isStarExportLine(n) === true) {
      const [, modFile] = n.split('* from ')

      // Relative paths need to be resolved relative to the parent module
      const newSpecifier = isBareSpecifier(modFile) ? modFile : new URL(modFile, srcUrl).href

      // We need to resolve bare specifiers to a full URL. We also need to
      // resolve all sub-modules to get the `format`. We can't rely on the
      // parent's `format` to know if this sub-module is ESM or CJS!
      const result = yield [RESOLVE, newSpecifier, { parentURL: srcUrl }]

      const subSetters = yield * processModule({
        srcUrl: result.url,
        context: { ...context, format: result.format },
        excludeDefault: true
      })

      for (const [name, setter] of subSetters.entries()) {
        addSetter(name, setter, true)
      }
    } else {
      addSetter(n, buildSetter(n, srcUrl))
    }
  }

  return setters
}

/**
 * Returns `url` with the search parameter `iitm=true` set.
 *
 * @param {string} url
 * @returns {string}
 */
function addIitm (url) {
  const urlObj = new URL(url)
  urlObj.searchParams.set('iitm', 'true')
  return urlObj.href
}

/**
 * Creates the loader hooks, both the asynchronous (`resolve` / `load`) and the
 * synchronous (`resolveSync` / `loadSync`) flavours, sharing one configuration.
 *
 * @param {{ url: string }} meta The `import.meta` of the hook module; its `url`
 *   locates `lib/register.js` and identifies the hook module itself.
 * @returns {{ initialize: Function, load: Function, resolve: Function, resolveSync: Function, loadSync: Function, applyOptions: Function }}
 */
export function createHook (meta) {
  // The most recent `nextResolve` seen by a resolve hook; the load hooks need
  // it to resolve star re-exports.
  let cachedResolve
  const iitmURL = new URL('lib/register.js', meta.url).toString()
  let includeModules, excludeModules
  let shouldInclude = defaultShouldInclude

  // Track CJS module URLs that IITM has wrapped. On Node 24+, CJS modules loaded
  // via loadCJSModule (in an ESM import chain) have their require() calls for
  // builtins routed through the ESM resolver. Without this guard, IITM would
  // intercept those require() calls and return an ESM namespace object instead
  // of the native CJS module value (e.g. EventEmitter constructor), breaking
  // patterns like `class App extends require('events') {}`.
  const cjsInIitmChain = new Set()

  // Default matcher, used unless the consumer supplies its own `shouldInclude`
  // (see applyOptions). It applies the include/exclude lists, so finishResolve
  // always has a predicate to call and never has to special-case its absence.
  //
  // We check the specifier to match libraries loaded with bare specifiers from
  // node_modules, and the full file URL for non-bare specifier imports (relative
  // paths would be error prone). An absolute path entry added via Hook over the
  // message port matches the resolved file path, so it is resolved here.
  function defaultShouldInclude (url, specifier) {
    let resultPath
    if (url.startsWith('file:')) {
      const stackTraceLimit = Error.stackTraceLimit
      try {
        Error.stackTraceLimit = 0
        resultPath = fileURLToPath(url)
      } catch {
        // Best effort: a URL that cannot be turned into a path simply cannot
        // match an absolute path entry.
      } finally {
        Error.stackTraceLimit = stackTraceLimit
      }
    }

    function match (each) {
      if (each instanceof RegExp) {
        return each.test(url)
      }

      return each === specifier || each === url || (resultPath && each === resultPath)
    }

    if (includeModules && !includeModules.some(match)) {
      return false
    }

    if (excludeModules && excludeModules.some(match)) {
      return false
    }

    return true
  }

  // Applies the include/exclude/message-port configuration. Shared by the
  // asynchronous `initialize` (off-thread `module.register`, which receives
  // `data` over the registration boundary) and by synchronous registration
  // (`module.registerHooks`), which has no `initialize` step and passes the
  // same options directly.
  function applyOptions (data) {
    includeModules = ensureArrayWithBareSpecifiersFileUrlsAndRegex(data.include, 'include')
    excludeModules = ensureArrayWithBareSpecifiersFileUrlsAndRegex(data.exclude, 'exclude')

    // A consumer can supply its own matcher as `shouldInclude(url, specifier)`,
    // taking ownership of the include/exclude decision instead of expressing it
    // as bare-specifier / file-URL / regex lists. It replaces the default list
    // matcher and is called with the resolved URL and specifier; otherwise the
    // default applies the include/exclude options.
    shouldInclude = typeof data.shouldInclude === 'function'
      ? data.shouldInclude
      : defaultShouldInclude

    if (data.addHookMessagePort) {
      data.addHookMessagePort.on('message', (modules) => {
        if (includeModules === undefined) {
          includeModules = []
        }

        for (const each of modules) {
          if (!each.startsWith('node:') && builtinModules.includes(each)) {
            includeModules.push(`node:${each}`)
          }

          includeModules.push(each)
        }

        data.addHookMessagePort.postMessage('ack')
      }).unref()
    }
  }

  // `initialize` hook of the asynchronous loader: warns when the hook is
  // initialized more than once in the process, then applies `data` if given.
  async function initialize (data) {
    if (global.__import_in_the_middle_initialized__) {
      process.emitWarning("The 'import-in-the-middle' hook has already been initialized")
    }
    global.__import_in_the_middle_initialized__ = true

    if (data) {
      applyOptions(data)
    }
  }

  // Shared post-processing for the `resolve` hook: everything that happens
  // once the parent loader has turned the specifier into a resolved URL. The
  // only difference between the asynchronous and synchronous hooks is whether
  // that resolution was awaited, so all the wrapping decisions live here.
  function finishResolve (result, specifier, context, parentURL) {
    // Do not wrap the entrypoint module. Many CLIs check whether they are the
    // "main" module (e.g. require.main === module). Wrapping changes how they
    // are evaluated, and can make them exit without doing anything.
    if (parentURL === '') {
      if (!EXTENSION_RE.test(result.url) && !hasIitm(result.url)) {
        return { url: result.url, format: 'commonjs' }
      }
      return result
    }

    // Never wrap a module whose format we don't handle (e.g. json, wasm); this
    // holds regardless of how inclusion is decided below.
    if (result.format && !HANDLED_FORMATS.has(result.format)) {
      return result
    }

    // The synchronous hooks (`module.registerHooks`) fire for `require()` as well
    // as `import`, but iitm only owns the ESM graph: CommonJS modules are
    // instrumented separately through require-in-the-middle, and `require()` must
    // return the native, mutable module value (e.g. graceful-fs does
    // `Object.defineProperty(require('fs'), ...)`, which throws on a frozen ESM
    // namespace). Node reports the active module system in `context.conditions`
    // ('require' vs 'import'), so leave any require() resolution untouched. The
    // asynchronous hook never sees the 'require' condition, so this is a no-op
    // there and only affects the synchronous path.
    if (context.conditions?.includes('require')) {
      return result
    }

    // `shouldInclude` is always set (the include/exclude list matcher by default,
    // a consumer-provided predicate otherwise), so no nullish check is needed.
    if (!shouldInclude(result.url, specifier)) {
      return result
    }

    if (isIitm(parentURL, meta) || (parentURL && hasIitm(parentURL))) {
      return result
    }

    // When a CJS module is loaded by an IITM shim, its require() calls for
    // builtins may be routed through the ESM resolver on Node 24+. Skip IITM
    // wrapping in that case so require() returns the native module value.
    // We also propagate the membership to the resolved child so that its own
    // transitive require() calls are likewise skipped (the entire synchronous
    // CJS require chain must remain unwrapped to avoid ERR_VM_MODULE_LINK_FAILURE).
    if (cjsInIitmChain.has(parentURL)) {
      cjsInIitmChain.add(result.url)
      return result
    }

    // We don't want to attempt to wrap native modules
    if (result.url.endsWith('.node')) {
      return result
    }

    // Node.js v21 renames importAssertions to importAttributes
    const importAttributes = context.importAttributes || context.importAssertions
    if (importAttributes && importAttributes.type === 'json') {
      return result
    }

    // If the file is referencing itself, we need to skip adding the iitm search params
    if (result.url === parentURL) {
      return {
        url: result.url,
        shortCircuit: true,
        format: result.format
      }
    }

    specifiers.set(result.url, specifier)

    return {
      url: addIitm(result.url),
      shortCircuit: true,
      // Node's synchronous resolver drops `format: 'builtin'` for bare builtin
      // specifiers (`require('crypto')` -> `node:crypto`), so restore it;
      // otherwise the load hook reads `node:crypto` from disk and throws ENOENT.
      format: result.format ?? (result.url.startsWith('node:') ? 'builtin' : undefined)
    }
  }

  // Asynchronous `resolve` hook, for `module.register`.
  async function resolve (specifier, context, parentResolve) {
    cachedResolve = parentResolve

    // See https://github.com/nodejs/import-in-the-middle/pull/76.
    if (specifier === iitmURL) {
      return {
        url: specifier,
        shortCircuit: true
      }
    }

    const { parentURL = '' } = context
    const newSpecifier = deleteIitm(specifier)
    if (isWin && parentURL.startsWith('file:node')) {
      context.parentURL = ''
    }
    const result = await parentResolve(newSpecifier, context)
    return finishResolve(result, specifier, context, parentURL)
  }

  // Synchronous counterpart to `resolve`, for `module.registerHooks`. The
  // synchronous `nextResolve` returns its result directly. We stash it so the
  // synchronous `load` hook can resolve star re-exports later, mirroring how
  // `resolve` caches `parentResolve`.
  function resolveSync (specifier, context, nextResolve) {
    cachedResolve = nextResolve

    if (specifier === iitmURL) {
      return {
        url: specifier,
        shortCircuit: true
      }
    }

    const { parentURL = '' } = context
    const newSpecifier = deleteIitm(specifier)
    if (isWin && parentURL.startsWith('file:node')) {
      context.parentURL = ''
    }
    const result = nextResolve(newSpecifier, context)
    return finishResolve(result, specifier, context, parentURL)
  }

  // Builds the wrapper module source that re-exports the real module through
  // iitm's proxy. Pure string generation shared by the asynchronous and
  // synchronous `load` paths.
  //
  // Every value interpolated into a string position goes through
  // JSON.stringify, so a URL containing a quote (e.g. an install path with an
  // apostrophe) cannot break out of the generated string literal.
  function buildWrapperSource (realUrl, setters, originalSpecifier) {
    return `
import { register } from ${JSON.stringify(iitmURL)}
import * as namespace from ${JSON.stringify(realUrl)}

// Mimic a Module object (https://tc39.es/ecma262/#sec-module-namespace-objects).
const _ = Object.create(null, { [Symbol.toStringTag]: { value: 'Module' } })
const set = {}
const get = {}
const __overridden = Object.create(null)
let __pending = []

function __makeUpdater (key, read, assign) {
  return () => {
    if (__overridden[key] === true) return true
    try {
      const v = read()
      if (v !== undefined) {
        assign(v)
        return true
      }
      return false
    } catch (err) {
      if (err instanceof ReferenceError) return false
      throw err
    }
  }
}

function __flushPendingOnce () {
  if (__pending.length === 0) return
  const next = []
  for (const fn of __pending) {
    // If it still throws ReferenceError, keep it for the (single) next attempt.
    if (fn() !== true) next.push(fn)
  }
  __pending = next
}

${Array.from(setters.values()).join('\n')}

if (__pending.length > 0) {
  queueMicrotask(() => {
    __flushPendingOnce()
    if (__pending.length > 0) {
      const __retryDelays = [0, 10, 50]
      const __schedulePending = (i) => {
        if (__pending.length === 0) return
        if (i >= __retryDelays.length) {
          // Give up: leave exports as-is to avoid unbounded retries.
          __pending = []
          return
        }
        const t = setTimeout(() => {
          __flushPendingOnce()
          __schedulePending(i + 1)
        }, __retryDelays[i])
        // Don't keep the process alive just for best-effort retries.
        if (t && typeof t.unref === 'function') t.unref()
      }
      __schedulePending(0)
    }
  })
}

register(${JSON.stringify(realUrl)}, _, set, get, ${JSON.stringify(originalSpecifier)})
`
  }

  // Bookkeeping shared by the async and sync wrap paths once `processModule`
  // succeeds: free the specifier entry early, and remember CJS modules so their
  // transitive require() chain bypasses iitm (see `load`). Returns the wrapper
  // module source.
  function onWrapSuccess (realUrl, context, originalSpecifier, setters) {
    specifiers.delete(realUrl)

    // context.format is set to 'commonjs' by getCjsExports during processModule.
    if (context.format === 'commonjs') {
      cjsInIitmChain.add(realUrl)
    }

    return buildWrapperSource(realUrl, setters, originalSpecifier)
  }

  // Bookkeeping shared by the async and sync wrap paths when `processModule`
  // throws. iitm falls back to the parent loader so the module loads unwrapped
  // (it just can't be Hook'ed) rather than taking down the whole app. We free
  // the specifier entry to avoid a leak, and log because a failure here is
  // usually an iitm bug and would otherwise be very tricky to debug.
  function onWrapFailure (realUrl, cause) {
    specifiers.delete(realUrl)

    const err = new Error(`'import-in-the-middle' failed to wrap '${realUrl}'`)
    err.cause = cause
    emitWarning(err)
  }

  // Produces the source for an iitm URL: the wrapper module on success, or the
  // parent loader's result for the real URL when wrapping fails. URLs without
  // the iitm parameter go straight to the parent loader.
  async function getSource (url, context, parentGetSource) {
    if (hasIitm(url)) {
      const realUrl = deleteIitm(url)
      const originalSpecifier = specifiers.get(realUrl)

      try {
        const setters = await driveAsync(
          processModule({ srcUrl: realUrl, context }),
          { resolve: cachedResolve, load: parentGetSource }
        )
        return { source: onWrapSuccess(realUrl, context, originalSpecifier, setters) }
      } catch (cause) {
        onWrapFailure(realUrl, cause)

        // Revert back to the non-iitm URL
        url = realUrl
      }
    }

    return parentGetSource(url, context)
  }

  // Synchronous counterpart to `getSource`, for `module.registerHooks`. Drives
  // `processModule` straight through; all bookkeeping and source generation is
  // shared with `getSource`.
  function getSourceSync (url, context, nextLoad) {
    if (hasIitm(url)) {
      const realUrl = deleteIitm(url)
      const originalSpecifier = specifiers.get(realUrl)

      try {
        const setters = driveSync(
          processModule({ srcUrl: realUrl, context }),
          { resolve: cachedResolve, load: nextLoad }
        )
        return { source: onWrapSuccess(realUrl, context, originalSpecifier, setters) }
      } catch (cause) {
        onWrapFailure(realUrl, cause)
        url = realUrl
      }
    }

    return nextLoad(url, context)
  }

  // Asynchronous `load` hook, for `module.register`.
  async function load (url, context, parentLoad) {
    if (hasIitm(url)) {
      const result = await getSource(url, context, parentLoad)

      // If wrapping failed, `getSource()` may have fallen back to `parentLoad`,
      // which can legally return `source: null` (e.g. for non-JS formats).
      if (result && typeof result === 'object' && result.source != null) {
        return {
          source: result.source,
          shortCircuit: true,
          format: 'module'
        }
      }

      // Fall back to the parent loader with the original (non-iitm) URL.
      return parentLoad(deleteIitm(url), context)
    }

    // On Node 22+, when a CJS module is loaded through the ESM translator and
    // another loader hook provides its source (instead of leaving source null
    // for Node to read natively), require() calls inside that CJS module for
    // packages using the "module-sync" exports condition fail with
    // ERR_VM_MODULE_LINK_FAILURE. Work around this Node bug by stripping
    // hook-provided source for CJS modules in the synchronous require chain,
    // forcing Node to use its native CJS loader which handles this correctly.
    if (cjsInIitmChain.has(url)) {
      const result = await parentLoad(url, context)
      if (result.format === 'commonjs' && result.source != null) {
        return { format: result.format, source: undefined }
      }
      return result
    }

    return parentLoad(url, context)
  }

  // Synchronous counterpart to `load`, for `module.registerHooks`. Mirrors the
  // async `load` exactly — wrapping via `getSourceSync` and applying the same
  // CJS-in-iitm-chain source stripping — only without awaiting.
  function loadSync (url, context, nextLoad) {
    if (hasIitm(url)) {
      const result = getSourceSync(url, context, nextLoad)

      // If wrapping failed, `getSourceSync()` may have fallen back to `nextLoad`,
      // which can legally return `source: null` (e.g. for non-JS formats).
      if (result && typeof result === 'object' && result.source != null) {
        return {
          source: result.source,
          shortCircuit: true,
          format: 'module'
        }
      }

      // Fall back to the parent loader with the original (non-iitm) URL.
      return nextLoad(deleteIitm(url), context)
    }

    if (cjsInIitmChain.has(url)) {
      const result = nextLoad(url, context)
      if (result.format === 'commonjs' && result.source != null) {
        return { format: result.format, source: undefined }
      }
      return result
    }

    return nextLoad(url, context)
  }

  return { initialize, load, resolve, resolveSync, loadSync, applyOptions }
}