UNPKG

@cyclonedx/cdxgen

Version:

Creates CycloneDX Software Bill of Materials (SBOM) from source or container image

github.com/cdxgen/cdxgen

1,767 lines (1,675 loc) • 698 kB

JavaScript

import { Buffer } from "node:buffer";
import { spawnSync } from "node:child_process";
import { createHash, randomUUID } from "node:crypto";
import {
  chmodSync,
  constants,
  copyFileSync,
  createReadStream,
  existsSync,
  lstatSync,
  mkdirSync,
  mkdtempSync,
  readFileSync,
  realpathSync,
  rmSync,
  unlinkSync,
  writeFileSync,
} from "node:fs";
import { homedir, platform, tmpdir } from "node:os";
import path, {
  delimiter as _delimiter,
  sep as _sep,
  basename,
  dirname,
  extname,
  join,
  relative,
  resolve,
} from "node:path";
import process from "node:process";
import { fileURLToPath, URL } from "node:url";

import toml from "@iarna/toml";
import { load } from "cheerio";
import { parseEDNString } from "edn-data";
import { globSync } from "glob";
import got from "got";
import iconv from "iconv-lite";
import Keyv from "keyv";
import StreamZip from "node-stream-zip";
import { PackageURL } from "packageurl-js";
import propertiesReader from "properties-reader";
import {
  clean,
  coerce,
  compare,
  maxSatisfying,
  parse,
  satisfies,
  valid,
} from "semver";
import { xml2js } from "xml-js";
import { parse as _load, parseAllDocuments } from "yaml";

import { getTreeWithPlugin } from "../managers/piptree.js";
import { IriValidationStrategy, validateIri } from "../parsers/iri.js";
import Arborist from "../third-party/arborist/lib/index.js";
import { analyzeSuspiciousJsFile } from "./analyzer.js";
import { DEFAULT_HBOM_AUDIT_CATEGORIES } from "./auditCategories.js";
import { parseWorkflowFile } from "./ciParsers/githubActions.js";
import {
  addDosaiSetValue,
  buildDosaiPurlAliasMap,
  dosaiSourceLocation,
  dosaiSourceLocationFromNode,
  resolveDosaiComponentPurl,
} from "./dosaiParsers.js";
import { extractPackageInfoFromHintPath } from "./dotnetutils.js";
import {
  createOccurrenceEvidence,
  parseOccurrenceEvidenceLocation,
} from "./evidenceUtils.js";
import { createGtfoBinsPropertiesFromRow } from "./gtfobins.js";
import { thoughtLog, traceLog } from "./logger.js";
import { createLolbasProperties } from "./lolbas.js";
import {
  createOsQueryFallbackBomRef,
  createOsQueryPurl,
  deriveOsQueryDescription,
  deriveOsQueryName,
  deriveOsQueryPublisher,
  deriveOsQueryVersion,
  sanitizeOsQueryIdentity,
  shouldCreateOsQueryPurl,
} from "./osqueryTransform.js";
import {
  collectPyLockFileComponents,
  collectPyLockPackageProperties,
  collectPyLockTopLevelProperties,
  getPyLockPackages,
  isDefaultPypiRegistry,
  isPyLockObject,
} from "./pylockutils.js";
import { get_python_command_from_env, getVenvMetadata } from "./pythonutils.js";
import {
  collectCargoRegistryProvenanceProperties,
  collectNpmRegistryProvenanceProperties,
  collectPypiRegistryProvenanceProperties,
} from "./registryProvenance.js";

// Normalise the module URL: when import.meta.url is set but is not already a
// file:// URL, prefix it so that fileURLToPath() below can consume it.
let url = import.meta?.url;
if (url && !url.startsWith("file://")) {
  url = new URL(`file://${import.meta.url}`).toString();
}
// TODO: verify if this is a good method (Prabhu)
// this is due to dirNameStr being "cdxgen/lib/helpers" which causes errors
// Three dirname() hops up from this module's own file path; falls back to
// __dirname when no module URL is available.
export const dirNameStr = url
  ? dirname(dirname(dirname(fileURLToPath(url))))
  : __dirname;

// Truthy when CDXGEN_SECURE_MODE is "true"/"1" or NODE_OPTIONS mentions
// "--permission". Note: the value is not coerced, so it may be `undefined`
// rather than `false` when neither condition holds.
export const isSecureMode =
  ["true", "1"].includes(process.env?.CDXGEN_SECURE_MODE) ||
  process.env?.NODE_OPTIONS?.includes("--permission");

// CLI dry-run must be detected during module initialization because some probes
// execute while modules are imported, before bin/cdxgen.js can thread options.
const hasDryRunArg = process.argv?.some( (arg) => arg === "--dry-run" || arg === "--dry-run=true" || arg === "--dry-run=1", ); export let isDryRun = ["true", "1"].includes(process.env?.CDXGEN_DRY_RUN) || hasDryRunArg; export const isNode = globalThis.process?.versions?.node !== undefined; export const isBun = globalThis.Bun?.version !== undefined; export const isDeno = globalThis.Deno?.version?.deno !== undefined; export const isWin = platform() === "win32"; export const isMac = platform() === "darwin"; export const DRY_RUN_ERROR_CODE = "CDXGEN_DRY_RUN"; const activityLedger = []; let activityCounter = 0; let currentActivityContext = {}; const dryRunReadTraceState = globalThis.__cdxgenDryRunReadTraceState || (globalThis.__cdxgenDryRunReadTraceState = { environmentReads: new Map(), observations: new Map(), recordActivity: undefined, sensitiveFileReads: new Map(), }); const SENSITIVE_ENV_VAR_PATTERN = /(^|_)(?:token|key|secret|pass(?:word)?|credential(?:s)?|cred|auth|session|cookie|email|user)$/i; const DIRECTORY_DISCOVERY_NAMES = new Set([ ".cargo", ".docker", ".gem", ".github", ".m2", ".nuget", ".venv", ".yarn", "blobs", "extensions", "node_modules", "target", "vendor", ]); const LOCKFILE_ACTIVITY_HINTS = new Map([ [ "bun.lock", { classification: "lockfile", ecosystem: "bun", label: "Bun lockfile" }, ], [ "cargo.lock", { classification: "lockfile", ecosystem: "cargo", label: "Cargo lockfile" }, ], [ "composer.lock", { classification: "lockfile", ecosystem: "composer", label: "Composer lockfile", }, ], [ "gemfile.lock", { classification: "lockfile", ecosystem: "rubygems", label: "Bundler lockfile", }, ], [ "package-lock.json", { classification: "lockfile", ecosystem: "npm", label: "npm lockfile" }, ], [ "packages.lock.json", { classification: "lockfile", ecosystem: "nuget", label: "NuGet lockfile" }, ], [ "pdm.lock", { classification: "lockfile", ecosystem: "python", label: "PDM lockfile" }, ], [ "pnpm-lock.yaml", { classification: "lockfile", ecosystem: "pnpm", 
label: "pnpm lockfile" }, ], [ "poetry.lock", { classification: "lockfile", ecosystem: "python", label: "Poetry lockfile", }, ], [ "podfile.lock", { classification: "lockfile", ecosystem: "cocoapods", label: "CocoaPods lockfile", }, ], [ "pylock.toml", { classification: "lockfile", ecosystem: "python", label: "PEP 751 lockfile", }, ], [ "uv.lock", { classification: "lockfile", ecosystem: "python", label: "uv lockfile" }, ], [ "yarn.lock", { classification: "lockfile", ecosystem: "yarn", label: "Yarn lockfile" }, ], ]); const MANIFEST_ACTIVITY_HINTS = new Map([ [ "cargo.toml", { classification: "manifest", ecosystem: "cargo", label: "Cargo manifest" }, ], [ "composer.json", { classification: "manifest", ecosystem: "composer", label: "Composer manifest", }, ], [ "gemfile", { classification: "manifest", ecosystem: "rubygems", label: "Gem manifest", }, ], [ "package.json", { classification: "manifest", ecosystem: "npm", label: "package manifest" }, ], [ "pom.xml", { classification: "manifest", ecosystem: "maven", label: "Maven manifest" }, ], [ "pyproject.toml", { classification: "manifest", ecosystem: "python", label: "Python project manifest", }, ], [ "requirements.txt", { classification: "manifest", ecosystem: "python", label: "Python requirements manifest", }, ], [ "setup.py", { classification: "manifest", ecosystem: "python", label: "Python setup manifest", }, ], ]); const SENSITIVE_CONFIG_ACTIVITY_HINTS = [ { matcher: (lowerPath, _baseName) => lowerPath.includes("/.cargo/config.toml") || lowerPath.endsWith("/.cargo/credentials") || lowerPath.endsWith("/.cargo/credentials.toml"), metadata: { classification: "config", ecosystem: "cargo", label: "Cargo registry configuration", sensitive: true, }, }, { matcher: (lowerPath, baseName) => lowerPath.includes("/.docker/config.json") || (baseName === "config.json" && lowerPath.includes("/docker")), metadata: { classification: "credential", ecosystem: "oci", label: "Docker credential file", sensitive: true, }, }, { matcher: 
(lowerPath) => lowerPath.endsWith("/.gem/credentials"), metadata: { classification: "credential", ecosystem: "rubygems", label: "RubyGems credentials file", sensitive: true, }, }, { matcher: (_lowerPath, baseName) => baseName === ".npmrc" || baseName === ".pnpmrc" || baseName === ".yarnrc", metadata: { classification: "config", ecosystem: "npm", label: "JavaScript package manager configuration", sensitive: true, }, }, { matcher: (_lowerPath, baseName) => baseName === ".yarnrc.yml", metadata: { classification: "config", ecosystem: "yarn", label: "Yarn configuration", sensitive: true, }, }, { matcher: (_lowerPath, baseName) => baseName === ".pypirc" || baseName === "pip.conf", metadata: { classification: "config", ecosystem: "python", label: "Python package publishing configuration", sensitive: true, }, }, { matcher: (_lowerPath, baseName) => baseName === "uv.toml" || baseName === "poetry.toml", metadata: { classification: "config", ecosystem: "python", label: "Python package manager configuration", sensitive: true, }, }, { matcher: (_lowerPath, baseName) => baseName === "nuget.config", metadata: { classification: "config", ecosystem: "nuget", label: "NuGet configuration", sensitive: true, }, }, { matcher: (_lowerPath, baseName) => baseName === "settings.xml", metadata: { classification: "config", ecosystem: "maven", label: "Maven settings.xml", sensitive: true, }, }, ]; const CERTIFICATE_FILE_EXTENSIONS = new Set([".crt", ".cer", ".pem"]); const KEY_FILE_EXTENSIONS = new Set([ ".key", ".jks", ".keystore", ".p12", ".pfx", ]); const buildReadCountSuffix = (count) => (count > 1 ? ` (${count} times)` : ""); const buildEnvironmentReadReason = (varName, count, sensitive) => `Read ${sensitive ? 
"sensitive " : ""}environment variable ${varName}${buildReadCountSuffix(count)}.`; const buildSensitiveFileReadReason = (filePath, count, label) => `Read ${label} ${filePath}${buildReadCountSuffix(count)}.`; function emitActivity(activity) { if (typeof dryRunReadTraceState.recordActivity !== "function") { return undefined; } return dryRunReadTraceState.recordActivity(activity); } function classifyActivityPath(filePath) { if (typeof filePath !== "string" || !filePath.length) { return undefined; } const normalizedPath = filePath.replaceAll("\\", "/"); const lowerPath = normalizedPath.toLowerCase(); const baseName = basename(lowerPath); if (LOCKFILE_ACTIVITY_HINTS.has(baseName)) { return LOCKFILE_ACTIVITY_HINTS.get(baseName); } if (MANIFEST_ACTIVITY_HINTS.has(baseName)) { return MANIFEST_ACTIVITY_HINTS.get(baseName); } for (const { matcher, metadata } of SENSITIVE_CONFIG_ACTIVITY_HINTS) { if (matcher(lowerPath, baseName)) { return metadata; } } if ( lowerPath.includes("/cache/") || lowerPath.includes("/.cache/") || lowerPath.includes("/caches/") ) { return { classification: "cache", label: "cache path", }; } if ( CERTIFICATE_FILE_EXTENSIONS.has(extname(baseName)) || baseName === "cert.pem" ) { return { classification: "certificate", label: "certificate file", sensitive: true, }; } if ( KEY_FILE_EXTENSIONS.has(extname(baseName)) || baseName === "key.pem" || baseName.startsWith("id_") ) { return { classification: "key", label: "private key file", sensitive: true, }; } const trimmedPath = normalizedPath.endsWith("/") ? normalizedPath.slice(0, -1) : normalizedPath; const directoryName = basename(trimmedPath.toLowerCase()); if (DIRECTORY_DISCOVERY_NAMES.has(directoryName)) { return { classification: "directory", label: "directory discovery path", }; } return undefined; } function classifyDiscoveryPattern(pattern) { const patternValue = Array.isArray(pattern) ? 
pattern.join(",") : String(pattern); const lowerPattern = patternValue.toLowerCase(); if ( lowerPattern.includes("package-lock.json") || lowerPattern.includes("pnpm-lock.yaml") || lowerPattern.includes("yarn.lock") || lowerPattern.includes("poetry.lock") || lowerPattern.includes("uv.lock") || lowerPattern.includes("cargo.lock") || lowerPattern.includes("gemfile.lock") ) { return { discoveryType: "lockfile-discovery", label: "lockfile discovery", }; } if ( lowerPattern.includes("package.json") || lowerPattern.includes("pom.xml") || lowerPattern.includes("pyproject.toml") || lowerPattern.includes("cargo.toml") || lowerPattern.includes("composer.json") ) { return { discoveryType: "manifest-discovery", label: "manifest discovery", }; } return { discoveryType: "directory-enumeration", label: "directory enumeration", }; } function recordDeduplicatedRead(traceMap, traceKey, activity, createReason) { const existingTrace = traceMap.get(traceKey); if (existingTrace) { existingTrace.count += 1; if (existingTrace.entry) { existingTrace.entry.count = existingTrace.count; existingTrace.entry.reason = createReason(existingTrace.count); } return existingTrace.entry; } const entry = emitActivity({ ...activity, reason: createReason(1), }); if (entry) { entry.count = 1; } traceMap.set(traceKey, { count: 1, entry, }); return entry; } export function isSensitiveEnvironmentVariableName(varName) { return typeof varName === "string" && SENSITIVE_ENV_VAR_PATTERN.test(varName); } export function recordObservedActivity(kind, target, options = {}) { if (!(isDryRun || DEBUG_MODE) || !kind || !target) { return undefined; } const status = options.status || "completed"; const traceKey = options.traceKey || `${kind}:${status}:${target}:${options.traceDetail || ""}`; const metadata = options.metadata || {}; const reasonBuilder = options.reasonBuilder || ((count) => options.reason ? 
`${options.reason}${buildReadCountSuffix(count)}` : `Recorded ${kind} activity for ${target}${buildReadCountSuffix(count)}.`); return recordDeduplicatedRead( dryRunReadTraceState.observations, traceKey, { kind, status, target, ...metadata, }, reasonBuilder, ); } export function recordDecisionActivity(target, options = {}) { return recordObservedActivity(options.kind || "decision", target, options); } export function recordDiscoveryActivity(target, options = {}) { return recordObservedActivity(options.kind || "discover", target, options); } export function recordPolicyActivity(target, options = {}) { return recordObservedActivity(options.kind || "policy", target, options); } function normalizeRecordedPathForComparison( candidatePath, basePath = undefined, ) { if (typeof candidatePath !== "string" || !candidatePath.length) { return undefined; } let normalizedPath = candidatePath.replaceAll("\\", "/"); if (basePath && path.isAbsolute(candidatePath)) { const resolvedBasePath = resolve(basePath); const normalizedBasePath = resolvedBasePath.replaceAll("\\", "/"); const isWithinBasePath = (candidate) => { const normalizedCandidate = candidate.replaceAll("\\", "/"); return ( normalizedCandidate === normalizedBasePath || normalizedCandidate.startsWith(`${normalizedBasePath}/`) ); }; const resolvedCandidatePath = resolve(candidatePath); if (isWithinBasePath(resolvedCandidatePath)) { normalizedPath = relative( resolvedBasePath, resolvedCandidatePath, ).replaceAll("\\", "/"); } else { const rebasedCandidatePath = resolve( resolvedBasePath, candidatePath.replace(/^([A-Za-z]:)?[\\/]+/, ""), ); if (isWithinBasePath(rebasedCandidatePath)) { normalizedPath = relative( resolvedBasePath, rebasedCandidatePath, ).replaceAll("\\", "/"); } } } return normalizedPath; } export function recordSymlinkResolution( sourcePath, resolvedPath, options = {}, ) { const normalizedSourcePath = normalizeRecordedPathForComparison( sourcePath, options.basePath, ); const normalizedResolvedPath = 
normalizeRecordedPathForComparison( resolvedPath, options.basePath, ); const status = options.status || "completed"; if ( !normalizedSourcePath || (status === "completed" && (!normalizedResolvedPath || normalizedSourcePath === normalizedResolvedPath)) ) { return undefined; } const metadata = { capability: "symlink-resolution", ...(normalizedResolvedPath ? { resolvedPath: normalizedResolvedPath } : {}), ...(options.errorCode ? { errorCode: options.errorCode } : {}), ...(options.metadata || {}), }; return recordObservedActivity("symlink-resolution", normalizedSourcePath, { metadata, reason: options.reason || (status === "failed" ? `Failed to resolve symlink ${normalizedSourcePath}.` : `Resolved symlink ${normalizedSourcePath} to ${normalizedResolvedPath}.`), status, }); } function getArchiveSourceByteSize(sourcePath) { if (!sourcePath || !safeExistsSync(sourcePath)) { return undefined; } try { const sourceStats = lstatSync(sourcePath); return sourceStats.isFile() ? sourceStats.size : undefined; } catch { return undefined; } } export function recordEnvironmentRead(varName, options = {}) { // Read tracing intentionally mirrors the activity ledger's dry-run/debug behavior. if (!(isDryRun || DEBUG_MODE) || !varName) { return undefined; } const source = options.source || "process.env"; const sensitive = options.sensitive ?? isSensitiveEnvironmentVariableName(varName); const status = options.status || "completed"; const traceKey = `${source}:${varName}:${status}`; const target = `${source}:${varName}`; return recordDeduplicatedRead( dryRunReadTraceState.environmentReads, traceKey, { kind: "env", redacted: sensitive, secretCategory: sensitive ? "environment-variable" : undefined, sensitive, status, target, }, (count) => options.reason || buildEnvironmentReadReason(varName, count, sensitive), ); } export function recordSensitiveFileRead(filePath, options = {}) { // Read tracing intentionally mirrors the activity ledger's dry-run/debug behavior. 
if (!(isDryRun || DEBUG_MODE) || !filePath) { return undefined; } const kind = options.kind || "read"; const pathMetadata = classifyActivityPath(filePath) || {}; const label = options.label || pathMetadata.label || "sensitive file"; const status = options.status || "completed"; const traceKey = `${kind}:${status}:${filePath}`; return recordDeduplicatedRead( dryRunReadTraceState.sensitiveFileReads, traceKey, { classification: pathMetadata.classification, ecosystem: pathMetadata.ecosystem, kind, redacted: pathMetadata.sensitive ?? true, secretCategory: pathMetadata.classification === "key" ? "private-key" : pathMetadata.classification === "certificate" ? "certificate" : "credential-file", status, target: filePath, }, (count) => options.reason || buildSensitiveFileReadReason(filePath, count, label), ); } export function readEnvironmentVariable(varName, options = {}) { recordEnvironmentRead(varName, options); return process.env[varName]; } export function setDryRunMode(enabled) { isDryRun = !!enabled; if (enabled) { process.env.CDXGEN_DRY_RUN = "true"; return; } delete process.env.CDXGEN_DRY_RUN; } export function createDryRunError(action, target, reason) { const message = reason || `Dry run mode blocked the attempted ${action} operation.`; const error = new Error(message); error.code = DRY_RUN_ERROR_CODE; error.name = "DryRunError"; error.action = action; error.target = target; error.dryRun = true; return error; } export function isDryRunError(error) { return !!(error?.dryRun || error?.code === DRY_RUN_ERROR_CODE); } export function setActivityContext(context = {}) { currentActivityContext = { ...currentActivityContext, ...context, }; } export function resetActivityContext() { currentActivityContext = {}; } export function recordActivity(activity) { if (!(isDryRun || DEBUG_MODE)) { return undefined; } const identifier = `ACT-${String(++activityCounter).padStart(4, "0")}`; const entry = { identifier, ...currentActivityContext, timestamp: new Date().toISOString(), 
...activity, }; activityLedger.push(entry); traceLog("activity", entry); return entry; } dryRunReadTraceState.recordActivity = recordActivity; export function getRecordedActivities() { return [...activityLedger]; } export function resetRecordedActivities() { activityLedger.length = 0; activityCounter = 0; dryRunReadTraceState.environmentReads.clear(); dryRunReadTraceState.observations.clear(); dryRunReadTraceState.sensitiveFileReads.clear(); } function recordFilesystemActivity( kind, target, status, reason = undefined, metadata = {}, ) { return recordActivity({ kind, ...metadata, reason, status, target, }); } function hasReadPermission(filePath) { if (!(isSecureMode && process.permission)) { return true; } return process.permission.has("fs.read", join(filePath, "", "*")); } function hasWritePermission(filePath) { if (!(isSecureMode && process.permission)) { return true; } const candidatePaths = [ filePath, join(filePath, "", "*"), join(dirname(filePath), "*"), ]; return candidatePaths.some((candidatePath) => process.permission.has("fs.write", candidatePath), ); } /** * Safely check if a file path exists without crashing due to a lack of permissions * * @param {String} filePath File path * @Boolean True if the path exists. False otherwise */ export function safeExistsSync(filePath) { const pathMetadata = classifyActivityPath(filePath); if (!hasReadPermission(filePath)) { if (DEBUG_MODE) { console.log("cdxgen lacks read permission for a requested path."); } if (pathMetadata) { recordPolicyActivity(filePath, { metadata: { classification: pathMetadata.classification, ecosystem: pathMetadata.ecosystem, policyType: "fs.read", }, reason: `Denied inspection of ${pathMetadata.label} ${filePath} due to missing fs.read permission.`, status: "blocked", }); } return false; } const exists = existsSync(filePath); if (pathMetadata) { const inspectionKind = pathMetadata.classification === "directory" ? 
"discover" : "inspect"; recordObservedActivity(inspectionKind, filePath, { metadata: { classification: pathMetadata.classification, ecosystem: pathMetadata.ecosystem, exists, redacted: pathMetadata.sensitive ?? false, }, reasonBuilder: (count) => `${exists ? "Inspected" : "Checked for"} ${pathMetadata.label} ${filePath}${buildReadCountSuffix(count)}.`, }); } return exists; } export function safeWriteSync(filePath, data, options) { if (isDryRun) { recordFilesystemActivity( "write", filePath, "blocked", "Dry run mode blocks filesystem writes.", ); return undefined; } if (!hasWritePermission(filePath)) { if (DEBUG_MODE) { console.log("cdxgen lacks write permission for a requested path."); } recordFilesystemActivity( "write", filePath, "blocked", "cdxgen lacks write permission for this path.", ); return undefined; } writeFileSync(filePath, data, options); recordFilesystemActivity("write", filePath, "completed"); } /** * Safely create a directory without crashing due to a lack of permissions * * @param {String} filePath File path * @param options {Options} mkdir options * @Boolean True if the path exists. False otherwise */ export function safeMkdirSync(filePath, options) { if (isDryRun) { recordFilesystemActivity( "mkdir", filePath, "blocked", "Dry run mode blocks directory creation.", ); return undefined; } if (!hasWritePermission(filePath)) { if (DEBUG_MODE) { console.log("cdxgen lacks write permission for a requested path."); } recordFilesystemActivity( "mkdir", filePath, "blocked", "cdxgen lacks write permission for this path.", ); return undefined; } mkdirSync(filePath, options); recordFilesystemActivity("mkdir", filePath, "completed"); } export function safeMkdtempSync(prefix, options = undefined) { const resourceType = typeof prefix === "string" && prefix.toLowerCase().includes("cache") ? 
"cache" : "temporary-workspace"; if (isDryRun) { const tempPath = `${prefix}${randomUUID().replaceAll("-", "").slice(0, 6)}`; recordFilesystemActivity( "temp-dir", tempPath, "blocked", `Dry run mode blocks temporary directory creation for ${resourceType}.`, { resourceType, }, ); return tempPath; } const tempPath = mkdtempSync(prefix, options); recordFilesystemActivity("temp-dir", tempPath, "completed", undefined, { resourceType, }); return tempPath; } export function safeRmSync(filePath, options = undefined) { if (isDryRun) { recordFilesystemActivity( "cleanup", filePath, "blocked", "Dry run mode blocks filesystem deletions.", ); return undefined; } rmSync(filePath, options); recordFilesystemActivity("cleanup", filePath, "completed"); } export function safeUnlinkSync(filePath) { if (isDryRun) { recordFilesystemActivity( "cleanup", filePath, "blocked", "Dry run mode blocks file deletions.", ); return undefined; } unlinkSync(filePath); recordFilesystemActivity("cleanup", filePath, "completed"); } export function safeCopyFileSync(src, dest, mode = undefined) { if (isDryRun) { recordFilesystemActivity( "write", dest, "blocked", `Dry run mode blocks copying files from ${src}.`, ); return undefined; } const result = mode === undefined ? copyFileSync(src, dest) : copyFileSync(src, dest, mode); recordFilesystemActivity("write", dest, "completed", `Copied from ${src}.`); return result; } export async function safeExtractArchive( sourcePath, targetPath, extractor, kind = "unzip", options = undefined, ) { const traceArchiveStats = isDryRun || DEBUG_MODE; const sourceBytes = traceArchiveStats ? getArchiveSourceByteSize(sourcePath) : undefined; if (isDryRun) { recordActivity({ archiveKind: kind, capability: "archive-extraction", kind, ...(options?.metadata || {}), ...(sourceBytes !== undefined ? 
{ sourceBytes } : {}), reason: options?.blockedReason || `Dry run mode blocks ${kind} extraction from ${sourcePath} into ${targetPath}.`, status: "blocked", target: `${sourcePath} -> ${targetPath}`, }); return false; } try { await extractor(); recordActivity({ archiveKind: kind, capability: "archive-extraction", kind, ...(options?.metadata || {}), ...(sourceBytes !== undefined ? { sourceBytes } : {}), status: "completed", target: `${sourcePath} -> ${targetPath}`, }); return true; } catch (error) { recordActivity({ archiveKind: kind, capability: "archive-extraction", kind, ...(options?.metadata || {}), ...(sourceBytes !== undefined ? { sourceBytes } : {}), ...(error?.code ? { errorCode: error.code } : {}), reason: options?.failureReason || `Failed ${kind} extraction from ${sourcePath} into ${targetPath}: ${error.message}`, status: "failed", target: `${sourcePath} -> ${targetPath}`, }); throw error; } } export const commandsExecuted = new Set(); function isAllowedCommand( command, allowedCommandsEnv = readEnvironmentVariable("CDXGEN_ALLOWED_COMMANDS"), ) { if (!allowedCommandsEnv) { return true; } return allowedCommandsEnv .split(",") .map((entry) => entry.trim()) .includes(command.trim()); } const ALLOWED_WRAPPERS = new Set(["gradlew", "mvnw"]); /** * Check for Windows CWD executable hijack when shell: true is used. * cmd.exe searches CWD before PATH, allowing local files to shadow system commands. * * @param {string} command The executable to spawn * @param {Object} options Options forwarded to spawnSync (e.g. cwd, env, shell) * * @returns {boolean} true if there is a hijack risk. false otherwise. 
*/ function isWindowsShellHijackRisk(command, options) { const cwd = options?.cwd; const usesShell = options?.shell === true; if (!isWin || !usesShell || !cwd || !command) { return false; } if (/[\/\\]/.test(command)) { return false; } const cmdBase = command.toLowerCase(); if (ALLOWED_WRAPPERS.has(cmdBase)) { return false; } const pathExt = ( process.env.PATHEXT || ".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC" ) .split(";") .filter(Boolean); const candidates = [ cmdBase, ...pathExt.map((ext) => cmdBase + ext.toLowerCase()), ]; const absCwd = resolve(cwd); for (const candidate of candidates) { const candidatePath = path.join(absCwd, candidate); if (existsSync(candidatePath)) { return true; } } return false; } const VERSION_PROBE_ARGS = new Set(["--version", "-version", "version"]); const POSIX_SHELL_METACHARACTERS = /[;&|<>$`\\\n\r]/; const WINDOWS_SHELL_METACHARACTERS = /[&|<>^%\n\r]/; function hasShellMetacharacters(value) { if (value === undefined || value === null) { return false; } const stringValue = String(value); return isWin ? WINDOWS_SHELL_METACHARACTERS.test(stringValue) : POSIX_SHELL_METACHARACTERS.test(stringValue); } function getUnsafeShellToken(command, args) { if (hasShellMetacharacters(command)) { return command; } const argList = Array.isArray(args) ? args : args === undefined || args === null ? [] : [args]; return argList.find((arg) => hasShellMetacharacters(arg)); } function recordSuspiciousShellPathActivities(files, metadata = {}) { for (const file of files) { if (!hasShellMetacharacters(file)) { continue; } recordActivity({ classification: "suspicious-path", discoveryType: metadata.discoveryType, kind: "inspect", pattern: metadata.pattern, reason: "Suspicious path contains shell metacharacters. 
cdxgen passes direct process arguments as argv values, but review this path before invoking external build tools on untrusted projects.", risk: "shell-metacharacters", status: "completed", target: file, }); } } function detectProbeType(command, args = []) { const normalizedCommand = basename(String(command || "")).toLowerCase(); const normalizedArgs = (args || []).map((arg) => String(arg).toLowerCase()); if ( normalizedArgs.some((arg) => VERSION_PROBE_ARGS.has(arg)) || (normalizedArgs.length === 1 && normalizedArgs[0] === "-v") ) { return "version-check"; } if (normalizedCommand === "which" || normalizedArgs.includes("--help")) { return "capability-probe"; } if ( normalizedCommand.startsWith("python") && normalizedArgs.includes("-c") && normalizedArgs.some((arg) => arg.includes("import")) ) { return "runtime-probe"; } return undefined; } function buildCommandActivityDescriptor(command, args, options) { const target = `${command}${args?.length ? ` ${args.join(" ")}` : ""}`; const cdxgenActivity = options?.cdxgenActivity || {}; const probeType = cdxgenActivity.probeType || detectProbeType(command, args); const metadata = { ...(cdxgenActivity.metadata || {}), }; if (probeType) { metadata.capability = metadata.capability || "tool-runtime-probe"; metadata.probeType = probeType; } if (cdxgenActivity.gitOperation) { metadata.gitOperation = cdxgenActivity.gitOperation; } return { blockedReason: cdxgenActivity.blockedReason || (probeType ? `Dry run mode blocks ${probeType.replaceAll("-", " ")} command execution.` : "Dry run mode blocks child process execution."), kind: cdxgenActivity.kind || "execute", metadata, target: cdxgenActivity.target || target, }; } function getOutputByteSize(value, encoding = "utf-8") { if (value === undefined || value === null) { return 0; } if (Buffer.isBuffer(value)) { return value.length; } if (ArrayBuffer.isView(value)) { return value.byteLength; } const safeEncoding = typeof encoding === "string" && encoding !== "buffer" ? 
encoding : "utf8"; return Buffer.byteLength(String(value), safeEncoding); } /** * Safe wrapper around spawnSync that enforces permission checks, injects default * options (maxBuffer, encoding, timeout), warns about unsafe Python and pip/uv * invocations, and records every executed command in the commandsExecuted set. * * @param {string} command The executable to spawn * @param {string[]} args Arguments to pass to the command * @param {Object} options Options forwarded to spawnSync (e.g. cwd, env, shell) * @returns {Object} spawnSync result object with status, stdout, stderr, and error fields */ export function safeSpawnSync(command, args, options) { const activityDescriptor = buildCommandActivityDescriptor( command, args, options, ); const allowedCommandsEnv = readEnvironmentVariable("CDXGEN_ALLOWED_COMMANDS"); const commandAllowed = isAllowedCommand(command, allowedCommandsEnv); if (allowedCommandsEnv) { recordPolicyActivity(command, { metadata: { allowed: commandAllowed, allowlist: allowedCommandsEnv, policyType: "command-allowlist", }, reason: `${commandAllowed ? "Allowed" : "Blocked"} command ${command} against CDXGEN_ALLOWED_COMMANDS.`, status: commandAllowed ? "completed" : "blocked", traceDetail: "allowlist", }); } if (isSecureMode && process.permission) { const hasChildPermission = process.permission.has("child"); recordPolicyActivity(command, { metadata: { allowed: hasChildPermission, policyType: "child-process", }, reason: `${hasChildPermission ? "Confirmed" : "Denied"} child-process permission for ${command}.`, status: hasChildPermission ? 
"completed" : "blocked", traceDetail: "child-permission", }); } if (isDryRun) { const error = createDryRunError( "execute", command, activityDescriptor.blockedReason, ); recordActivity({ kind: activityDescriptor.kind, ...activityDescriptor.metadata, reason: error.message, status: "blocked", target: activityDescriptor.target, }); return { status: 1, stdout: undefined, stderr: undefined, error, }; } if ( (isSecureMode && process.permission && !process.permission.has("child")) || !commandAllowed ) { if (DEBUG_MODE) { console.log(`cdxgen lacks execute permission for ${command}`); } recordActivity({ kind: activityDescriptor.kind, ...activityDescriptor.metadata, reason: "cdxgen lacks execute permission for this command.", status: "blocked", target: activityDescriptor.target, }); return { status: 1, stdout: undefined, stderr: undefined, error: new Error("No execute permission"), }; } if (isSecureMode) { if (isWindowsShellHijackRisk(command, options)) { const blockedReason = `${command} matches local file in cwd (Windows shell hijack risk)`; console.warn(`\x1b[1;31mSecurity Alert: ${blockedReason}\x1b[0m`); recordActivity({ kind: activityDescriptor.kind, ...activityDescriptor.metadata, reason: blockedReason, status: "blocked", target: activityDescriptor.target, }); return { status: 1, stdout: undefined, stderr: undefined, error: new Error(blockedReason), }; } if (options?.cwd && options.cwd !== resolve(options.cwd)) { if (DEBUG_MODE) { console.log( "Executing commands with a relative cwd can cause security issues.", ); } } } if (!options) { options = {}; } else if (options.cdxgenActivity) { options = { ...options, }; } if (options.cdxgenActivity) { delete options.cdxgenActivity; } if (options.shell === true) { const unsafeShellToken = getUnsafeShellToken(command, args); if (unsafeShellToken !== undefined) { const blockedReason = `Blocked shell execution for ${command}: command or argument contains shell metacharacters.`; console.warn(`\x1b[1;31mSecurity Alert: 
${blockedReason}\x1b[0m`); recordActivity({ kind: activityDescriptor.kind, ...activityDescriptor.metadata, reason: blockedReason, status: "blocked", target: activityDescriptor.target, }); return { status: 1, stdout: undefined, stderr: undefined, error: new Error(blockedReason), }; } } // Inject maxBuffer if (!options.maxBuffer) { options.maxBuffer = MAX_BUFFER; } // Inject encoding if (!options.encoding) { options.encoding = "utf-8"; } // Inject timeout if (!options.timeout) { options.timeout = TIMEOUT_MS; } // Emit certain operational warnings only once per process to keep audit logs readable. const emitNoticeOnce = (noticeKey, message, level = "warn") => { if (!globalThis.__cdxgenNoticeCache) { globalThis.__cdxgenNoticeCache = new Set(); } if (globalThis.__cdxgenNoticeCache.has(noticeKey)) { return; } globalThis.__cdxgenNoticeCache.add(noticeKey); if (level === "log") { console.log(message); return; } console.warn(message); }; // Check for -S for python invocations in secure mode if (command.includes("python") && (!args?.length || args[0] !== "-S")) { if (isSecureMode) { emitNoticeOnce( "python-without-S-secure", "\x1b[1;35mNotice: Running python command without '-S' argument. This is a bug in cdxgen. Please report with an example repo here https://github.com/cdxgen/cdxgen/issues.\x1b[0m", ); } else if (process.env?.CDXGEN_IN_CONTAINER === "true") { emitNoticeOnce( "python-without-S-container", "Running python command without '-S' argument.", "log", ); } else { emitNoticeOnce( "python-without-S-host", "\x1b[1;35mNotice: Running python command without '-S' argument. 
Only run cdxgen in trusted directories to prevent auto-executing local scripts.\x1b[0m", ); } } let isPyPackageInstall = false; if (command.includes("pip") && args?.includes("install")) { isPyPackageInstall = true; } else if ( command.includes("python") && args?.includes("pip") && args?.includes("install") ) { isPyPackageInstall = true; } else if ( command.includes("uv") && args?.includes("pip") && args?.includes("install") ) { isPyPackageInstall = true; } if (isPyPackageInstall) { const hasOnlyBinary = args?.some( (arg) => arg === "--only-binary" || arg.startsWith("--only-binary="), ); if (!hasOnlyBinary) { if (isSecureMode) { emitNoticeOnce( "pip-without-only-binary-secure", "\x1b[1;31mSecurity Alert: pip/uv install invoked without '--only-binary' argument in secure mode. This is a bug in cdxgen and introduces Arbitrary Code Execution (ACE) risks. Please report with an example repo here https://github.com/cdxgen/cdxgen/issues.\x1b[0m", ); } else if (process.env?.CDXGEN_IN_CONTAINER === "true") { emitNoticeOnce( "pip-without-only-binary-container", "Running pip/uv install without '--only-binary' argument.", "log", ); } else { emitNoticeOnce( "pip-without-only-binary-host", "\x1b[1;35mNotice: pip/uv install invoked without '--only-binary'. This allows executing untrusted setup.py scripts. Only run cdxgen in trusted directories.\x1b[0m", ); } } } traceLog("spawn", { command, args, ...options }); commandsExecuted.add(command); // Fix for DEP0190 warning if (options?.shell === true) { if (args?.length) { command = `${command} ${args.join(" ")}`; args = undefined; } } const result = spawnSync(command, args, options); recordActivity({ kind: activityDescriptor.kind, ...activityDescriptor.metadata, stderrBytes: getOutputByteSize(result.stderr, options.encoding), reason: result.error?.message, status: result.status === 0 && !result.error ? 
"completed" : "failed", stdoutBytes: getOutputByteSize(result.stdout, options.encoding), target: activityDescriptor.target, }); return result; } const licenseMapping = JSON.parse( readFileSync(join(dirNameStr, "data", "lic-mapping.json"), "utf-8"), ); const vendorAliases = JSON.parse( readFileSync(join(dirNameStr, "data", "vendor-alias.json"), "utf-8"), ); const spdxLicenses = JSON.parse( readFileSync(join(dirNameStr, "data", "spdx-licenses.json"), "utf-8"), ); const knownLicenses = JSON.parse( readFileSync(join(dirNameStr, "data", "known-licenses.json"), "utf-8"), ); const mesonWrapDB = JSON.parse( readFileSync(join(dirNameStr, "data", "wrapdb-releases.json"), "utf-8"), ); export const frameworksList = JSON.parse( readFileSync(join(dirNameStr, "data", "frameworks-list.json"), "utf-8"), ); const selfPJson = JSON.parse( readFileSync(join(dirNameStr, "package.json"), "utf-8"), ); const CPP_STD_MODULES = JSON.parse( readFileSync(join(dirNameStr, "data", "glibc-stdlib.json"), "utf-8"), ); export const CDXGEN_VERSION = selfPJson.version; // Refer to contrib/py-modules.py for a script to generate this list // The script needs to be used once every few months to update this list const PYTHON_STD_MODULES = JSON.parse( readFileSync(join(dirNameStr, "data", "python-stdlib.json"), "utf-8"), ); // Mapping between modules and package names const PYPI_MODULE_PACKAGE_MAPPING = JSON.parse( readFileSync(join(dirNameStr, "data", "pypi-pkg-aliases.json"), "utf-8"), ); // FIXME. This has to get removed, once we improve the module detection one-liner. // If you're a Rubyist, please help us improve this code. const RUBY_KNOWN_MODULES = JSON.parse( readFileSync(join(dirNameStr, "data", "ruby-known-modules.json"), "utf-8"), ); // Debug mode flag export const DEBUG_MODE = ["debug", "verbose"].includes(process.env.CDXGEN_DEBUG_MODE) || process.env.SCAN_DEBUG_MODE === "debug"; export const CDXGEN_SPDX_CREATED_BY = process.env.CDXGEN_SPDX_CREATED_BY; // Table border style for console output. 
export const TABLE_BORDER_STYLE = ["ascii", "unicode", "auto"].includes( `${process.env.CDXGEN_TABLE_BORDER || ""}`.toLowerCase(), ) ? `${process.env.CDXGEN_TABLE_BORDER}`.toLowerCase() : "auto"; // Timeout milliseconds. Default 20 mins export const TIMEOUT_MS = Number.parseInt(process.env.CDXGEN_TIMEOUT_MS, 10) || 20 * 60 * 1000; // Max buffer for stdout and stderr. Defaults to 100MB export const MAX_BUFFER = Number.parseInt(process.env.CDXGEN_MAX_BUFFER, 10) || 100 * 1024 * 1024; // Metadata cache export let metadata_cache = {}; // Speed up lookup namespaces for a given jar const jarNSMapping_cache = {}; // Temporary files written by cdxgen, will be removed on exit const temporaryFiles = new Set(); process.on("exit", () => temporaryFiles.forEach((tempFile) => { if (safeExistsSync(tempFile)) { safeUnlinkSync(tempFile); } }), ); // Whether test scope shall be included for java/maven projects; default, if unset shall be 'true' export const includeMavenTestScope = !process.env.CDX_MAVEN_INCLUDE_TEST_SCOPE || ["true", "1"].includes(process.env.CDX_MAVEN_INCLUDE_TEST_SCOPE); // Whether to use the native maven dependency tree command. Defaults to true. export const PREFER_MAVEN_DEPS_TREE = !["false", "0"].includes( process.env?.PREFER_MAVEN_DEPS_TREE, ); /** * Determines whether license information should be fetched from remote sources, * based on the FETCH_LICENSE environment variable. * * @returns {boolean} True if the FETCH_LICENSE env var is set to "true" or "1" */ export function shouldFetchLicense() { return ( process.env.FETCH_LICENSE && ["true", "1"].includes(process.env.FETCH_LICENSE) ); } /** * Determines whether remote package metadata should be fetched for enrichment. * * @returns {boolean} True when registry metadata enrichment is enabled. 
*/ export function shouldFetchPackageMetadata() { return ( shouldFetchLicense() || (process.env.CDXGEN_FETCH_PKG_METADATA && ["true", "1"].includes(process.env.CDXGEN_FETCH_PKG_METADATA)) ); } /** * Determines whether VCS (version control system) information should be fetched * for Go packages, based on the GO_FETCH_VCS environment variable. * * @returns {boolean} True if the GO_FETCH_VCS env var is set to "true" or "1" */ export function shouldFetchVCS() { return ( process.env.GO_FETCH_VCS && ["true", "1"].includes(process.env.GO_FETCH_VCS) ); } // Whether license information should be fetched export const FETCH_LICENSE = shouldFetchLicense(); // Whether search.maven.org will be used to identify jars without maven metadata; default, if unset shall be 'true' export const SEARCH_MAVEN_ORG = !process.env.SEARCH_MAVEN_ORG || ["true", "1"].includes(process.env.SEARCH_MAVEN_ORG); // circuit breaker for search maven.org let search_maven_org_errors = 0; const MAX_SEARCH_MAVEN_ORG_ERRORS = 1; // circuit breaker for get repo license let get_repo_license_errors = 0; const MAX_GET_REPO_LICENSE_ERRORS = 5; const MAX_LICENSE_ID_LENGTH = 100; export const JAVA_CMD = getJavaCommand(); /** * Returns the Java executable command to use, resolved in priority order: * JAVA_CMD env var > JAVA_HOME/bin/java > "java". * * @returns {string} Path or name of the Java executable */ export function getJavaCommand() { let javaCmd = "java"; if (process.env.JAVA_CMD) { javaCmd = process.env.JAVA_CMD; } else if ( process.env.JAVA_HOME && safeExistsSync(process.env.JAVA_HOME) && safeExistsSync(join(process.env.JAVA_HOME, "bin", "java")) ) { javaCmd = join(process.env.JAVA_HOME, "bin", "java"); } return javaCmd; } export const PYTHON_CMD = getPythonCommand(); /** * Returns the Python executable command to use, resolved in priority order: * PYTHON_CMD env var > CONDA_PYTHON_EXE env var > "python". 
* * @returns {string} Path or name of the Python executable */ export function getPythonCommand() { let pythonCmd = "python"; if (process.env.PYTHON_CMD) { pythonCmd = process.env.PYTHON_CMD; } else if (process.env.CONDA_PYTHON_EXE) { pythonCmd = process.env.CONDA_PYTHON_EXE; } return pythonCmd; } export let DOTNET_CMD = "dotnet"; if (process.env.DOTNET_CMD) { DOTNET_CMD = process.env.DOTNET_CMD; } export let NODE_CMD = "node"; if (process.env.NODE_CMD) { NODE_CMD = process.env.NODE_CMD; } export let NPM_CMD = "npm"; if (process.env.NPM_CMD) { NPM_CMD = process.env.NPM_CMD; } export let YARN_CMD = "yarn"; if (process.env.YARN_CMD) { YARN_CMD = process.env.YARN_CMD; } export let GCC_CMD = "gcc"; if (process.env.GCC_CMD) { GCC_CMD = process.env.GCC_CMD; } export let RUSTC_CMD = "rustc"; if (process.env.RUSTC_CMD) { RUSTC_CMD = process.env.RUSTC_CMD; } export let GO_CMD = "go"; if (process.env.GO_CMD) { GO_CMD = process.env.GO_CMD; } export let CARGO_CMD = "cargo"; if (process.env.CARGO_CMD) { CARGO_CMD = process.env.CARGO_CMD; } // Clojure CLI export let CLJ_CMD = "clj"; if (process.env.CLJ_CMD) { CLJ_CMD = process.env.CLJ_CMD; } export let LEIN_CMD = "lein"; if (process.env.LEIN_CMD) { LEIN_CMD = process.env.LEIN_CMD; } export let CDXGEN_TEMP_DIR = "temp"; if (process.env.CDXGEN_TEMP_DIR) { CDXGEN_TEMP_DIR = process.env.CDXGEN_TEMP_DIR; } export const SWIFT_CMD = process.env.SWIFT_CMD || "swift"; export const RUBY_CMD = process.env.RUBY_CMD || "ruby"; // Python components that can be excluded export const PYTHON_EXCLUDED_COMPONENTS = [ "pip", "setuptools", "wheel", "conda", "conda-build", "conda-index", "conda-libmamba-solver", "conda-package-handling", "conda-package-streaming", "conda-content-trust", ]; // Project type aliases export const PROJECT_TYPE_ALIASES = { java: [ "java", "java8", "java11", "java17", "java21", "java22", "java23", "java24", "groovy", "kotlin", "kt", "scala", "jvm", "gradle", "mvn", "maven", "sbt", "bazel", "quarkus", "mill", ], android: 
["android", "apk", "aab"], jar: ["jar", "war", "ear"], "gradle-index": ["gradle-index", "gradle-cache"], "sbt-index": ["sbt-index", "sbt-cache"], "maven-index": ["maven-index", "maven-cache", "maven-core"], "cargo-cache": ["cargo-cache", "cargo-index"], js: [ "npm", "pnpm", "nodejs", "nodejs8", "nodejs10", "nodejs12", "nodejs14", "nodejs16", "nodejs18", "nodejs20", "n