UNPKG

@cyclonedx/cdxgen

Version:

Creates CycloneDX Software Bill of Materials (SBOM) from source or container image

github.com/cdxgen/cdxgen

1,509 lines (1,457 loc) • 78.4 kB

JavaScript

import { createHash } from "node:crypto"; import { readdirSync, readFileSync, realpathSync, statSync } from "node:fs"; import { dirname, join, relative, resolve } from "node:path"; import process from "node:process"; import { createBom } from "../cli/index.js"; import { DEFAULT_HBOM_AUDIT_CATEGORIES } from "../helpers/auditCategories.js"; import { getCycloneDxFormat, getNonCycloneDxErrorMessage, isCycloneDxBom, } from "../helpers/bomUtils.js"; import { thoughtLog } from "../helpers/logger.js"; import { hasRegistryProvenanceEvidenceProperties, hasTrustedPublishingProperties, } from "../helpers/provenanceUtils.js"; import { cleanupSourceDir, findGitRefForPurlVersion, hardenedGitCommand, resolveGitUrlFromPurl, resolvePurlSourceDirectory, sanitizeRemoteUrlForLogs, } from "../helpers/source.js"; import { dirNameStr, getTmpDir, isDryRun, recordActivity, safeExistsSync, safeMkdirSync, safeMkdtempSync, safeRmSync, safeWriteSync, } from "../helpers/utils.js"; import { auditBom, isHbomLikeBom, isObomLikeBom, } from "../stages/postgen/auditBom.js"; import { postProcess } from "../stages/postgen/postgen.js"; import { formatTargetLabel } from "./progress.js"; import { renderAuditReport } from "./reporters.js"; import { SEVERITY_ORDER, scoreTargetRisk, severityMeetsThreshold, } from "./scoring.js"; import { collectAuditTargets, enrichInputBomsWithRegistryMetadata, normalizePackageName, } from "./targets.js"; export const DEFAULT_AUDIT_CATEGORIES = [ "ai-agent", "ci-permission", "dependency-source", "package-integrity", ]; const DIRECT_AUDIT_TOOL_NAME = "cdx-audit"; const AUDIT_CACHE_DIRNAME = ".cdx-audit"; const AUDIT_CACHE_BOM_FILE = "source-bom.json"; const AUDIT_CACHE_META_FILE = "source-bom.meta.json"; const CLONE_RETRY_DELAYS_MS = [750, 1500]; const LARGE_PREDICTIVE_AUDIT_THRESHOLD = 50; const VERY_LARGE_PREDICTIVE_AUDIT_THRESHOLD = 100; const PYTHON_METADATA_FILES = ["pyproject.toml", "setup.cfg", "setup.py"]; const PYTHON_HEURISTIC_FILENAMES = new Set(["setup.py", "__init__.py"]); const PYTHON_HEURISTIC_FILE_LIMIT = 32; const PYTHON_HEURISTIC_MAX_FILE_BYTES = 256 * 1024; const PYTHON_SKIP_DIRS = new Set([ ".git", ".hg", ".tox", ".venv", "__pycache__", "build", "dist", "node_modules", "site-packages", "venv", ]); const PYTHON_EXECUTION_PATTERN = /\b(?:exec|eval|compile)\s*\(|\b(?:subprocess\.(?:Popen|run|call|check_output)|os\.(?:system|popen))\b/i; const PYTHON_NETWORK_PATTERN = /\b(?:requests\.(?:get|post|put|patch)|urllib(?:\.request)?\.urlopen|http\.client|socket\.socket)\b/i; const PYTHON_OBFUSCATION_PATTERN = /\b(?:base64\.(?:b64decode|urlsafe_b64decode)|binascii\.a2b_base64|marshal\.loads|zlib\.decompress|codecs\.decode\s*\([^)]*base64|bytes\.fromhex)\b/i; const PYTHON_SETUP_CMDCLASS_PATTERN = /\bcmdclass\s*=/i; /** * Read and validate a CycloneDX BOM file. * * @param {string} bomPath BOM file path * @returns {object} parsed CycloneDX BOM */ export function loadBomFile(bomPath) { const resolvedPath = resolve(bomPath); let bomJson; try { bomJson = JSON.parse(readFileSync(resolvedPath, "utf8")); } catch (error) { throw new Error(`Failed to parse ${resolvedPath}: ${error.message}`); } if (!isCycloneDxBom(bomJson)) { throw new Error(getNonCycloneDxErrorMessage(bomJson, "cdx-audit")); } return bomJson; } /** * Recursively list JSON files under a BOM directory. * * @param {string} bomDir directory path * @returns {string[]} discovered file paths */ export function listBomFiles(bomDir) { const foundFiles = []; const queue = [resolve(bomDir)]; while (queue.length) { const currentDir = queue.shift(); const entries = readdirSync(currentDir, { withFileTypes: true }); for (const entry of entries) { const entryPath = join(currentDir, entry.name); if (entry.isDirectory()) { queue.push(entryPath); continue; } if (entry.isFile() && entry.name.endsWith(".json")) { foundFiles.push(entryPath); } } } return foundFiles.sort(); } /** * Load input BOM files from either a single file or a directory. * * @param {object} options CLI options * @returns {{ source: string, bomJson: object }[]} loaded input BOMs */ export function loadInputBoms(options) { const inputBoms = []; if (options.bom) { inputBoms.push({ bomJson: loadBomFile(options.bom), source: resolve(options.bom), }); } if (options.bomDir) { const bomFiles = listBomFiles(options.bomDir); for (const bomFile of bomFiles) { try { inputBoms.push({ bomJson: loadBomFile(bomFile), source: bomFile, }); } catch (error) { console.warn( `Skipping non-CycloneDX JSON file '${bomFile}': ${error.message}`, ); } } } return inputBoms; } function summarizeDirectAuditFindings(findings = []) { const findingsBySeverity = { critical: 0, high: 0, low: 0, medium: 0, }; let maxSeverity = "none"; for (const finding of findings) { const severity = finding?.severity || "low"; if (findingsBySeverity[severity] !== undefined) { findingsBySeverity[severity] += 1; } if ( (SEVERITY_ORDER[severity] ?? -1) > (SEVERITY_ORDER[maxSeverity] ?? -1) ) { maxSeverity = severity; } } return { findingsBySeverity, findingsCount: findings.length, maxSeverity, }; } function buildDirectAuditOptions(bomJson, options = {}) { const explicitCategories = options.categories?.length ? options.categories.join(",") : undefined; return { bomAuditCategories: explicitCategories || (isHbomLikeBom(bomJson) ? DEFAULT_HBOM_AUDIT_CATEGORIES : isObomLikeBom(bomJson) ? "obom-runtime" : undefined), bomAuditMinSeverity: options.minSeverity || "low", bomAuditRulesDir: options.rulesDir, }; } export async function runDirectBomAuditFromBoms(inputBoms, options = {}) { if (!inputBoms.length) { throw new Error("No CycloneDX BOM inputs were found."); } const results = []; for (const inputBom of inputBoms) { const directAuditOptions = buildDirectAuditOptions( inputBom.bomJson, options, ); const findings = await auditBom(inputBom.bomJson, directAuditOptions); results.push({ auditOptions: directAuditOptions, bomFormat: getCycloneDxFormat(inputBom.bomJson), findings, serialNumber: inputBom.bomJson?.serialNumber, source: inputBom.source, specVersion: inputBom.bomJson?.specVersion, status: "audited", summary: summarizeDirectAuditFindings(findings), }); } const summary = { findingsBySeverity: { critical: 0, high: 0, low: 0, medium: 0, }, inputBomCount: inputBoms.length, maxSeverity: "none", totalFindings: 0, bomsWithFindings: 0, }; for (const result of results) { summary.totalFindings += result.summary.findingsCount; if (result.summary.findingsCount > 0) { summary.bomsWithFindings += 1; } for (const [severity, count] of Object.entries( result.summary.findingsBySeverity, )) { summary.findingsBySeverity[severity] += count; } if ( (SEVERITY_ORDER[result.summary.maxSeverity] ?? -1) > (SEVERITY_ORDER[summary.maxSeverity] ?? -1) ) { summary.maxSeverity = result.summary.maxSeverity; } } return { auditMode: "direct", generatedAt: new Date().toISOString(), inputs: inputBoms.map((inputBom) => inputBom.source), results, summary, tool: { name: DIRECT_AUDIT_TOOL_NAME, version: readPackageVersion(), }, }; } /** * Read the package version from the local package.json file. * * @returns {string} package version */ function readPackageVersion() { const packageJson = JSON.parse( readFileSync(join(dirNameStr, "package.json"), "utf8"), ); return packageJson.version; } /** * Build a deterministic directory-safe slug for report and workspace paths. * * @param {object} target audit target * @returns {string} slug string */ function targetSlug(target) { const packageName = target.namespace ? `${target.namespace}-${target.name}` : target.name; const normalized = normalizePackageName(packageName) .replace(/[^a-z0-9-]/g, "-") .replace(/-+/g, "-") .replace(/^-|-$/g, ""); const version = normalizePackageName(target.version || "latest") || "latest"; const digest = createHash("sha256") .update(target.purl) .digest("hex") .slice(0, 12); return `${target.type}-${normalized || "package"}-${version}-${digest}`; } /** * Ensure a parent directory exists before writing a file. * * @param {string} filePath file path to create * @param {string} content file content * @returns {void} */ function writeTextFile(filePath, content) { const parentDir = dirname(filePath); if (!safeExistsSync(parentDir)) { safeMkdirSync(parentDir, { recursive: true }); } safeWriteSync(filePath, content); } /** * Ensure a parent directory exists before writing JSON. * * @param {string} filePath file path to create * @param {object} payload JSON payload * @returns {void} */ function writeJsonFile(filePath, payload) { writeTextFile(filePath, `${JSON.stringify(payload, null, 2)}\n`); } function sleep(ms) { return new Promise((resolvePromise) => { setTimeout(resolvePromise, ms); }); } function isPathWithin(parentDir, childPath) { const normalizePath = (candidatePath) => { try { return realpathSync.native ? realpathSync.native(candidatePath) : realpathSync(candidatePath); } catch { return resolve(candidatePath); } }; const normalizedChild = normalizePath(childPath); const candidateParents = [parentDir]; if (process.platform !== "win32") { candidateParents.push("/tmp"); candidateParents.push("/private/tmp"); } return candidateParents.some((candidateParent) => { const normalizedParent = normalizePath(candidateParent); return ( normalizedChild === normalizedParent || normalizedChild.startsWith(`${normalizedParent}/`) ); }); } function isTemporaryWorkspaceDir(workspaceDir) { return workspaceDir ? isPathWithin(getTmpDir(), workspaceDir) : false; } function prepareWorkspaceContext(options = {}) { if (!options.workspaceDir) { return { cleanupOnFinish: false, workspaceDir: undefined, }; } const workspaceDir = resolve(options.workspaceDir); const existed = safeExistsSync(workspaceDir); if (!existed) { safeMkdirSync(workspaceDir, { recursive: true }); } return { cleanupOnFinish: !existed && isTemporaryWorkspaceDir(workspaceDir), workspaceDir, }; } function getWorkspaceTargetDir(workspaceDir, target) { return join(resolve(workspaceDir), targetSlug(target)); } function getWorkspaceCachePaths(workspaceDir, target) { const targetDir = getWorkspaceTargetDir(workspaceDir, target); const cacheDir = join(targetDir, AUDIT_CACHE_DIRNAME); return { cacheDir, metadataFile: join(cacheDir, AUDIT_CACHE_META_FILE), sourceBomFile: join(cacheDir, AUDIT_CACHE_BOM_FILE), targetDir, }; } function loadCachedChildBom(workspaceDir, target) { if (!workspaceDir) { return undefined; } const cachePaths = getWorkspaceCachePaths(workspaceDir, target); if (!safeExistsSync(cachePaths.sourceBomFile)) { return undefined; } try { const bomJson = loadBomFile(cachePaths.sourceBomFile); let metadata = {}; if (safeExistsSync(cachePaths.metadataFile)) { metadata = JSON.parse(readFileSync(cachePaths.metadataFile, "utf8")); } const scanDir = metadata.scanDirRelative ? resolve(cachePaths.targetDir, metadata.scanDirRelative) : cachePaths.targetDir; return { bomJson, cacheDir: cachePaths.cacheDir, repoUrl: metadata.repoUrl, resolution: metadata.resolution, scanDir, sourceDirectoryConfidence: metadata.sourceDirectoryConfidence || "medium", versionMatched: metadata.versionMatched !== false, }; } catch { return undefined; } } function writeCachedChildBom(workspaceDir, target, payload) { if (!workspaceDir || !payload?.bomJson) { return; } const cachePaths = getWorkspaceCachePaths(workspaceDir, target); safeMkdirSync(cachePaths.cacheDir, { recursive: true }); writeJsonFile(cachePaths.sourceBomFile, payload.bomJson); writeJsonFile(cachePaths.metadataFile, { generatedAt: new Date().toISOString(), repoUrl: payload.repoUrl, resolution: payload.resolution, scanDirRelative: payload.scanDir ? relative(cachePaths.targetDir, resolve(payload.scanDir)) || "." : ".", sourceDirectoryConfidence: payload.sourceDirectoryConfidence, versionMatched: payload.versionMatched, }); } function persistAuditArtifacts(result, options, sourceBomJson) { if (!options.reportsDir) { return result; } const resultDir = join( resolve(options.reportsDir), targetSlug(result.target), ); safeMkdirSync(resultDir, { recursive: true }); result.reportDir = resultDir; result.findingsFile = join(resultDir, "findings.json"); result.summaryFile = join(resultDir, "summary.json"); if (sourceBomJson) { result.sourceBomFile = join(resultDir, "source-bom.json"); writeJsonFile(result.sourceBomFile, sourceBomJson); } writeJsonFile(result.findingsFile, result.findings || []); writeJsonFile(result.summaryFile, { assessment: result.assessment, cacheHit: result.cacheHit || false, error: result.error, errorType: result.errorType, findingsCount: result.findings?.length || 0, repoUrl: result.repoUrl, sourceDirectoryConfidence: result.sourceDirectoryConfidence, status: result.status, target: result.target, }); return result; } /** * Emit a progress event when a callback is configured. * * @param {object} options CLI options * @param {object} event progress event payload * @returns {void} */ function emitProgress(options, event) { if (typeof options?.onProgress === "function") { options.onProgress(event); } } function buildPredictiveAuditEstimate(selectedTargets) { if (selectedTargets >= VERY_LARGE_PREDICTIVE_AUDIT_THRESHOLD) { return "This may take 10+ minutes depending on repository lookups and child SBOM generation."; } if (selectedTargets >= LARGE_PREDICTIVE_AUDIT_THRESHOLD) { return "This may take several minutes depending on repository lookups and child SBOM generation."; } return undefined; } function buildPredictiveAuditPreflightMessage(extractedTargets, options) { const selectedTargets = extractedTargets?.targets?.length || 0; const allowlistedTargetsExcluded = extractedTargets?.stats?.allowlistedTargetsExcluded || 0; const availableTargets = extractedTargets?.stats?.availableTargets || 0; const requiredTargets = extractedTargets?.stats?.requiredTargets || 0; const trustedTargetsExcluded = extractedTargets?.stats?.trustedTargetsExcluded || 0; const truncatedTargets = extractedTargets?.stats?.truncatedTargets || 0; const estimate = buildPredictiveAuditEstimate(selectedTargets); const trustedHint = options?.trustedSelectionHelp ? ` ${options.trustedSelectionHelp}` : ""; const trustedExclusionMessage = trustedTargetsExcluded ? ` Skipping ${trustedTargetsExcluded} trusted-publishing-backed package(s) by default.${trustedHint}` : ""; const customAllowlistSuffix = options?.allowlistFile ? " and your custom allowlist" : ""; const allowlistExclusionMessage = allowlistedTargetsExcluded ? ` Skipping ${allowlistedTargetsExcluded} allowlisted package(s) using the built-in well-known purl prefix filter${customAllowlistSuffix}.` : ""; if (!estimate && availableTargets < LARGE_PREDICTIVE_AUDIT_THRESHOLD) { const passiveMessage = `${trustedExclusionMessage}${allowlistExclusionMessage}`.trim(); return passiveMessage || undefined; } if (options?.scope === "required") { return `Predictive audit will scan ${selectedTargets} required package(s). ${estimate || "Large required-only scans may still take a while depending on repository lookups and child SBOM generation."}${trustedExclusionMessage}${allowlistExclusionMessage}`; } if (truncatedTargets > 0) { const additionalTargets = Math.max(0, selectedTargets - requiredTargets); return `Predictive audit selected ${selectedTargets} of ${availableTargets} package(s) (${requiredTargets} required${additionalTargets ? ` + ${additionalTargets} additional` : ""}) using required-first prioritization. ${estimate || "This run was trimmed to keep audit time reasonable."}${trustedExclusionMessage}${allowlistExclusionMessage}`; } return `Predictive audit will scan ${selectedTargets} package(s). ${estimate || "Large predictive audits may still take a while depending on repository lookups and child SBOM generation."}${trustedExclusionMessage}${allowlistExclusionMessage}`; } /** * Read a custom property from a target descriptor. * * @param {object} target audit target * @param {string} propertyName property name * @returns {string | undefined} property value */ function getTargetProperty(target, propertyName) { return target?.properties?.find((property) => property.name === propertyName) ?.value; } function getTargetNumberProperty(target, propertyName) { const value = getTargetProperty(target, propertyName); if (!value) { return undefined; } const numericValue = Number(value); return Number.isFinite(numericValue) ? numericValue : undefined; } function getTargetTimestampProperty(target, propertyName) { const value = getTargetProperty(target, propertyName); if (!value) { return undefined; } const timestamp = Date.parse(value); return Number.isNaN(timestamp) ? undefined : timestamp; } function getTargetListProperty(target, propertyName) { const value = getTargetProperty(target, propertyName); if (!value) { return []; } return [ ...new Set( value .split(",") .map((entry) => entry.trim()) .filter(Boolean), ), ]; } function isEstablishedPackage(target, propertyPrefix) { const packageCreatedTime = getTargetTimestampProperty( target, `${propertyPrefix}:packageCreatedTime`, ); const versionCount = getTargetNumberProperty( target, `${propertyPrefix}:versionCount`, ); if (!packageCreatedTime || !versionCount) { return false; } const packageAgeMs = Date.now() - packageCreatedTime; return packageAgeMs >= 1000 * 60 * 60 * 24 * 30 && versionCount >= 3; } function isRecentRelease(target, propertyPrefix) { const publishTime = getTargetTimestampProperty( target, `${propertyPrefix}:publishTime`, ); if (!publishTime) { return false; } const releaseAgeMs = Date.now() - publishTime; return releaseAgeMs >= 0 && releaseAgeMs <= 1000 * 60 * 60 * 72; } function hasPublisherDrift(target, propertyPrefix) { return ( getTargetProperty(target, `${propertyPrefix}:publisherDrift`) === "true" ); } function hasMaintainerSetDrift(target, propertyPrefix) { return ( getTargetProperty(target, `${propertyPrefix}:maintainerSetDrift`) === "true" || getTargetProperty(target, `${propertyPrefix}:uploaderSetDrift`) === "true" ); } function hasPartialIdentitySetDrift(target, propertyPrefix) { const explicitPropertyName = propertyPrefix === "cdx:npm" ? `${propertyPrefix}:maintainerSetPartialDrift` : `${propertyPrefix}:uploaderSetPartialDrift`; if (getTargetProperty(target, explicitPropertyName) === "true") { return true; } const currentPropertyName = propertyPrefix === "cdx:npm" ? `${propertyPrefix}:maintainerSet` : `${propertyPrefix}:uploaderSet`; const priorPropertyName = propertyPrefix === "cdx:npm" ? `${propertyPrefix}:priorMaintainerSet` : `${propertyPrefix}:priorUploaderSet`; const currentSet = getTargetListProperty(target, currentPropertyName); const priorSet = getTargetListProperty(target, priorPropertyName); if (!currentSet.length || !priorSet.length) { return false; } const priorValues = new Set(priorSet); const overlapCount = currentSet.filter((value) => priorValues.has(value), ).length; if (overlapCount === 0) { return false; } const unionCount = new Set([...currentSet, ...priorSet]).size; return ( overlapCount < unionCount && (overlapCount < currentSet.length || overlapCount < priorSet.length) ); } function hasDormantReleaseGapAnomaly(target, propertyPrefix) { const currentGapDays = getTargetNumberProperty( target, `${propertyPrefix}:releaseGapDays`, ); const baselineGapDays = getTargetNumberProperty( target, `${propertyPrefix}:releaseGapBaselineDays`, ); const sampleSize = getTargetNumberProperty( target, `${propertyPrefix}:releaseGapSampleSize`, ); if (!currentGapDays || !baselineGapDays || !sampleSize || sampleSize < 3) { return false; } return currentGapDays >= Math.max(90, baselineGapDays * 8); } function hasCompressedCadence(target, propertyPrefix) { if ( getTargetProperty(target, `${propertyPrefix}:compressedCadence`) === "true" ) { return true; } const currentGapDays = getTargetNumberProperty( target, `${propertyPrefix}:releaseGapDays`, ); const baselineGapDays = getTargetNumberProperty( target, `${propertyPrefix}:releaseGapBaselineDays`, ); const sampleSize = getTargetNumberProperty( target, `${propertyPrefix}:releaseGapSampleSize`, ); if ( currentGapDays === undefined || baselineGapDays === undefined || sampleSize === undefined || sampleSize < 3 || currentGapDays <= 0 || baselineGapDays <= 0 || baselineGapDays < 21 ) { return false; } return currentGapDays <= 14 && currentGapDays / baselineGapDays <= 0.33; } /** * Build low-noise provenance-aware contextual findings from the root BOM target. * * These are intentionally conservative and only fire when there is explicit risk * posture already present in the target metadata. * * @param {object} target audit target * @returns {object[]} contextual findings */ export function buildTargetContextFindings(target) { const findings = []; const hasTrustedPublishing = hasTrustedPublishingProperties( target?.properties, ); const hasProvenanceEvidence = hasRegistryProvenanceEvidenceProperties( target?.properties, ); if (target.type === "npm") { const hasInstallScript = getTargetProperty(target, "cdx:npm:hasInstallScript") === "true"; const establishedPackage = isEstablishedPackage(target, "cdx:npm"); const recentRelease = isRecentRelease(target, "cdx:npm"); const publisherDrift = hasPublisherDrift(target, "cdx:npm"); const maintainerSetDrift = hasMaintainerSetDrift(target, "cdx:npm"); const partialMaintainerSetDrift = hasPartialIdentitySetDrift( target, "cdx:npm", ); const dormantReleaseGapAnomaly = hasDormantReleaseGapAnomaly( target, "cdx:npm", ); const compressedCadence = hasCompressedCadence(target, "cdx:npm"); if ( target.version && hasInstallScript && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "Install-time execution combined with missing registry-visible provenance raises future tampering risk.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `npm package '${target.name}@${target.version}' has install-time execution hooks but no registry-visible trusted publishing or provenance evidence.`, mitigation: "Prefer versions with registry-visible provenance evidence, review install scripts carefully, and pin/allowlist publishers for high-risk packages.", ruleId: "PROV-001", severity: "medium", }); } if ( target.version && establishedPackage && recentRelease && hasInstallScript && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "A very recent release on a mature package, combined with install-time execution and missing provenance, deserves extra scrutiny before adoption.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `npm package '${target.name}@${target.version}' is a very recent release on an established package and still lacks registry-visible provenance.`, mitigation: "Delay adoption briefly, verify publisher identity, and prefer registry-visible provenance for high-risk packages with install hooks.", ruleId: "PROV-003", severity: "low", }); } if ( target.version && establishedPackage && publisherDrift && hasInstallScript && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "Publisher drift on mature packages can be legitimate, but becomes more concerning when install-time execution is present and provenance is weak.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `npm package '${target.name}@${target.version}' was published by a different identity than the prior release and lacks registry-visible provenance.`, mitigation: "Review maintainer changes, compare the prior release publisher, and validate provenance before upgrading execution-capable packages.", ruleId: "PROV-004", severity: "medium", }); } if ( target.version && establishedPackage && maintainerSetDrift && hasInstallScript && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "Maintainer-set drift on execution-capable packages is a triage signal when the resolved release also lacks registry-visible provenance.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `npm package '${target.name}@${target.version}' has a fully different maintainer identity set than the prior release and lacks registry-visible provenance.`, mitigation: "Compare the prior and current maintainer sets, verify maintainer transitions, and prefer releases with provenance before upgrading packages with install hooks.", ruleId: "PROV-007", severity: "medium", }); } if ( target.version && establishedPackage && partialMaintainerSetDrift && !maintainerSetDrift && hasInstallScript && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "Partial maintainer-set drift is a low-severity triage signal when execution-capable releases retain some identities but also introduce maintainer churn without registry-visible provenance.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `npm package '${target.name}@${target.version}' retains only part of the prior maintainer identity set and lacks registry-visible provenance.`, mitigation: "Review which maintainer identities changed, compare against the prior release, and validate the transition before upgrading packages with install hooks.", ruleId: "PROV-011", severity: "low", }); } if ( target.version && establishedPackage && dormantReleaseGapAnomaly && hasInstallScript && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "A long dormant gap followed by a new execution-capable release can warrant a short review window when provenance is missing.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `npm package '${target.name}@${target.version}' arrived after an unusually long release gap and lacks registry-visible provenance.`, mitigation: "Review the release diff, compare against the prior version, and validate maintainer continuity before adopting after long dormancy.", ruleId: "PROV-008", severity: "low", }); } if ( target.version && establishedPackage && compressedCadence && hasInstallScript && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "A materially faster-than-usual release on a mature execution-capable package is a low-severity review signal when registry-visible provenance is absent.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `npm package '${target.name}@${target.version}' arrived materially faster than its prior release cadence and lacks registry-visible provenance.`, mitigation: "Review the release diff, compare the release timing against prior cadence, and validate the publisher transition before rapid upgrades of execution-capable packages.", ruleId: "PROV-012", severity: "low", }); } } if (target.type === "pypi") { const registry = getTargetProperty(target, "cdx:pypi:registry"); const isDefaultRegistry = !registry || ["https://pypi.org", "https://pypi.org/simple"].includes(registry); const uploaderVerified = getTargetProperty(target, "cdx:pypi:uploaderVerified") === "true"; const establishedPackage = isEstablishedPackage(target, "cdx:pypi"); const recentRelease = isRecentRelease(target, "cdx:pypi"); const publisherDrift = hasPublisherDrift(target, "cdx:pypi"); const maintainerSetDrift = hasMaintainerSetDrift(target, "cdx:pypi"); const partialMaintainerSetDrift = hasPartialIdentitySetDrift( target, "cdx:pypi", ); const dormantReleaseGapAnomaly = hasDormantReleaseGapAnomaly( target, "cdx:pypi", ); const compressedCadence = hasCompressedCadence(target, "cdx:pypi"); if ( target.version && isDefaultRegistry && !hasTrustedPublishing && !hasProvenanceEvidence && !uploaderVerified ) { findings.push({ category: "package-integrity", description: "Default-registry PyPI packages without provenance or verified uploader context are weaker candidates for publisher-trust decisions.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `PyPI package '${target.name}@${target.version}' lacks registry-visible provenance and uploader verification signals.`, mitigation: "Prefer releases with provenance evidence or verified uploader metadata, especially for sensitive or newly introduced dependencies.", ruleId: "PROV-002", severity: "low", }); } if ( target.version && isDefaultRegistry && establishedPackage && recentRelease && !hasTrustedPublishing && !hasProvenanceEvidence && !uploaderVerified ) { findings.push({ category: "package-integrity", description: "Very recent releases on mature packages can benefit from a short review window when provenance and uploader-verification signals are absent.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `PyPI package '${target.name}@${target.version}' is a very recent release on an established package without provenance or uploader verification signals.`, mitigation: "Delay adoption briefly, compare the release to the previous known-good version, and prefer verified/provenance-backed uploads for sensitive dependencies.", ruleId: "PROV-005", severity: "low", }); } if ( target.version && isDefaultRegistry && establishedPackage && publisherDrift && !hasTrustedPublishing && !hasProvenanceEvidence && !uploaderVerified ) { findings.push({ category: "package-integrity", description: "Uploader drift on established PyPI packages is usually a triage signal, but becomes more meaningful when provenance and verification are missing.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `PyPI package '${target.name}@${target.version}' was uploaded by a different identity than the prior release and lacks provenance or uploader verification signals.`, mitigation: "Review the uploader change, compare the prior release uploader, and validate project ownership before upgrading critical dependencies.", ruleId: "PROV-006", severity: "low", }); } if ( target.version && isDefaultRegistry && establishedPackage && maintainerSetDrift && !hasTrustedPublishing && !hasProvenanceEvidence && !uploaderVerified ) { findings.push({ category: "package-integrity", description: "Uploader-set drift on established PyPI packages is a triage signal when provenance and uploader verification are absent.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `PyPI package '${target.name}@${target.version}' has a fully different uploader identity set than the prior release and lacks provenance or uploader verification signals.`, mitigation: "Review uploader transitions, compare the prior release uploader set, and validate project ownership before upgrading sensitive dependencies.", ruleId: "PROV-009", severity: "low", }); } if ( target.version && isDefaultRegistry && establishedPackage && partialMaintainerSetDrift && !maintainerSetDrift && !hasTrustedPublishing && !hasProvenanceEvidence && !uploaderVerified ) { findings.push({ category: "package-integrity", description: "Partial uploader-set drift is a low-severity review signal on established PyPI packages when provenance and uploader verification are absent.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `PyPI package '${target.name}@${target.version}' retains only part of the prior uploader identity set and lacks provenance or uploader verification signals.`, mitigation: "Review which uploader identities changed, compare the release against the prior version, and validate project ownership before upgrading sensitive dependencies.", ruleId: "PROV-013", severity: "low", }); } if ( target.version && isDefaultRegistry && establishedPackage && dormantReleaseGapAnomaly && !hasTrustedPublishing && !hasProvenanceEvidence && !uploaderVerified ) { findings.push({ category: "package-integrity", description: "Established packages resurfacing after a long dormant gap benefit from extra review when provenance is weak.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `PyPI package '${target.name}@${target.version}' followed an unusually long release gap and lacks provenance or uploader verification signals.`, mitigation: "Compare the release to the prior known-good version and review maintainership continuity before adopting after long dormancy.", ruleId: "PROV-010", severity: "low", }); } if ( target.version && isDefaultRegistry && establishedPackage && compressedCadence && !hasTrustedPublishing && !hasProvenanceEvidence && !uploaderVerified ) { findings.push({ category: "package-integrity", description: "Materially faster-than-usual release timing is a low-severity triage signal on mature PyPI packages when provenance and uploader verification remain weak.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `PyPI package '${target.name}@${target.version}' arrived materially faster than its prior release cadence and lacks provenance or uploader verification signals.`, mitigation: "Compare the release timing and contents against prior versions, then validate uploader continuity before rapid upgrades of sensitive dependencies.", ruleId: "PROV-014", severity: "low", }); } } if (target.type === "cargo") { const yanked = getTargetProperty(target, "cdx:cargo:yanked") === "true"; const establishedPackage = isEstablishedPackage(target, "cdx:cargo"); const recentRelease = isRecentRelease(target, "cdx:cargo"); const publisherDrift = hasPublisherDrift(target, "cdx:cargo"); const dormantReleaseGapAnomaly = hasDormantReleaseGapAnomaly( target, "cdx:cargo", ); const compressedCadence = hasCompressedCadence(target, "cdx:cargo"); if (target.version && yanked) { findings.push({ category: "package-integrity", description: "Yanked crates are removed from normal Cargo resolution and usually indicate a correctness, security, or publisher-action issue that deserves review before further adoption.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `Cargo crate '${target.name}@${target.version}' has been yanked from crates.io.`, mitigation: "Prefer a non-yanked release and review the crate's publisher history and changelog before upgrading.", ruleId: "PROV-015", severity: "high", }); } if ( target.version && establishedPackage && recentRelease && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "Very recent releases on established crates benefit from a short review window when trusted publishing and provenance remain weak.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `Cargo crate '${target.name}@${target.version}' is a very recent release on an established package without registry-visible provenance signals.`, mitigation: "Delay adoption briefly, compare the release to the prior version, and prefer trusted-publishing-backed releases for sensitive crates.", ruleId: "PROV-016", severity: "low", }); } if ( target.version && establishedPackage && publisherDrift && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "Publisher drift on established crates is often benign, but becomes more meaningful when provenance and trusted publishing are absent.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `Cargo crate '${target.name}@${target.version}' was published by a different identity than the prior release and lacks registry-visible provenance signals.`, mitigation: "Review the publisher transition, compare the prior release metadata, and validate ownership before upgrading sensitive crates.", ruleId: "PROV-017", severity: "medium", }); } if ( target.version && establishedPackage && (dormantReleaseGapAnomaly || compressedCadence) && !hasTrustedPublishing && !hasProvenanceEvidence ) { findings.push({ category: "package-integrity", description: "Release timing anomalies on established crates are low-noise triage signals when provenance remains weak.", location: { bomRef: target.bomRefs?.[0], purl: target.purl, }, message: `Cargo crate '${target.name}@${target.version}' shows unusual release timing and lacks registry-visible provenance signals.`, mitigation: "Review the release diff and timing versus prior versions before rapidly adopting the new crate release.", ruleId: "PROV-018", severity: "low", }); } } return findings; } /** * Clone a repository into a deterministic workspace directory. * * @param {string} repoUrl repository URL * @param {string} cloneDir target clone directory * @param {string | undefined} gitRef git ref to checkout * @returns {void} */ function cloneRepositoryToDir(repoUrl, cloneDir, gitRef) { const gitArgs = [ "-c", "alias.clone=", "-c", "core.fsmonitor=false", "-c", "safe.bareRepository=explicit", "-c", "core.hooksPath=/dev/null", "clone", "--template=", repoUrl, "--depth", "1", cloneDir, ]; if (gitRef) { const cloneIndex = gitArgs.indexOf("clone"); gitArgs.splice(cloneIndex + 1, 0, "--branch", gitRef); } const result = hardenedGitCommand(gitArgs); if (result.status !== 0) { const stderr = result.stderr ? result.stderr.toString() : "unknown git clone error"; const error = new Error(stderr.trim()); error.retryable = /(timed out|unable to connect|could not resolve host|network is unreachable|connection reset|connection refused|temporary failure|remote end hung up unexpectedly|http 5\d\d|tls|econnreset|econnrefused|etimedout)/i.test( stderr, ); error.errorType = error.retryable ? "network" : "clone"; throw error; } } async function cloneRepositoryToDirWithRetry(repoUrl, cloneDir, gitRef) { let lastError; for (let attempt = 0; attempt <= CLONE_RETRY_DELAYS_MS.length; attempt += 1) { try { cloneRepositoryToDir(repoUrl, cloneDir, gitRef); return; } catch (error) { lastError = error; safeRmSync(cloneDir, { force: true, recursive: true }); if (!error?.retryable || attempt >= CLONE_RETRY_DELAYS_MS.length) { break; } await sleep(CLONE_RETRY_DELAYS_MS[attempt]); } } const sanitizedRepoUrl = sanitizeRemoteUrlForLogs(repoUrl); const message = lastError?.message || "unknown git clone error"; const error = new Error( `Unable to clone '${sanitizedRepoUrl}' after ${CLONE_RETRY_DELAYS_MS.length + 1} attempt(s): ${message}`, ); error.errorType = lastError?.errorType || "clone"; error.retryable = Boolean(lastError?.retryable); throw error; } /** * Reuse or create a checkout for a target repository. * * @param {object} target audit target * @param {object} resolution resolved repository metadata * @param {string | undefined} workspaceDir workspace directory * @param {string | undefined} gitRef git ref to checkout * @returns {{ cleanup: boolean, cloneDir: string, reused: boolean }} checkout info */ async function ensureCheckout(target, resolution, workspaceDir, gitRef) { if (!workspaceDir) { const cloneDir = safeMkdtempSync( join(getTmpDir(), `${targetSlug(target)}-`), ); await cloneRepositoryToDirWithRetry(resolution.repoUrl, cloneDir, gitRef); return { cleanup: true, cloneDir, reused: false, }; } const resolvedWorkspaceDir = resolve(workspaceDir); if (!safeExistsSync(resolvedWorkspaceDir)) { safeMkdirSync(resolvedWorkspaceDir, { recursive: true }); } const cloneDir = join(resolvedWorkspaceDir, targetSlug(target)); if (safeExistsSync(join(cloneDir, ".git"))) { return { cleanup: false, cloneDir, reused: true, }; } if (safeExistsSync(cloneDir)) { safeRmSync(cloneDir, { force: true, recursive: true }); } await cloneRepositoryToDirWithRetry(resolution.repoUrl, cloneDir, gitRef); return { cleanup: false, cloneDir, reused: false, }; } /** * Extract an expected package name from Python packaging metadata. * * @param {string} filePath metadata file path * @returns {string | undefined} discovered package name */ function readPythonPackageName(filePath) { let fileContent; try { fileContent = readFileSync(filePath, "utf8"); } catch { return undefined; } const patterns = [ /(^|\n)\s*name\s*=\s*["']([^"'\n]+)["']/m, /(^|\n)\s*name\s*=\s*([^\n#]+)/m, /setup\s*\([^)]*name\s*=\s*["']([^"']+)["']/ms, ]; for (const pattern of patterns) { const match = fileContent.match(pattern); if (!match) { continue; } const packageName = (match[2] || match[1] || "").trim(); if (packageName) { return packageName; } } return undefined; } /** * Resolve the most specific Python package directory inside a cloned repo. * * @param {string} cloneDir cloned repository root * @param {object} target audit target * @returns {{ confidence: string, scanDir: string }} selected directory and confidence */ export function resolvePythonSourceDirectory(cloneDir, target) { const normalizedTargetName = normalizePackageName(target.name); const queue = [cloneDir]; const matches = []; while (queue.length) { const currentDir = queue.shift(); let entries = []; try { entries = readdirSync(currentDir, { withFileTypes: true }); } catch { continue; } for (const entry of entries) { const entryPath = join(currentDir, entry.name); if (entry.isDirectory()) { if (!PYTHON_SKIP_DIRS.has(entry.name)) { queue.push(entryPath); } continue; } if (!entry.isFile() || !PYTHON_METADATA_FILES.includes(entry.name)) { continue; } const packageName = readPythonPackageName(entryPath); if (normalizePackageName(packageName) === normalizedTargetName) { matches.push(currentDir); } } } if (!matches.length) { return { confidence: "low", scanDir: cloneDir, }; } matches.sort((left, right) => left.length - right.length); return { confidence: matches[0] === cloneDir ? "medium" : "high", scanDir: matches[0], }; } /** * Resolve the most appropriate scan directory for a cloned target repository. * * @param {string} cloneDir cloned repository root * @param {object} target audit target * @param {object} resolution repository resolution metadata * @returns {{ confidence: string, scanDir: string }} selected directory and confidence */ export function resolveTargetSourceDirectory(cloneDir, target, resolution) { if (target.type === "npm") { const scanDir = resolvePurlSourceDirectory(cloneDir, resolution); if (!scanDir) { return { confidence: "medium", scanDir: cloneDir, }; } return { confidence: scanDir === cloneDir ? "medium" : "high", scanDir, }; } if (target.type === "pypi") { return resolvePythonSourceDirectory(cloneDir, target); } return { confidence: "low", scanDir: cloneDir, }; } function collectPythonHeuristicFiles(scanDir) { const candidates = []; const queue = [scanDir]; while (queue.length && candidates.length < PYTHON_HEURISTIC_FILE_LIMIT) { const currentDir = queue.shift(); let entries = []; try { entries = readdirSync(currentDir, { withFileTypes: true }); } catch { continue; } for (const entry of entries) { const entryPath = join(currentDir, entry.name); if (entry.isDirectory()) { if (!PYTHON_SKIP_DIRS.has(entry.name)) { queue.push(entryPath); } continue; } if ( entry.isFile() && PYTHON_HEURISTIC_FILENAMES.has(entry.name) && candidates.length < PYTHON_HEURISTIC_FILE_LIMIT ) { candidates.push(entryPath); } } } return candidates; } function inspectPythonHeuristicFile(filePath) { let fileSize; try { fileSize = statSync(filePath).size; } catch { return undefined; } if (fileSize > PYTHON_HEURISTIC_MAX_FILE_BYTES) { return undefined; } let fileContent; try { fileContent = readFileSync(filePath, "utf8"); } catch { return undefined; } const indicators = []; if (PYTHON_EXECUTION_PATTERN.test(fileContent)) { indicators.push("process-or-dynamic-execution"); } if (PYTHON_NETWORK_PATTERN.test(fileContent)) { indicators.push("network-access"); } if (PYTHON_OBFUSCATION_PATTERN.test(fileContent)) { indicators.push("encoded-loader"); } if ( filePath.endsWith("setup.py") && PYTHON_SETUP_CMDCLASS_PATTERN.test(fileContent) ) { indicators.push("setup-cmdclass"); } return indicators.length ? indicators : undefined; } /** * Build shallow predictive findings for suspicious Python packaging files. * * Phase 1 intentionally focuses on high-signal packaging surfaces (`setup.py` * and package `__init__.py`) until deeper Python static analysis is added. * * @param {string} scanDir cloned repository scan directory * @param {object} target audit target * @returns {object[]} predictive findings */ export function buildPythonSourceHeuristicFindings(scanDir, target) { if (!scanDir || target?.type !== "pypi") { return []; } const findings = []